Compare commits
104 Commits
v2.4
...
mqtt-refactor
| Author | SHA1 | Date | |
|---|---|---|---|
| ef0211505a | |||
| c961fc8814 | |||
| cfa90ea5d5 | |||
| 0dbb84b355 | |||
| b1ff836c4e | |||
| 3eb4368b37 | |||
| a2df6149bb | |||
| 5b60b17dd3 | |||
| 62626cc6a1 | |||
| 2e09b75f52 | |||
| aa658fb29b | |||
| ad2e208fc2 | |||
| f32cd5c93a | |||
| 927123d491 | |||
| 9cc9e2efe6 | |||
| b9ef100721 | |||
| da5b9b2fb5 | |||
| 27c681f758 | |||
| 2af6130d22 | |||
| 268c2c5295 | |||
| 797615acf7 | |||
| ec786fd5db | |||
| bcda393d7b | |||
| e7a67ad2be | |||
| 306a7e62bd | |||
| 99b35270c8 | |||
| a69b445d51 | |||
| 59ee156c1d | |||
| afcc2c59d4 | |||
| 25afcc3559 | |||
| 8cd3a10299 | |||
| 67a841c080 | |||
| 28e098330a | |||
| 91f6571662 | |||
| d9ef37c005 | |||
| f93dfe8a4e | |||
| 1322ba3a3b | |||
| 163cd9670c | |||
| 580ea54b42 | |||
| c4a9b60990 | |||
| 89875f1644 | |||
| 6f6924cee4 | |||
| 19830d8d39 | |||
| a9ccba37ee | |||
| 21ab4d4be2 | |||
| 72383facae | |||
| d663c880f7 | |||
| cad172b450 | |||
| 713b199669 | |||
| 8fdbddb2b8 | |||
| df974c4faf | |||
| 72fd9ced65 | |||
| a82ecb8b52 | |||
| d052be5290 | |||
| 481bd3883f | |||
| 97c68d1d0d | |||
| b0500afa3f | |||
| b356d2218f | |||
| a5ce7e6ef3 | |||
| b5109850ae | |||
| 974784bb4f | |||
| 81680f00d2 | |||
| 4718800ec0 | |||
| a9f4122875 | |||
| 11d311f7ed | |||
| 7c50ea5790 | |||
| 18dca38e9a | |||
| 94b59c16bc | |||
| 7921287040 | |||
| 38211e06ba | |||
| 2269bebf33 | |||
| 565506b1df | |||
| cc7e1b9a25 | |||
| c76c78674c | |||
| 3a925952e2 | |||
| 9f6420e4cc | |||
| 2e720ffa67 | |||
| 55a4788cc6 | |||
| 6cf18735c5 | |||
| f27e333ac8 | |||
| 547a63ab59 | |||
| 45ea5996ce | |||
| 37cf6c85da | |||
| 4383145401 | |||
| f7ed88de8b | |||
| b1d7695a4c | |||
| f58a2451cf | |||
| 049a173b14 | |||
| c02ff73be8 | |||
| cda3a02775 | |||
| 292a2fdf10 | |||
| 07dcbebf79 | |||
| 59ba6e5dda | |||
| d08b62148d | |||
| 91bce4cb8b | |||
| d8d6486508 | |||
| 3f1d0946be | |||
| e744330761 | |||
| 5e6030818d | |||
| 8d0f6f37a2 | |||
| 396531f6ec | |||
| 51d9bc0c8f | |||
| bea3f30789 | |||
| 9f8babff34 |
@@ -1,4 +1,4 @@
|
||||
.PHONY: web-dist docker manifest docs-uml g2p
|
||||
.PHONY: web-dist docker manifest docs-uml g2p check
|
||||
SHELL := bash
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -83,3 +83,7 @@ g2p: $(G2P_MODELS)
|
||||
|
||||
mypy:
|
||||
mypy app.py rhasspy
|
||||
|
||||
check:
|
||||
flake8 --exclude=lexconvert.py app.py test.py rhasspy/*.py
|
||||
pylint --ignore=lexconvert.py app.py test.py rhasspy/*.py
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
Rhasspy (pronounced RAH-SPEE) is an offline, [multilingual](#supported-languages) voice assistant toolkit inspired by [Jasper](https://jasperproject.github.io/) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
|
||||
* [Documentation](https://rhasspy.readthedocs.io/)
|
||||
* [Discussion](https://community.rhasspy.org)
|
||||
* [Video Introduction](https://www.youtube.com/watch?v=ijKTR_GqWwA)
|
||||
* [Hass.IO Add-On Repository](https://github.com/synesthesiam/hassio-addons)
|
||||
* [Discussion](https://community.home-assistant.io/t/rhasspy-offline-voice-assistant-toolkit/60862)
|
||||
|
||||
Rhasspy transca voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](https://rhasspy.readthedocs.io/en/latest/usage/#node-red). You define custom voice commands in a [profile](https://rhasspy.readthedocs.io/en/latest/profiles/) using a [specialized template syntax](https://rhasspy.readthedocs.io/en/latest/training/#sentencesini), and Rhasspy takes care of the rest.
|
||||
Rhasspy transcribes voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](https://rhasspy.readthedocs.io/en/latest/usage/#node-red). You define custom voice commands in a [profile](https://rhasspy.readthedocs.io/en/latest/profiles/) using a [specialized template syntax](https://rhasspy.readthedocs.io/en/latest/training/#sentencesini), and Rhasspy takes care of the rest.
|
||||
|
||||
To run Rhasspy with the English (en) profile using Docker:
|
||||
|
||||
|
||||
@@ -8,9 +8,10 @@ import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Tuple, Union
|
||||
from typing import Any, Dict, List, Tuple, Union
|
||||
from uuid import uuid4
|
||||
|
||||
import attr
|
||||
from quart import (
|
||||
Quart,
|
||||
Response,
|
||||
@@ -22,6 +23,7 @@ from quart import (
|
||||
websocket,
|
||||
)
|
||||
from quart_cors import cors
|
||||
from swagger_ui import quart_api_doc
|
||||
|
||||
from rhasspy.actor import ActorSystem, ConfigureEvent, RhasspyActor
|
||||
from rhasspy.core import RhasspyCore
|
||||
@@ -30,18 +32,19 @@ from rhasspy.intent import IntentRecognized
|
||||
from rhasspy.utils import (
|
||||
FunctionLoggingHandler,
|
||||
buffer_to_wav,
|
||||
load_phoneme_examples,
|
||||
recursive_remove,
|
||||
get_all_intents,
|
||||
get_ini_paths,
|
||||
get_wav_duration,
|
||||
load_phoneme_examples,
|
||||
read_dict,
|
||||
recursive_remove,
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Flask Web App Setup
|
||||
# Quart Web App Setup
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logging.root.setLevel(logging.DEBUG)
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
@@ -82,8 +85,15 @@ parser.add_argument(
|
||||
parser.add_argument(
|
||||
"--ssl", nargs=2, help="Use SSL with <CERT_FILE <KEY_FILE>", default=None
|
||||
)
|
||||
parser.add_argument("--log-level", default="DEBUG", help="Set logging level")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Set log level
|
||||
log_level = getattr(logging, args.log_level.upper())
|
||||
logging.basicConfig(level=log_level)
|
||||
|
||||
|
||||
logger.debug(args)
|
||||
|
||||
system_profiles_dir = os.path.abspath(args.system_profiles)
|
||||
@@ -147,6 +157,15 @@ async def start_rhasspy() -> None:
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.route("/api/version")
|
||||
async def api_version() -> Response:
|
||||
"""Get Rhasspy version."""
|
||||
return await send_file(Path("VERSION"))
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.route("/api/profiles")
|
||||
async def api_profiles() -> Response:
|
||||
"""Get list of available profiles and verify necessary files."""
|
||||
@@ -294,7 +313,7 @@ async def api_profile() -> Union[str, Response]:
|
||||
if layers == "profile":
|
||||
# Local settings only
|
||||
profile_path = Path(core.profile.read_path("profile.json"))
|
||||
return send_file(profile_path) # , mimetype="application/json")
|
||||
return await send_file(profile_path)
|
||||
|
||||
return jsonify(core.profile.json)
|
||||
|
||||
@@ -415,7 +434,37 @@ async def api_sentences():
|
||||
assert core is not None
|
||||
|
||||
if request.method == "POST":
|
||||
# Update sentences
|
||||
# POST
|
||||
if request.mimetype == "application/json":
|
||||
# Update multiple ini files at once. Paths as keys (relative to
|
||||
# profile directory), sentences as values.
|
||||
num_chars = 0
|
||||
paths_written = []
|
||||
|
||||
sentences_dict = await request.json
|
||||
for sentences_path, sentences_text in sentences_dict.items():
|
||||
# Path is relative to profile directory
|
||||
sentences_path = Path(core.profile.write_path(sentences_path))
|
||||
|
||||
if sentences_text.strip():
|
||||
# Overwrite file
|
||||
logger.debug("Writing %s", sentences_path)
|
||||
|
||||
sentences_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
sentences_path.write_text(sentences_text)
|
||||
|
||||
num_chars += len(sentences_text)
|
||||
paths_written.append(sentences_path)
|
||||
elif sentences_path.is_file():
|
||||
# Remove file
|
||||
logger.debug("Removing %s", sentences_path)
|
||||
sentences_path.unlink()
|
||||
|
||||
return "Wrote {} char(s) to {}".format(
|
||||
num_chars, [str(p) for p in paths_written]
|
||||
)
|
||||
|
||||
# Update sentences.ini only
|
||||
sentences_path = Path(
|
||||
core.profile.write_path(core.profile.get("speech_to_text.sentences_ini"))
|
||||
)
|
||||
@@ -423,18 +472,48 @@ async def api_sentences():
|
||||
data = await request.data
|
||||
with open(sentences_path, "wb") as sentences_file:
|
||||
sentences_file.write(data)
|
||||
return "Wrote %s byte(s) to %s" % (len(data), sentences_path)
|
||||
return "Wrote {} byte(s) to {}".format(len(data), sentences_path)
|
||||
|
||||
# Return sentences
|
||||
sentences_path = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.sentences_ini"))
|
||||
# GET
|
||||
sentences_path_rel = core.profile.read_path(
|
||||
core.profile.get("speech_to_text.sentences_ini")
|
||||
)
|
||||
sentences_path = Path(sentences_path_rel)
|
||||
|
||||
if prefers_json():
|
||||
# Return multiple .ini files, keyed by path relative to profile
|
||||
# directory.
|
||||
sentences_dict = {}
|
||||
if sentences_path.is_file():
|
||||
try:
|
||||
# Try user profile dir first
|
||||
profile_dir = Path(core.profile.user_profiles_dir) / core.profile.name
|
||||
key = str(sentences_path.relative_to(profile_dir))
|
||||
except Exception:
|
||||
# Fall back to system profile dir
|
||||
profile_dir = Path(core.profile.system_profiles_dir) / core.profile.name
|
||||
key = str(sentences_path.relative_to(profile_dir))
|
||||
|
||||
sentences_dict[key] = sentences_path.read_text()
|
||||
|
||||
ini_dir = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.sentences_dir"))
|
||||
)
|
||||
|
||||
# Add all .ini files from sentences_dir
|
||||
if ini_dir.is_dir():
|
||||
for ini_path in ini_dir.glob("*.ini"):
|
||||
key = str(ini_path.relative_to(core.profile.read_path()))
|
||||
sentences_dict[key] = ini_path.read_text()
|
||||
|
||||
return jsonify(sentences_dict)
|
||||
|
||||
# Return sentences.ini contents only
|
||||
if not sentences_path.is_file():
|
||||
return "" # no sentences yet
|
||||
|
||||
# Return file contents
|
||||
return await send_file(sentences_path) # , mimetype="text/plain")
|
||||
return await send_file(sentences_path)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -449,7 +528,9 @@ async def api_custom_words():
|
||||
if request.method == "POST":
|
||||
custom_words_path = Path(
|
||||
core.profile.write_path(
|
||||
core.profile.get(f"speech_to_text.{speech_system}.custom_words")
|
||||
core.profile.get(
|
||||
f"speech_to_text.{speech_system}.custom_words", "custom_words.txt"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -470,7 +551,9 @@ async def api_custom_words():
|
||||
|
||||
custom_words_path = Path(
|
||||
core.profile.read_path(
|
||||
core.profile.get(f"speech_to_text.{speech_system}.custom_words")
|
||||
core.profile.get(
|
||||
f"speech_to_text.{speech_system}.custom_words", "custom_words.txt"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -682,7 +765,9 @@ async def api_unknown_words() -> Response:
|
||||
unknown_words = {}
|
||||
unknown_path = Path(
|
||||
core.profile.read_path(
|
||||
core.profile.get(f"speech_to_text.{speech_system}.unknown_words")
|
||||
core.profile.get(
|
||||
f"speech_to_text.{speech_system}.unknown_words", "unknown_words.txt"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -702,18 +787,27 @@ last_sentence = ""
|
||||
|
||||
|
||||
@app.route("/api/text-to-speech", methods=["POST"])
|
||||
async def api_text_to_speech() -> str:
|
||||
async def api_text_to_speech() -> Union[bytes, str]:
|
||||
"""Speak a sentence with text to speech system."""
|
||||
global last_sentence
|
||||
repeat = request.args.get("repeat", "false").strip().lower() == "true"
|
||||
play = request.args.get("play", "true").strip().lower() == "true"
|
||||
language = request.args.get("language")
|
||||
voice = request.args.get("voice")
|
||||
data = await request.data
|
||||
sentence = last_sentence if repeat else data.decode().strip()
|
||||
|
||||
assert core is not None
|
||||
await core.speak_sentence(sentence)
|
||||
result = await core.speak_sentence(
|
||||
sentence, play=play, language=language, voice=voice
|
||||
)
|
||||
|
||||
last_sentence = sentence
|
||||
|
||||
if not play:
|
||||
# Return WAV data instead of speaking
|
||||
return result.wav_data
|
||||
|
||||
return sentence
|
||||
|
||||
|
||||
@@ -725,14 +819,16 @@ async def api_slots() -> Union[str, Response]:
|
||||
"""Get the values of all slots."""
|
||||
assert core is not None
|
||||
|
||||
slots_dir = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.slots_dir"))
|
||||
)
|
||||
|
||||
if request.method == "POST":
|
||||
overwrite_all = request.args.get("overwrite_all", "false").lower() == "true"
|
||||
new_slot_values = await request.json
|
||||
|
||||
slots_dir = Path(
|
||||
core.profile.write_path(
|
||||
core.profile.get("speech_to_text.slots_dir", "slots")
|
||||
)
|
||||
)
|
||||
|
||||
if overwrite_all:
|
||||
# Remove existing values first
|
||||
for name in new_slot_values.keys():
|
||||
@@ -747,32 +843,40 @@ async def api_slots() -> Union[str, Response]:
|
||||
if isinstance(values, str):
|
||||
values = [values]
|
||||
|
||||
slots_path = Path(
|
||||
core.profile.write_path(
|
||||
core.profile.get("speech_to_text.slots_dir", "slots"), f"{name}"
|
||||
)
|
||||
)
|
||||
slots_path = slots_dir / name
|
||||
|
||||
# Create directories
|
||||
slots_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Write data
|
||||
with open(slots_path, "w") as slots_file:
|
||||
for value in values:
|
||||
value = value.strip()
|
||||
if value:
|
||||
print(value, file=slots_file)
|
||||
# Merge with existing values
|
||||
values = set(values)
|
||||
if slots_path.is_file():
|
||||
values.update(line for line in slots_path.read_text().splitlines())
|
||||
|
||||
# Write merged values
|
||||
if values:
|
||||
with open(slots_path, "w") as slots_file:
|
||||
for value in values:
|
||||
value = value.strip()
|
||||
if value:
|
||||
print(value, file=slots_file)
|
||||
|
||||
return "OK"
|
||||
|
||||
# Read slots into dictionary
|
||||
slots_dir = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.slots_dir", "slots"))
|
||||
)
|
||||
|
||||
slots_dict = {}
|
||||
for slot_file_path in slots_dir.glob("*"):
|
||||
if slot_file_path.is_file():
|
||||
slot_name = slot_file_path.name
|
||||
slots_dict[slot_name] = [
|
||||
line.strip() for line in slot_file_path.read_text().splitlines()
|
||||
]
|
||||
|
||||
if slots_dir.is_dir():
|
||||
for slot_file_path in slots_dir.glob("*"):
|
||||
if slot_file_path.is_file():
|
||||
slot_name = slot_file_path.name
|
||||
slots_dict[slot_name] = [
|
||||
line.strip() for line in slot_file_path.read_text().splitlines()
|
||||
]
|
||||
|
||||
return jsonify(slots_dict)
|
||||
|
||||
@@ -824,6 +928,73 @@ def api_slots_by_name(name: str) -> Union[str, Response]:
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.route("/api/intents")
|
||||
def api_intents():
|
||||
"""Return JSON with information about intents."""
|
||||
assert core is not None
|
||||
|
||||
sentences_ini = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.sentences_ini"))
|
||||
)
|
||||
|
||||
sentences_dir = Path(
|
||||
core.profile.read_path(core.profile.get("speech_to_text.sentences_dir"))
|
||||
)
|
||||
|
||||
# Load all .ini files and parse
|
||||
ini_paths: List[Path] = get_ini_paths(sentences_ini, sentences_dir)
|
||||
intents: Dict[str, Any] = get_all_intents(ini_paths)
|
||||
|
||||
def add_type(item, item_dict: Dict[str, Any]):
|
||||
"""Add item_type to expression dictionary."""
|
||||
item_dict["item_type"] = type(item).__name__
|
||||
if hasattr(item, "items"):
|
||||
# Group, alternative, etc.
|
||||
for sub_item, sub_item_dict in zip(item.items, item_dict["items"]):
|
||||
add_type(sub_item, sub_item_dict)
|
||||
elif hasattr(item, "rule_body"):
|
||||
# Rule
|
||||
add_type(item.rule_body, item_dict["rule_body"])
|
||||
|
||||
# Convert to dictionary
|
||||
intents_dict = {}
|
||||
for intent_name, intent_sentences in intents.items():
|
||||
sentence_dicts = []
|
||||
for sentence in intent_sentences:
|
||||
sentence_dict = attr.asdict(sentence)
|
||||
|
||||
# Add item_type field
|
||||
add_type(sentence, sentence_dict)
|
||||
sentence_dicts.append(sentence_dict)
|
||||
|
||||
intents_dict[intent_name] = sentence_dicts
|
||||
|
||||
# Convert to JSON
|
||||
return jsonify(intents_dict)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.route("/process", methods=["GET"])
|
||||
async def marytts_process():
|
||||
"""Emulate MaryTTS /process API"""
|
||||
global last_sentence
|
||||
|
||||
assert core is not None
|
||||
sentence = request.args.get("INPUT_TEXT", "")
|
||||
voice = request.args.get("VOICE")
|
||||
locale = request.args.get("LOCALE")
|
||||
spoken = await core.speak_sentence(
|
||||
sentence, play=False, voice=voice, language=locale
|
||||
)
|
||||
|
||||
return spoken.wav_data
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.errorhandler(Exception)
|
||||
async def handle_error(err) -> Tuple[str, int]:
|
||||
"""Return error as text."""
|
||||
@@ -835,31 +1006,38 @@ async def handle_error(err) -> Tuple[str, int]:
|
||||
# Static Routes
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
web_dir = os.path.join(os.getcwd(), "dist")
|
||||
web_dir = Path("dist")
|
||||
assert web_dir.is_dir(), f"Missing web directory {web_dir}"
|
||||
|
||||
|
||||
css_dir = web_dir / "css"
|
||||
js_dir = web_dir / "js"
|
||||
img_dir = web_dir / "img"
|
||||
webfonts_dir = web_dir / "webfonts"
|
||||
|
||||
|
||||
@app.route("/css/<path:filename>", methods=["GET"])
|
||||
async def css(filename) -> Response:
|
||||
"""CSS static endpoint."""
|
||||
return await send_from_directory(os.path.join(web_dir, "css"), filename)
|
||||
return await send_from_directory(css_dir, filename)
|
||||
|
||||
|
||||
@app.route("/js/<path:filename>", methods=["GET"])
|
||||
async def js(filename) -> Response:
|
||||
"""Javascript static endpoint."""
|
||||
return await send_from_directory(os.path.join(web_dir, "js"), filename)
|
||||
return await send_from_directory(js_dir, filename)
|
||||
|
||||
|
||||
@app.route("/img/<path:filename>", methods=["GET"])
|
||||
async def img(filename) -> Response:
|
||||
"""Image static endpoint."""
|
||||
return await send_from_directory(os.path.join(web_dir, "img"), filename)
|
||||
return await send_from_directory(img_dir, filename)
|
||||
|
||||
|
||||
@app.route("/webfonts/<path:filename>", methods=["GET"])
|
||||
async def webfonts(filename) -> Response:
|
||||
"""Web font static endpoint."""
|
||||
return await send_from_directory(os.path.join(web_dir, "webfonts"), filename)
|
||||
return await send_from_directory(webfonts_dir, filename)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
@@ -870,13 +1048,13 @@ async def webfonts(filename) -> Response:
|
||||
@app.route("/", methods=["GET"])
|
||||
async def index() -> Response:
|
||||
"""Render main web page."""
|
||||
return await send_file(os.path.join(web_dir, "index.html"))
|
||||
return await send_file(web_dir / "index.html")
|
||||
|
||||
|
||||
@app.route("/swagger.yaml", methods=["GET"])
|
||||
async def swagger_yaml() -> Response:
|
||||
"""OpenAPI static endpoint."""
|
||||
return await send_file(os.path.join(web_dir, "swagger.yaml"))
|
||||
return await send_file(web_dir / "swagger.yaml")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -970,15 +1148,25 @@ async def api_events_log() -> None:
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Swagger UI
|
||||
quart_api_doc(
|
||||
app, config_path=(web_dir / "swagger.yaml"), url_prefix="/api", title="Rhasspy API"
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def prefers_json() -> bool:
|
||||
"""True if client prefers JSON over plain text."""
|
||||
return quality(request.accept_mimetypes, "application/json") > quality(
|
||||
request.accept_mimetypes, "text/plain"
|
||||
)
|
||||
|
||||
|
||||
def quality(accept, key: str) -> float:
|
||||
"""Return Accept quality for media type."""
|
||||
for option in accept.options:
|
||||
# pylint: disable=W0212
|
||||
if accept._values_match(key, option.value):
|
||||
return option.quality
|
||||
return 0.0
|
||||
|
||||
@@ -0,0 +1,402 @@
|
||||
#!/usr/bin/env bash
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
CPU_ARCH="$(uname --m)"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Command-line Arguments
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
. "${this_dir}/etc/shflags"
|
||||
|
||||
DEFINE_string 'venv' "${this_dir}/.venv" 'Path to create virtual environment'
|
||||
DEFINE_string 'download-dir' "${this_dir}/download" 'Directory to cache downloaded files'
|
||||
DEFINE_string 'build-dir' "${this_dir}/build_${CPU_ARCH}" 'Directory to build dependencies in'
|
||||
DEFINE_boolean 'system' true 'Install system dependencies'
|
||||
DEFINE_boolean 'flair' false 'Install flair'
|
||||
DEFINE_boolean 'precise' false 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'adapt' false 'Install Mycroft Adapt'
|
||||
DEFINE_boolean 'google' false 'Install Google Text to Speech'
|
||||
DEFINE_boolean 'kaldi' false 'Install Kaldi'
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_boolean 'web' true "Build Vue web interface with yarn"
|
||||
DEFINE_boolean 'sudo' true "Use sudo for apt"
|
||||
DEFINE_integer 'make-threads' 4 'Number of threads to use with make' 'j'
|
||||
DEFINE_string 'python' 'python3' 'Path to Python executable'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Default Settings
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
set -e
|
||||
|
||||
python="${FLAGS_python}"
|
||||
venv="${FLAGS_venv}"
|
||||
|
||||
download_dir="${FLAGS_download_dir}"
|
||||
mkdir -p "${download_dir}"
|
||||
echo "Download directory: ${download_dir}"
|
||||
|
||||
build_dir="${FLAGS_build_dir}"
|
||||
mkdir -p "${build_dir}"
|
||||
echo "Build directory: ${build_dir}"
|
||||
|
||||
if [[ "${FLAGS_system}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_system='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_flair}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_flair='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_precise}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_precise='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_adapt}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_adapt='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_kaldi}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_kaldi='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_google}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_google='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_offline}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
offline='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_web}" -eq "${FLAGS_FALSE}" ]]; then
|
||||
no_web='true'
|
||||
fi
|
||||
|
||||
if [[ "${FLAGS_sudo}" -eq "${FLAGS_TRUE}" ]]; then
|
||||
function run_sudo {
|
||||
sudo "$@"
|
||||
}
|
||||
else
|
||||
function run_sudo {
|
||||
"$@"
|
||||
}
|
||||
fi
|
||||
|
||||
make_threads="${FLAGS_make_threads}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Create a temporary directory for building stuff
|
||||
temp_dir="$(mktemp -d)"
|
||||
|
||||
function cleanup {
|
||||
rm -rf "${temp_dir}"
|
||||
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
function maybe_download {
|
||||
if [[ ! -s "$2" ]]; then
|
||||
if [[ -n "${offline}" ]]; then
|
||||
echo "Need to download $1 but offline."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "$2")"
|
||||
curl -sSfL -o "$2" "$1" || { echo "Can't download $1"; exit 1; }
|
||||
echo "$1 => $2"
|
||||
fi
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Checking required programs"
|
||||
|
||||
if [[ -z "${no_web}" ]]; then
|
||||
if [[ ! -n "$(command -v yarn)" ]]; then
|
||||
echo "Please install yarn to continue (https://yarnpkg.com)"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_system}" ]]; then
|
||||
echo "Installing system dependencies"
|
||||
|
||||
run_sudo apt-get update
|
||||
run_sudo apt-get install --no-install-recommends --yes \
|
||||
python3 python3-pip python3-venv python3-dev \
|
||||
python \
|
||||
build-essential autoconf autoconf-archive libtool automake bison \
|
||||
sox espeak flite swig portaudio19-dev \
|
||||
libatlas-base-dev \
|
||||
gfortran \
|
||||
sphinxbase-utils sphinxtrain pocketsphinx \
|
||||
jq checkinstall unzip xz-utils \
|
||||
curl \
|
||||
lame
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Downloading dependencies"
|
||||
|
||||
# Python-Pocketsphinx
|
||||
pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
if [[ ! -s "${pocketsphinx_file}" ]]; then
|
||||
pocketsphinx_url='https://github.com/synesthesiam/pocketsphinx-python/releases/download/v1.0/pocketsphinx-python.tar.gz'
|
||||
echo "Downloading pocketsphinx (${pocketsphinx_url})"
|
||||
maybe_download "${pocketsphinx_url}" "${pocketsphinx_file}"
|
||||
fi
|
||||
|
||||
# OpenFST
|
||||
openfst_dir="${build_dir}/openfst-1.6.9"
|
||||
if [[ ! -d "${openfst_dir}/build" ]]; then
|
||||
openfst_file="${download_dir}/openfst-1.6.9.tar.gz"
|
||||
|
||||
if [[ ! -s "${openfst_file}" ]]; then
|
||||
openfst_url='http://openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.9.tar.gz'
|
||||
echo "Downloading openfst (${openfst_url})"
|
||||
maybe_download "${openfst_url}" "${openfst_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Opengrm
|
||||
opengrm_dir="${build_dir}/opengrm-ngram-1.3.4"
|
||||
if [[ ! -d "${opengrm_dir}/build" ]]; then
|
||||
opengrm_file="${download_dir}/opengrm-ngram-1.3.4.tar.gz"
|
||||
|
||||
if [[ ! -s "${opengrm_file}" ]]; then
|
||||
opengrm_url='http://www.opengrm.org/twiki/pub/GRM/NGramDownload/opengrm-ngram-1.3.4.tar.gz'
|
||||
echo "Downloading opengrm (${opengrm_url})"
|
||||
maybe_download "${opengrm_url}" "${opengrm_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Phonetisaurus
|
||||
phonetisaurus_dir="${build_dir}/phonetisaurus"
|
||||
if [[ ! -d "${phonetisaurus_dir}/build" ]]; then
|
||||
phonetisaurus_file="${download_dir}/phonetisaurus-2019.tar.gz"
|
||||
|
||||
if [[ ! -s "${phonetisaurus_file}" ]]; then
|
||||
phonetisaurus_url='https://github.com/synesthesiam/phonetisaurus-2019/releases/download/v1.0/phonetisaurus-2019.tar.gz'
|
||||
echo "Downloading phonetisaurus (${phonetisaurus_url})"
|
||||
maybe_download "${phonetisaurus_url}" "${phonetisaurus_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Kaldi
|
||||
kaldi_dir="${this_dir}/opt/kaldi"
|
||||
if [[ ! -d "${kaldi_dir}" ]]; then
|
||||
install libatlas-base-dev libatlas3-base gfortran
|
||||
run_sudo ldconfig
|
||||
kaldi_file="${download_dir}/kaldi-2019.tar.gz"
|
||||
|
||||
if [[ ! -s "${kaldi_file}" ]]; then
|
||||
kaldi_url='https://github.com/kaldi-asr/kaldi/archive/master.tar.gz'
|
||||
echo "Downloading kaldi (${kaldi_url})"
|
||||
maybe_download "${kaldi_url}" "${kaldi_file}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Re-create virtual environment
|
||||
echo "Creating virtual environment"
|
||||
rm -rf "${venv}"
|
||||
"${python}" -m venv "${venv}"
|
||||
source "${venv}/bin/activate"
|
||||
pip3 install wheel setuptools
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# openfst
|
||||
# http://www.openfst.org
|
||||
#
|
||||
# Required to build language models and do intent recognition.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ ! -d "${openfst_dir}/build" ]]; then
|
||||
echo "Building openfst (${openfst_file})"
|
||||
tar -C "${build_dir}" -xf "${openfst_file}" && \
|
||||
cd "${openfst_dir}" && \
|
||||
./configure "--prefix=${openfst_dir}/build" \
|
||||
--enable-far \
|
||||
--disable-static \
|
||||
--enable-shared \
|
||||
--enable-ngram-fsts && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${openfst_dir}"/build/include/* "${venv}/include/"
|
||||
cp -R "${openfst_dir}"/build/lib/*.so* "${venv}/lib/"
|
||||
cp -R "${openfst_dir}"/build/bin/* "${venv}/bin/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# opengrm
|
||||
# http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary
|
||||
#
|
||||
# Required to build language models.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# opengrm
|
||||
if [[ ! -d "${opengrm_dir}/build" ]]; then
|
||||
echo "Building opengrm (${opengrm_file})"
|
||||
export CXXFLAGS="-I${venv}/include"
|
||||
export LDFLAGS="-L${venv}/lib"
|
||||
tar -C "${build_dir}" -xf "${opengrm_file}" && \
|
||||
cd "${opengrm_dir}" && \
|
||||
./configure "--prefix=${opengrm_dir}/build" && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${opengrm_dir}"/build/bin/* "${venv}/bin/"
|
||||
cp -R "${opengrm_dir}"/build/include/* "${venv}/include/"
|
||||
cp -R "${opengrm_dir}"/build/lib/*.so* "${venv}/lib/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# phonetisaurus
|
||||
# https://github.com/AdolfVonKleist/Phonetisaurus
|
||||
#
|
||||
# Required to guess word pronunciations.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ ! -d "${phonetisaurus_dir}/build" ]]; then
|
||||
echo "Installing phonetisaurus (${phonetisaurus_file})"
|
||||
tar -C "${build_dir}" -xf "${phonetisaurus_file}" && \
|
||||
cd "${phonetisaurus_dir}" && \
|
||||
./configure "--prefix=${phonetisaurus_dir}/build" \
|
||||
--with-openfst-includes="${venv}/include" \
|
||||
--with-openfst-libs="${venv}/lib" && \
|
||||
make -j "${make_threads}" && \
|
||||
make install
|
||||
fi
|
||||
|
||||
# Copy build artifacts into virtual environment
|
||||
cp -R "${phonetisaurus_dir}"/build/bin/* "${venv}/bin/"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# kaldi
|
||||
# https://kaldi-asr.org
|
||||
#
|
||||
# Required for speech recognition with Kaldi-based profiles.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_kaldi}" && ! -f "${kaldi_dir}/src/online2bin/online2-wav-nnet3-latgen-faster" ]]; then
|
||||
echo "Installing kaldi (${kaldi_file})"
|
||||
|
||||
# armhf
|
||||
if [[ -f '/usr/lib/arm-linux-gnueabihf/libatlas.so' ]]; then
|
||||
# Kaldi install doesn't check here, despite it being in ldconfig
|
||||
export ATLASLIBDIR='/usr/lib/arm-linux-gnueabihf'
|
||||
fi
|
||||
|
||||
# aarch64
|
||||
if [[ -f '/usr/lib/aarch64-linux-gnu/libatlas.so' ]]; then
|
||||
# Kaldi install doesn't check here, despite it being in ldconfig
|
||||
export ATLASLIBDIR='/usr/lib/aarch64-linux-gnu'
|
||||
fi
|
||||
|
||||
tar -C "${build_dir}" -xf "${kaldi_file}" && \
|
||||
cp "${this_dir}/etc/linux_atlas_aarch64.mk" "${kaldi_dir}/src/makefiles/" && \
|
||||
cd "${kaldi_dir}/tools" && \
|
||||
make -j "${make_threads}" && \
|
||||
cd "${kaldi_dir}/src" && \
|
||||
./configure --shared --mathlib=ATLAS --use-cuda=no && \
|
||||
make depend -j "${make_threads}" && \
|
||||
make -j "${make_threads}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Python requirements
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Installing Python requirements"
|
||||
|
||||
"${python}" -m pip install requests
|
||||
|
||||
# pytorch is not available on ARM
|
||||
case "${CPU_ARCH}" in
|
||||
armv7l|arm64v8)
|
||||
no_flair="true" ;;
|
||||
esac
|
||||
|
||||
requirements_file="${temp_dir}/requirements.txt"
|
||||
cp "${this_dir}/requirements.txt" "${requirements_file}"
|
||||
|
||||
# Exclude requirements
|
||||
if [[ -n "${no_flair}" ]]; then
|
||||
echo "Excluding flair from virtual environment"
|
||||
sed -i '/^flair/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
echo "Excluding Mycroft Precise from virtual environment"
|
||||
sed -i '/^precise-runner/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ -n "${no_adapt}" ]]; then
|
||||
echo "Excluding Mycroft Adapt from virtual environment"
|
||||
sed -i '/^adapt-parser/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ -n "${no_google}" ]]; then
|
||||
echo "Excluding Google Text to Speech from virtual environment"
|
||||
sed -i '/^google-cloud-texttospeech/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
# Install everything except openfst first
|
||||
sed -i '/^openfst/d' "${requirements_file}"
|
||||
|
||||
"${python}" -m pip install -r "${requirements_file}"
|
||||
|
||||
echo "Installing Python openfst wrapper"
|
||||
"${python}" -m pip install \
|
||||
--global-option=build_ext \
|
||||
--global-option="-I${venv}/include" \
|
||||
--global-option="-L${venv}/lib" \
|
||||
-r <(grep '^openfst' "${this_dir}/requirements.txt")
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Pocketsphinx for Python
|
||||
# https://github.com/cmusphinx/pocketsphinx
|
||||
#
|
||||
# Speech to text for most profiles.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
pocketsphinx_file="${download_dir}/pocketsphinx-python.tar.gz"
|
||||
echo "Installing Python pocketsphinx (${pocketsphinx_file})"
|
||||
|
||||
"${python}" -m pip install "${pocketsphinx_file}"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Snowboy
|
||||
# https://snowboy.kitt.ai
|
||||
#
|
||||
# Wake word system
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
case "${CPU_ARCH}" in
|
||||
x86_64|armv7l)
|
||||
snowboy_file="${download_dir}/snowboy-1.3.0.tar.gz"
|
||||
echo "Installing snowboy (${snowboy_file})"
|
||||
"${python}" -m pip install "${snowboy_file}"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Not installing snowboy (${CPU_ARCH} not supported)"
|
||||
esac
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_web}" ]]; then
|
||||
echo "Building web interface"
|
||||
cd "${this_dir}" && yarn build
|
||||
fi
|
||||
@@ -17,6 +17,7 @@ DEFINE_boolean 'google' false 'Install Google Text to Speech'
|
||||
DEFINE_boolean 'kaldi' true 'Install Kaldi'
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_integer 'make-threads' 4 'Number of threads to use with make' 'j'
|
||||
DEFINE_string 'python' '' 'Path to Python executable'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
@@ -75,14 +76,14 @@ trap cleanup EXIT
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
function maybe_download {
|
||||
if [[ ! -f "$2" ]]; then
|
||||
if [[ ! -z "${offline}" ]]; then
|
||||
if [[ ! -s "$2" ]]; then
|
||||
if [[ -n "${offline}" ]]; then
|
||||
echo "Need to download $1 but offline."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "$2")"
|
||||
curl -sSfL -o "$2" "$1"
|
||||
curl -sSfL -o "$2" "$1" || { echo "Can't download $1"; exit 1; }
|
||||
echo "$1 => $2"
|
||||
fi
|
||||
}
|
||||
@@ -103,38 +104,45 @@ if [[ -z "${no_system}" ]]; then
|
||||
gfortran \
|
||||
sphinxbase-utils sphinxtrain pocketsphinx \
|
||||
jq checkinstall unzip xz-utils \
|
||||
curl
|
||||
curl \
|
||||
lame
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Python >= 3.6
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ ! -z "$(which python3.8)" ]]; then
|
||||
PYTHON='python3.8'
|
||||
elif [[ ! -z "$(which python3.7)" ]]; then
|
||||
PYTHON='python3.7'
|
||||
elif [[ ! -z "$(which python3.6)" ]]; then
|
||||
PYTHON='python3.6'
|
||||
if [[ -z "${FLAGS_python}" ]]; then
|
||||
# Auto-detect Python
|
||||
if [[ -n "$(command -v python3.8)" ]]; then
|
||||
PYTHON='python3.8'
|
||||
elif [[ -n "$(command -v python3.7)" ]]; then
|
||||
PYTHON='python3.7'
|
||||
elif [[ -n "$(command -v python3.6)" ]]; then
|
||||
PYTHON='python3.6'
|
||||
else
|
||||
echo "Installing Python 3.6 from source. This is going to take a LONG time."
|
||||
sudo apt-get install --no-install-recommends --yes \
|
||||
tk-dev libncurses5-dev libncursesw5-dev \
|
||||
libreadline6-dev libdb5.3-dev libgdbm-dev \
|
||||
libsqlite3-dev libssl-dev libbz2-dev \
|
||||
libexpat1-dev liblzma-dev zlib1g-dev
|
||||
|
||||
python_file="${download_dir}/Python-3.6.8.tar.xz"
|
||||
python_url='https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tar.xz'
|
||||
maybe_download "${python_url}" "${python_file}"
|
||||
|
||||
tar -C "${temp_dir}" -xf "${python_file}"
|
||||
cd "${temp_dir}/Python-3.6.8" && \
|
||||
./configure && \
|
||||
make -j "${make_threads}" && \
|
||||
sudo make altinstall
|
||||
|
||||
PYTHON='python3.6'
|
||||
fi
|
||||
else
|
||||
echo "Installing Python 3.6 from source. This is going to take a LONG time."
|
||||
sudo apt-get install --no-install-recommends --yes \
|
||||
tk-dev libncurses5-dev libncursesw5-dev \
|
||||
libreadline6-dev libdb5.3-dev libgdbm-dev \
|
||||
libsqlite3-dev libssl-dev libbz2-dev \
|
||||
libexpat1-dev liblzma-dev zlib1g-dev
|
||||
|
||||
python_file="${download_dir}/Python-3.6.8.tar.xz"
|
||||
python_url='https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tar.xz'
|
||||
maybe_download "${python_url}" "${python_file}"
|
||||
|
||||
tar -C "${temp_dir}" -xf "${python_file}"
|
||||
cd "${temp_dir}/Python-3.6.8" && \
|
||||
./configure && \
|
||||
make -j "${make_threads}" && \
|
||||
sudo make altinstall
|
||||
|
||||
PYTHON='python3.6'
|
||||
# User-provided Python
|
||||
PYTHON="${FLAGS_python}"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -155,19 +163,23 @@ case "${CPU_ARCH}" in
|
||||
arm64v8)
|
||||
FRIENDLY_ARCH=aarch64
|
||||
;;
|
||||
|
||||
*)
|
||||
FRIENDLY_ARCH="${CPU_ARCH}"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Downloading dependencies"
|
||||
download_args=()
|
||||
if [[ ! -z "${offline}" ]]; then
|
||||
if [[ -n "${offline}" ]]; then
|
||||
download_args+=('--offline')
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_precise}" ]]; then
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
download_args+=('--noprecise')
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_kaldi}" ]]; then
|
||||
if [[ -n "${no_kaldi}" ]]; then
|
||||
download_args+=('--nokaldi')
|
||||
fi
|
||||
|
||||
@@ -201,6 +213,9 @@ export LD_LIBRARY_PATH="${venv}/lib:${LD_LIBRARY_PATH}"
|
||||
# shellcheck source=/dev/null
|
||||
source "${venv}/bin/activate"
|
||||
|
||||
echo "Upgrading pip"
|
||||
"${PYTHON}" -m pip install --upgrade pip
|
||||
|
||||
echo "Installing Python requirements"
|
||||
"${PYTHON}" -m pip install wheel setuptools
|
||||
"${PYTHON}" -m pip install requests
|
||||
@@ -208,38 +223,43 @@ echo "Installing Python requirements"
|
||||
# pytorch is not available on ARM
|
||||
case "${CPU_ARCH}" in
|
||||
armv7l|arm64v8)
|
||||
no_flair="true" ;;
|
||||
no_flair="true" ;;
|
||||
esac
|
||||
|
||||
requirements_file="${temp_dir}/requirements.txt"
|
||||
cp "${this_dir}/requirements.txt" "${requirements_file}"
|
||||
|
||||
# Exclude requirements
|
||||
if [[ ! -z "${no_flair}" ]]; then
|
||||
if [[ -n "${no_flair}" ]]; then
|
||||
echo "Excluding flair from virtual environment"
|
||||
sed -i '/^flair/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_precise}" ]]; then
|
||||
if [[ -n "${no_precise}" ]]; then
|
||||
echo "Excluding Mycroft Precise from virtual environment"
|
||||
sed -i '/^precise-runner/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_adapt}" ]]; then
|
||||
if [[ -n "${no_adapt}" ]]; then
|
||||
echo "Excluding Mycroft Adapt from virtual environment"
|
||||
sed -i '/^adapt-parser/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
if [[ ! -z "${no_google}" ]]; then
|
||||
if [[ -n "${no_google}" ]]; then
|
||||
echo "Excluding Google Text to Speech from virtual environment"
|
||||
sed -i '/^google-cloud-texttospeech/d' "${requirements_file}"
|
||||
fi
|
||||
|
||||
# Install everything except openfst first
|
||||
sed -i '/^openfst/d' "${requirements_file}"
|
||||
python3 -m pip install -r "${requirements_file}"
|
||||
|
||||
# Install Python openfst wrapper
|
||||
"${PYTHON}" -m pip install \
|
||||
--global-option=build_ext \
|
||||
--global-option="-I${venv}/include" \
|
||||
--global-option="-L${venv}/lib" \
|
||||
-r "${requirements_file}"
|
||||
-r <(grep '^openfst' "${this_dir}/requirements.txt")
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Pocketsphinx for Python
|
||||
@@ -266,7 +286,7 @@ esac
|
||||
# Mycroft Precise
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_precise}" && -z "$(which precise-engine)" ]]; then
|
||||
if [[ -z "${no_precise}" && -z "$(command -v precise-engine)" ]]; then
|
||||
case "${CPU_ARCH}" in
|
||||
x86_64|armv7l)
|
||||
echo "Installing Mycroft Precise"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
Package: rhasspy-server
|
||||
Version: 2.4.8
|
||||
Version: 2.4.10
|
||||
Section: utils
|
||||
Priority: optional
|
||||
Depends: sox,alsa-utils,espeak,libstdc++6,jq,xz-utils,unzip,curl,sphinxbase-utils,sphinxtrain,flite,libatlas-base-dev,gfortran
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
rhasspy_version="2.4.8"
|
||||
rhasspy_version="2.4.10"
|
||||
|
||||
this_dir="$( cd "$( dirname "$0" )" && pwd )"
|
||||
|
||||
@@ -46,7 +46,7 @@ fi
|
||||
cd "${this_dir}"
|
||||
source "${venv}/bin/activate"
|
||||
|
||||
if [[ -z "$(which pyinstaller)" ]]; then
|
||||
if [[ -z "$(command -v pyinstaller)" ]]; then
|
||||
echo "Missing PyInstaller"
|
||||
exit 1
|
||||
fi
|
||||
@@ -131,7 +131,7 @@ cp "${this_dir}/app.py" "${share_dir}/src/"
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
echo "Copying Kaldi"
|
||||
kaldi_src="${venv}/kaldi"
|
||||
kaldi_src="${this_dir}/opt/kaldi"
|
||||
if [[ ! -d "${kaldi_src}" ]]; then
|
||||
echo "Missing Kaldi at ${kaldi_src}"
|
||||
exit 1
|
||||
@@ -145,7 +145,7 @@ rsync -av --delete "${kaldi_src}/" "${kaldi_dest}/"
|
||||
rm -f "${kaldi_dest}/egs/wsj/s5/utils/utils"
|
||||
|
||||
# Turn duplicate .so files into symbolic links
|
||||
function fix_library_links() {
|
||||
function fix_library_links {
|
||||
lib_dir="$1"
|
||||
|
||||
for lib in "${lib_dir}"/*.so; do
|
||||
|
||||
@@ -59,6 +59,11 @@ COPY profiles/fr/profile.json \
|
||||
profiles/fr/frequent_words.txt \
|
||||
profiles/fr/sentences.ini \
|
||||
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
|
||||
|
||||
COPY profiles/fr/kaldi/custom_words.txt \
|
||||
profiles/fr/kaldi/espeak_phonemes.txt \
|
||||
profiles/fr/kaldi/phoneme_examples.txt \
|
||||
${RHASSPY_APP}/profiles/fr/kaldi/
|
||||
|
||||
COPY profiles/ru/profile.json \
|
||||
profiles/ru/custom_words.txt \
|
||||
|
||||
@@ -48,7 +48,7 @@ RUN python3 -m pip install --no-cache-dir /pocketsphinx-python.tar.gz && \
|
||||
COPY download/snowboy-1.3.0.tar.gz /
|
||||
RUN if [ "$BUILD_ARCH" != "aarch64" ]; then pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz; fi
|
||||
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils lame
|
||||
|
||||
COPY download/kaldi_${BUILD_ARCH}.tar.gz /kaldi.tar.gz
|
||||
RUN mkdir -p /opt && \
|
||||
@@ -133,6 +133,11 @@ COPY profiles/fr/profile.json \
|
||||
profiles/fr/frequent_words.txt \
|
||||
profiles/fr/sentences.ini \
|
||||
profiles/fr/stop_words.txt ${RHASSPY_APP}/profiles/fr/
|
||||
|
||||
COPY profiles/fr/kaldi/custom_words.txt \
|
||||
profiles/fr/kaldi/espeak_phonemes.txt \
|
||||
profiles/fr/kaldi/phoneme_examples.txt \
|
||||
${RHASSPY_APP}/profiles/fr/kaldi/
|
||||
|
||||
COPY profiles/ru/profile.json \
|
||||
profiles/ru/custom_words.txt \
|
||||
@@ -209,6 +214,7 @@ COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
COPY VERSION ${RHASSPY_APP}/
|
||||
|
||||
ENV CONFIG_PATH /data/options.json
|
||||
ENV KALDI_PREFIX /opt
|
||||
|
||||
@@ -7,3 +7,4 @@ COPY rhasspy/train/jsgf2fst/*.py ${RHASSPY_APP}/rhasspy/train/jsgf2fst/
|
||||
COPY rhasspy/train/*.py ${RHASSPY_APP}/rhasspy/train/
|
||||
COPY *.py ${RHASSPY_APP}/
|
||||
COPY rhasspy/*.py ${RHASSPY_APP}/rhasspy/
|
||||
COPY VERSION ${RHASSPY_APP}/
|
||||
|
||||
@@ -1 +1 @@
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils
|
||||
RUN apt-get install --no-install-recommends --yes flite libttspico-utils lame
|
||||
|
||||
@@ -1 +1 @@
|
||||
theme: jekyll-theme-cayman
|
||||
theme: jekyll-theme-cayman
|
||||
|
||||
@@ -2,15 +2,49 @@
|
||||
|
||||
Rhasspy was created and is currently maintained by [Michael Hansen](https://synesthesiam.com/).
|
||||
|
||||

|
||||
<img src="../img/mike-head.png" style="max-height: 100px;" title="Mike head">
|
||||
|
||||
Special thanks to:
|
||||
|
||||
* [Romkabouter](https://github.com/Romkabouter)
|
||||
* [koenvervloesem](https://github.com/koenvervloesem)
|
||||
* [FunkyBoT](https://community.home-assistant.io/u/FunkyBoT)
|
||||
* [fastjack](https://community.rhasspy.org/u/fastjack)
|
||||
* [S_n_Nguy_n](https://community.home-assistant.io/u/S_n_Nguy_n)
|
||||
|
||||
## Motivation
|
||||
|
||||
A typical voice assistant (Alexa, Google Home, etc.) solves a number of important problems:
|
||||
|
||||
1. Deciding when to record audio ([wake word](wake-word.md))
|
||||
2. Listening for voice commands ([command listener](command-listener.md))
|
||||
3. Transcribing command/question ([speech to text](speech-to-text.md))
|
||||
4. Interpreting the speaker's **intent** from the text ([intent recognition](intent-recognition.md))
|
||||
5. Fulfilling the speaker's intent ([intent handling](intent-handling.md))
|
||||
|
||||
Rhasspy provides **offline, private solutions** to problems 1-4 using off-the-shelf tools. These tools are:
|
||||
|
||||
* **Wake word**
|
||||
* [Pocketsphinx keyphrase](https://cmusphinx.github.io/wiki/tutoriallm/#using-keyword-lists-with-pocketsphinx)
|
||||
* [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* [snowboy](https://snowboy.kitt.ai)
|
||||
* [porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* **Command listener**
|
||||
* [webrtcvad](https://github.com/wiseman/py-webrtcvad)
|
||||
* **Speech to text**
|
||||
* [Pocketsphinx](https://github.com/cmusphinx/pocketsphinx)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
* **Intent recognition**
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy)
|
||||
* [Mycroft Adapt](https://github.com/MycroftAI/adapt)
|
||||
* [flair](http://github.com/zalandoresearch/flair)
|
||||
* [Rasa NLU](https://rasa.com/)
|
||||
|
||||
For problem 5 (fulfilling the speaker's intent), Rhasspy works with external home automation software, such as Home Assistant's built-in [automation capability](https://www.home-assistant.io/docs/automation/) or a [Node-RED flow](https://nodered.org).
|
||||
|
||||
For each intent you define, Rhasspy emits a JSON event that can do anything Home Assistant can do (toggle switches, call REST services, etc.). This means that Rhasspy will do very little out of the box compared to other voice assistants, but there are also *no limits* to what can be done.
|
||||
|
||||
## Supporting Tools
|
||||
|
||||
The following tools/libraries help to support Rhasspy:
|
||||
|
||||
@@ -22,11 +22,11 @@ Add to your [profile](profiles.md):
|
||||
```
|
||||
|
||||
Set `microphone.pyaudio.device` to a PyAudio device number or leave blank for the default device.
|
||||
Streams 30ms chunks of 16-bit, 16 Khz mono audio by default (480 frames).
|
||||
Streams 30ms chunks of 16-bit, 16 kHz mono audio by default (480 frames).
|
||||
|
||||
See `rhasspy.audio_recorder.PyAudioRecorder` for details.
|
||||
|
||||
## ALSA
|
||||
## ALSA
|
||||
|
||||
Starts an `arecord` process locally and reads audio data from its standard out.
|
||||
Works best with [ALSA](https://www.alsa-project.org/main/index.php/Main_Page).
|
||||
@@ -42,7 +42,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Set `microphone.arecord.device` to the name of the ALSA device to use (`-D` flag
|
||||
to `arecord`) or leave blank for the default device.
|
||||
By default, calls `arecord -t raw -r 16000 -f S16_LE -c 1` and reads 30ms (960
|
||||
@@ -52,7 +52,7 @@ See `rhasspy.audio_recorder.ARecordAudioRecorder` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Listens to the `hermes/audioServer/<SITE_ID>/audioFrame` topic for WAV data ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
Listens to the `hermes/audioServer/<SITE_ID>/audioFrame` topic for WAV data ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
This allows Rhasspy to receive audio from [Snips.AI](https://snips.ai/).
|
||||
Audio data is automatically converted to 16-bit, 16 kHz mono with [sox](http://sox.sourceforge.net).
|
||||
|
||||
@@ -72,7 +72,7 @@ Add to your [profile](profiles.md):
|
||||
"site_id": "default"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
@@ -80,7 +80,7 @@ See `rhasspy.audio_recorder.HermesAudioRecorder` for details.
|
||||
|
||||
## HTTP Stream
|
||||
|
||||
Accepts chunks of 16-bit 16Khz mono audio via an HTTP POST stream (assumes [chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding)).
|
||||
Accepts chunks of 16-bit 16 kHz mono audio via an HTTP POST stream (assumes [chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding)).
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -95,7 +95,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
```
|
||||
|
||||
Set `microphone.http.stop_after` to one of "never", "text", or "intent". When set to "never", you can continously stream (chunked) audio into Rhasspy across multiple voice commands. When set to "text" or "intent", the stream will be closed when the first voice command has been transcribed ("text") or recognized ("intent"). Once closed, you can perform an HTTP GET request to the stream URL to retrieve the result (text for transcriptions or JSON for intent).
|
||||
Set `microphone.http.stop_after` to one of "never", "text", or "intent". When set to "never", you can continuously stream (chunked) audio into Rhasspy across multiple voice commands. When set to "text" or "intent", the stream will be closed when the first voice command has been transcribed ("text") or recognized ("intent"). Once closed, you can perform an HTTP GET request to the stream URL to retrieve the result (text for transcriptions or JSON for intent).
|
||||
|
||||
Note that `microphone.http.port` must be different than Rhasspy's webserver port (usually 12101).
|
||||
|
||||
@@ -122,7 +122,7 @@ Set `microphone.gstreamer.pipeline` to your GStreamer pipeline **without a sink*
|
||||
udpsrc port=12333 ! rawaudioparse use-sink-caps=false format=pcm pcm-format=s16le sample-rate=16000 num-channels=1 ! queue ! audioconvert ! audioresample
|
||||
```
|
||||
|
||||
which "simply" receives raw 16-bit 16khz audio chunks via UDP port 12333. You could stream microphone audio to Rhasspy from another machine by running the following terminal command:
|
||||
which "simply" receives raw 16-bit 16 kHz audio chunks via UDP port 12333. You could stream microphone audio to Rhasspy from another machine by running the following terminal command:
|
||||
|
||||
```bash
|
||||
gst-launch-1.0 \
|
||||
@@ -152,4 +152,3 @@ Add to your [profile](profiles.md):
|
||||
```
|
||||
|
||||
See `rhasspy.audio_recorder.DummyAudioRecorder` for details.
|
||||
|
||||
|
||||
@@ -9,41 +9,44 @@ Plays WAV files on the local device by calling the `aplay` command. Should work
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
"sounds": {
|
||||
"system": "aplay",
|
||||
"aplay": {
|
||||
"device": ""
|
||||
}
|
||||
}
|
||||
|
||||
```json
|
||||
"sounds": {
|
||||
"system": "aplay",
|
||||
"aplay": {
|
||||
"device": ""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If provided, `sounds.aplay.device` is passed to `aplay` with the `-D` argument.
|
||||
Leave it blank to use the default device.
|
||||
|
||||
See `rhasspy.audio_player.APlayAudioPlayer` for details.
|
||||
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes WAV data to the `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>` topic ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
Publishes WAV data to the `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>` topic ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
This allows Rhasspy to send audio to [Snips.AI](https://snips.ai/).
|
||||
|
||||
Rhasspy will always try to send 16 kHz, 16-bit mono audio.
|
||||
Rhasspy will by default send 16 kHz, 16-bit mono audio, unless specified otherwise.
|
||||
The request id is generated each time a sound is played using `uuid.uuid4`.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
"sounds": {
|
||||
"system": "hermes"
|
||||
},
|
||||
|
||||
"mqtt": {
|
||||
"enabled": true,
|
||||
"host": "localhost",
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default"
|
||||
}
|
||||
```json
|
||||
"sounds": {
|
||||
"system": "hermes"
|
||||
},
|
||||
|
||||
"mqtt": {
|
||||
"enabled": true,
|
||||
"host": "localhost",
|
||||
"username": "",
|
||||
"port": 1883,
|
||||
"password": "",
|
||||
"site_id": "default"
|
||||
}
|
||||
```
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
@@ -11,7 +11,6 @@ You can also make Rhasspy record a voice command using the [HTTP API](usage.md#h
|
||||
2. Speaking your voice command
|
||||
3. POST-ing to `/api/stop-recording`. Rhasspy will stop recording and process the voice command.
|
||||
|
||||
|
||||
## WebRTCVAD
|
||||
|
||||
Listens for voice commands using [webrtcvad](https://github.com/wiseman/py-webrtcvad) to detect speech and silence.
|
||||
@@ -33,7 +32,7 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
This system listens for up to `timeout_sec` for a voice command. The first few frames of audio data are discarded (`throwaway_buffers`) to avoid clicks from the microphone being engaged. When speech is detected for some number of successive frames (`speech_buffers`), the voice command is considered to have *started*. After `min_sec`, Rhasspy will start listening for silence. If at least `silence_sec` goes by without any speech detected, the command is considered *finished*, and the recorded WAV data is sent to the [speech recognition system](speech-to-text.md).
|
||||
|
||||
You may want to adjust `min_sec`, `silence_sec`, and `vad_mode` for your environment.
|
||||
@@ -60,15 +59,15 @@ Add to your [profile](profiles.md):
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
See `rhasspy.command_listener.OneShotCommandListener` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Subscribes to the `hermes/asr/startListening` and `hermes/asr/stopListening` topics ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)).
|
||||
Subscribes to the `hermes/asr/startListening` and `hermes/asr/stopListening` topics ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
This allows Rhasspy to be controlled by [Snips.AI](https://snips.ai/).
|
||||
|
||||
Wakes up Rhasspy when `startListening` is received and starts recording. Stops recording when `stopListening` is received and processes the voice command.
|
||||
Wakes up Rhasspy when `startListening` is received and starts recording. Stops recording when `stopListening` is received and processes the voice command.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -96,12 +95,16 @@ Set `mqtt.site_id` to match your Snips.AI siteId.
|
||||
|
||||
Using [mosquitto_pub](https://mosquitto.org/man/mosquitto_pub-1.html), wake up Rhasspy with:
|
||||
|
||||
mosquitto_pub -t 'hermes/asr/startListening' -m '{ "siteId": "default" }'
|
||||
|
||||
```bash
|
||||
mosquitto_pub -t 'hermes/asr/startListening' -m '{ "siteId": "default" }'
|
||||
```
|
||||
|
||||
Say your voice command, then stop recording with:
|
||||
|
||||
mosquitto_pub -t 'hermes/asr/stopListening' -m '{ "siteId": "default" }'
|
||||
|
||||
```bash
|
||||
mosquitto_pub -t 'hermes/asr/stopListening' -m '{ "siteId": "default" }'
|
||||
```
|
||||
|
||||
Rhasspy should process your voice command.
|
||||
|
||||
See `rhasspy.command_listener.HermesCommandListener` for details.
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
# Development
|
||||
|
||||
Rhasspy's code can be found [on GitHub](https://github.com/synesthesiam/rhasspy).
|
||||
|
||||
## Set up your development environment
|
||||
|
||||
If you want to start developing on Rhasspy, [fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the repository, and clone your fork:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/<your_username>/rhasspy.git
|
||||
cd rhasspy
|
||||
```
|
||||
|
||||
Add the original repository as an [upstream remote](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/configuring-a-remote-for-a-fork):
|
||||
|
||||
```bash
|
||||
git remote add upstream https://github.com/synesthesiam/rhasspy.git
|
||||
```
|
||||
|
||||
Then follow the installation steps for a [virtual environment](installation.md#virtual-environment). If the `create-venv.sh` script fails, please [report an issue](https://github.com/synesthesiam/rhasspy/issues) before proceeding.
|
||||
|
||||
If you pull changes, make sure to re-download and extract `rhasspy-web-dist.tar.gz` from [the releases page](https://github.com/synesthesiam/rhasspy/releases/tag/v2.0). This contains the pre-compiled web artifacts. Alternatively, you can install [yarn](https://yarnpkg.com) and run `yarn build` in the `rhasspy` directory after a `git pull`.
|
||||
|
||||
## Run the unit tests
|
||||
|
||||
A good start to check whether your development environment is set up correctly (or to find some bugs) is to run the unit tests:
|
||||
|
||||
```bash
|
||||
./run-tests.sh
|
||||
```
|
||||
|
||||
This will run tests against pre-recorded WAV files in `rhasspy/etc/test` for specific languages. You can run tests only for a specific language (profile) like this:
|
||||
|
||||
```bash
|
||||
./run-tests.sh -p en
|
||||
```
|
||||
|
||||
It’s good practice to run the unit tests before and after you work on something, to be sure your changes don't accidentally break something.
|
||||
|
||||
## Keeping your fork synchronized
|
||||
|
||||
When the upstream repository has new commits, you should [synchronize your fork](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/syncing-a-fork):
|
||||
|
||||
```bash
|
||||
git fetch upstream
|
||||
git checkout master
|
||||
git merge upstream/master
|
||||
```
|
||||
|
||||
Then [update your fork on GitHub](https://help.github.com/en/github/using-git/pushing-commits-to-a-remote-repository):
|
||||
|
||||
```bash
|
||||
git push
|
||||
```
|
||||
|
||||
Your fork is now synchronized to the original repository.
|
||||
|
||||
## Development practices
|
||||
|
||||
* Before starting significant work, please propose it and discuss it first on the [issue tracker](https://github.com/synesthesiam/rhasspy/issues) on GitHub. Other people may have suggestions, will want to collaborate and will wish to review your code.
|
||||
* Please work on one piece of conceptual work at a time. Keep each narrative of work in a different branch.
|
||||
* As much as possible, have each commit solve one problem.
|
||||
* A commit must not leave the project in a non-functional state.
|
||||
* Run the unit tests before you create a commit.
|
||||
* Treat code, tests and documentation as one.
|
||||
* Create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) from your fork.
|
||||
|
||||
## Development workflow
|
||||
|
||||
If you want to start working on a specific feature or bug fix, this is an example workflow:
|
||||
|
||||
* Synchronize your fork with the upstream repository.
|
||||
* Create a new branch: `git checkout -b <nameofbranch>`
|
||||
* Create your changes.
|
||||
* Add the changed files with `git add <files>`.
|
||||
* Commit your changes with `git commit`.
|
||||
* Push your changes to your fork on GitHub.
|
||||
* Create a pull request from your fork.
|
||||
|
||||
## License of contributions
|
||||
|
||||
By submitting patches to this project, you agree to allow them to be redistributed under the project’s [license](license.md) according to the normal forms and usages of the open source community.
|
||||
|
||||
It is your responsibility to make sure you have all the necessary rights to contribute to the project.
|
||||
|
After Width: | Height: | Size: 20 KiB |
@@ -0,0 +1,140 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="149.42726mm"
|
||||
height="36.848656mm"
|
||||
viewBox="0 0 149.42726 36.848656"
|
||||
version="1.1"
|
||||
id="svg860"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="rhasspy-discourse-logo.svg"
|
||||
inkscape:export-filename="./rhasspy-discourse-logo.png"
|
||||
inkscape:export-xdpi="82.716721"
|
||||
inkscape:export-ydpi="82.716721">
|
||||
<defs
|
||||
id="defs854" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="0.9899495"
|
||||
inkscape:cx="268.11251"
|
||||
inkscape:cy="139.11788"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
fit-margin-top="0"
|
||||
fit-margin-left="0"
|
||||
fit-margin-right="0"
|
||||
fit-margin-bottom="0"
|
||||
inkscape:window-width="1440"
|
||||
inkscape:window-height="755"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1" />
|
||||
<metadata
|
||||
id="metadata857">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(47.552776,-100.1735)">
|
||||
<circle
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="path1476"
|
||||
cx="-29.128448"
|
||||
cy="118.59783"
|
||||
r="18.174328" />
|
||||
<g
|
||||
transform="matrix(0.80207931,0,0,0.80207931,-74.139422,96.215375)"
|
||||
id="g2275">
|
||||
<g
|
||||
id="text817"
|
||||
style="font-style:normal;font-weight:normal;font-size:41.37965775px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1.03449142"
|
||||
transform="rotate(-45)"
|
||||
aria-label="R">
|
||||
<path
|
||||
sodipodi:nodetypes="ccccccccccccccccssccccccccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path819"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:41.38083267px;font-family:'CC Adamantium';-inkscape-font-specification:'CC Adamantium, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:1.03449142"
|
||||
d="M 31.509252,62.491941 31.16794,75.667305 H 30.83455 L 28.604505,65.027061 15.316738,59.73528 13.781121,75.667305 H 13.286086 L 11.528138,71.268365 10.110857,66.929543 9.526899,61.048002 6.9616279,56.651114 8.7144255,54.299084 6.356732,51.899807 8.2521138,50.246675 6.1006224,45.789404 9.891565,42.358563 c 2.435726,-1.492588 4.806268,-0.545105 7.30443,-1.317335 4.174203,-1.290327 7.29492,-1.792422 11.275957,5.059621 0.756691,1.302392 3.239334,1.578749 4.130578,3.198298 -0.882306,1.555823 -2.064327,2.923061 -3.546063,4.101714 -1.481735,1.171918 -3.152055,2.175457 -5.01096,3.010617 -1.852169,0.828425 -3.852512,1.535617 -6.001029,2.121576 z M 25.51612,48.388298 c -6.142518,4.42909 -6.341445,-0.106922 -8.663766,-3.716207 l -1.283048,13.860963 c 5.545523,-1.913183 8.340713,-6.051669 9.946814,-10.144756 z" />
|
||||
</g>
|
||||
<ellipse
|
||||
ry="0.93544334"
|
||||
rx="0.33408689"
|
||||
cy="21.859995"
|
||||
cx="52.059788"
|
||||
id="path2115"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<ellipse
|
||||
transform="rotate(-45)"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
id="ellipse2117"
|
||||
cx="18.873178"
|
||||
cy="50.914211"
|
||||
rx="0.33408689"
|
||||
ry="0.93544334" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2119"
|
||||
d="m 64.331743,23.950737 -0.788701,-2.167883 0.785662,-0.376444 0.715334,2.441702 z"
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 69.630908,29.701094 1.48309,-1.766977 0.718843,0.492181 -1.75691,1.840348 z"
|
||||
id="path2121"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
sodipodi:nodetypes="cscc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2123"
|
||||
d="m 47.978861,19.145376 c -0.0362,0.284741 -0.632118,0.443544 -1.331028,0.354698 -0.698909,-0.08885 -1.236142,-0.391701 -1.199944,-0.676442 z"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<path
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
d="m 49.344113,18.846496 c 0.224679,0.178626 0.762248,-0.123625 1.200693,-0.675116 0.438451,-0.55148 0.611752,-1.143345 0.387075,-1.321972 z"
|
||||
id="path2126"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="cscc" />
|
||||
<path
|
||||
sodipodi:nodetypes="ccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2128"
|
||||
d="m 43.707615,19.788656 8.68626,10.557147 c 2.944473,-4.699489 1.792375,-9.398979 -0.200452,-14.098468"
|
||||
style="opacity:1;fill:none;stroke:#808080;stroke-width:0.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:30.12816238px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.5"
|
||||
x="-5.9640822"
|
||||
y="128.49496"
|
||||
id="text824"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan822"
|
||||
x="-5.9640822"
|
||||
y="128.49496"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:30.12828064px;font-family:'Sansus Webissimo';-inkscape-font-specification:'Sansus Webissimo, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;stroke:#000000;stroke-width:0.5">RHASSPY</tspan></text>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 6.9 KiB |
|
After Width: | Height: | Size: 16 KiB |
|
After Width: | Height: | Size: 38 KiB |
@@ -0,0 +1,123 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="36.848656mm"
|
||||
height="36.848656mm"
|
||||
viewBox="0 0 36.848656 36.848656"
|
||||
version="1.1"
|
||||
id="svg860"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="rhasspy-raven-square.svg"
|
||||
inkscape:export-filename="./rhasspy-discourse-square-logo-nocircle.png"
|
||||
inkscape:export-xdpi="352.92468"
|
||||
inkscape:export-ydpi="352.92468">
|
||||
<defs
|
||||
id="defs854" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="1.979899"
|
||||
inkscape:cx="-98.08577"
|
||||
inkscape:cy="43.808495"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
fit-margin-top="0"
|
||||
fit-margin-left="0"
|
||||
fit-margin-right="0"
|
||||
fit-margin-bottom="0"
|
||||
inkscape:window-width="1440"
|
||||
inkscape:window-height="755"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1" />
|
||||
<metadata
|
||||
id="metadata857">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(47.552776,-100.1735)">
|
||||
<g
|
||||
transform="matrix(0.80207931,0,0,0.80207931,-74.139422,96.215375)"
|
||||
id="g2275">
|
||||
<g
|
||||
id="text817"
|
||||
style="font-style:normal;font-weight:normal;font-size:41.37965775px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1.03449142"
|
||||
transform="rotate(-45)"
|
||||
aria-label="R">
|
||||
<path
|
||||
sodipodi:nodetypes="ccccccccccccccccssccccccccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path819"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:41.38083267px;font-family:'CC Adamantium';-inkscape-font-specification:'CC Adamantium, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;stroke-width:1.03449142"
|
||||
d="M 31.509252,62.491941 31.16794,75.667305 H 30.83455 L 28.604505,65.027061 15.316738,59.73528 13.781121,75.667305 H 13.286086 L 11.528138,71.268365 10.110857,66.929543 9.526899,61.048002 6.9616279,56.651114 8.7144255,54.299084 6.356732,51.899807 8.2521138,50.246675 6.1006224,45.789404 9.891565,42.358563 c 2.435726,-1.492588 4.806268,-0.545105 7.30443,-1.317335 4.174203,-1.290327 7.29492,-1.792422 11.275957,5.059621 0.756691,1.302392 3.239334,1.578749 4.130578,3.198298 -0.882306,1.555823 -2.064327,2.923061 -3.546063,4.101714 -1.481735,1.171918 -3.152055,2.175457 -5.01096,3.010617 -1.852169,0.828425 -3.852512,1.535617 -6.001029,2.121576 z M 25.51612,48.388298 c -6.142518,4.42909 -6.341445,-0.106922 -8.663766,-3.716207 l -1.283048,13.860963 c 5.545523,-1.913183 8.340713,-6.051669 9.946814,-10.144756 z" />
|
||||
</g>
|
||||
<ellipse
|
||||
ry="0.93544334"
|
||||
rx="0.33408689"
|
||||
cy="21.859995"
|
||||
cx="52.059788"
|
||||
id="path2115"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<ellipse
|
||||
transform="rotate(-45)"
|
||||
style="opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
id="ellipse2117"
|
||||
cx="18.873178"
|
||||
cy="50.914211"
|
||||
rx="0.33408689"
|
||||
ry="0.93544334" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2119"
|
||||
d="m 64.331743,23.950737 -0.788701,-2.167883 0.785662,-0.376444 0.715334,2.441702 z"
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
style="fill:#000000;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 69.630908,29.701094 1.48309,-1.766977 0.718843,0.492181 -1.75691,1.840348 z"
|
||||
id="path2121"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
sodipodi:nodetypes="cscc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2123"
|
||||
d="m 47.978861,19.145376 c -0.0362,0.284741 -0.632118,0.443544 -1.331028,0.354698 -0.698909,-0.08885 -1.236142,-0.391701 -1.199944,-0.676442 z"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1" />
|
||||
<path
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1;stroke-opacity:1"
|
||||
d="m 49.344113,18.846496 c 0.224679,0.178626 0.762248,-0.123625 1.200693,-0.675116 0.438451,-0.55148 0.611752,-1.143345 0.387075,-1.321972 z"
|
||||
id="path2126"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="cscc" />
|
||||
<path
|
||||
sodipodi:nodetypes="ccc"
|
||||
inkscape:connector-curvature="0"
|
||||
id="path2128"
|
||||
d="m 43.707615,19.788656 8.68626,10.557147 c 2.944473,-4.699489 1.792375,-9.398979 -0.200452,-14.098468"
|
||||
style="opacity:1;fill:none;stroke:#808080;stroke-width:0.5;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 5.8 KiB |
|
After Width: | Height: | Size: 181 KiB |
|
Before Width: | Height: | Size: 65 KiB After Width: | Height: | Size: 38 KiB |
|
After Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 76 KiB After Width: | Height: | Size: 42 KiB |
|
Before Width: | Height: | Size: 94 KiB After Width: | Height: | Size: 45 KiB |
|
After Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 73 KiB After Width: | Height: | Size: 45 KiB |
|
After Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 50 KiB After Width: | Height: | Size: 33 KiB |
|
Before Width: | Height: | Size: 85 KiB After Width: | Height: | Size: 55 KiB |
@@ -1,41 +1,51 @@
|
||||

|
||||
<img src="img/rhasspy.svg" style="max-height: 200px;" title="Rhasspy logo">
|
||||
|
||||
Rhasspy (pronounced RAH-SPEE) is an [open source](https://github.com/synesthesiam/rhasspy), fully offline voice assistant toolkit for [many languages](#supported-languages) that works well with [Home Assistant](https://www.home-assistant.io/), [Hass.io](https://www.home-assistant.io/hassio/), and [Node-RED](https://nodered.org).
|
||||
|
||||
Rhasspy transforms voice commands into [JSON](https://json.org) events that can trigger actions in home automation software, like [Home Assistant automations](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](usage.md#node-red). You define custom voice commands in a [profile](profiles.md) using a [specialized template syntax](training.md), and Rhasspy takes care of the rest.
|
||||
You specify voice commands in a [template language](training.md):
|
||||
|
||||
## Motivation
|
||||
```
|
||||
[LightState]
|
||||
states = (on | off)
|
||||
turn (<states>){state} [the] light
|
||||
```
|
||||
|
||||
A typical voice assistant (Alexa, Google Home, etc.) solves a number of important problems:
|
||||
and Rhasspy will produce [JSON](https://json.org) events that can trigger actions in [home automation software](https://www.home-assistant.io/docs/automation/trigger/#event-trigger) or [Node-RED flows](usage.md#node-red):
|
||||
|
||||
1. Deciding when to record audio ([wake word](wake-word.md))
|
||||
2. Listening for voice commands ([command listener](command-listener.md))
|
||||
3. Transcribing command/question ([speech to text](speech-to-text.md))
|
||||
4. Interpreting the speaker's **intent** from the text ([intent recognition](intent-recognition.md))
|
||||
5. Fulfilling the speaker's intent ([intent handling](intent-handling.md))
|
||||
```json
|
||||
{
|
||||
"text": "turn on the light",
|
||||
"intent": {
|
||||
"name": "LightState"
|
||||
},
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Rhasspy provides **offline, private solutions** to problems 1-4 using off-the-shelf tools. These tools are:
|
||||
Rhasspy is <strong>optimized for</strong>:
|
||||
|
||||
* **Wake word**
|
||||
* [Pocketsphinx keyphrase](https://cmusphinx.github.io/wiki/tutoriallm/#using-keyword-lists-with-pocketsphinx)
|
||||
* [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* [snowboy](https://snowboy.kitt.ai)
|
||||
* [porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* **Command listener**
|
||||
* [webrtcvad](https://github.com/wiseman/py-webrtcvad)
|
||||
* **Speech to text**
|
||||
* [Pocketsphinx](https://github.com/cmusphinx/pocketsphinx)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
* **Intent recognition**
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy)
|
||||
* [Mycroft Adapt](https://github.com/MycroftAI/adapt)
|
||||
* [flair](http://github.com/zalandoresearch/flair)
|
||||
* [Rasa NLU](https://rasa.com/)
|
||||
* Working with external services via [MQTT](usage.md#mqtt), [HTTP](usage.md#http-api), and [Websockets](usage.md#websocket-events)
|
||||
* Home Assistant and Hass.IO have [built-in support](usage.md#home-assistant)
|
||||
* Pre-specified voice commands that are described well [by a grammar](training.md#sentencesini)
|
||||
* You can also do [open-ended speech recognition](speech-to-text.md#open-transcription)
|
||||
* Voice commands with [uncommon words or pronunciations](usage.md#words-tab)
|
||||
* New words are added phonetically with [automated assistance](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
|
||||
For problem 5 (fulfilling the speaker's intent), Rhasspy works with external home automation software, such as Home Assistant's built-in [automation capability](https://www.home-assistant.io/docs/automation/) or a [Node-RED flow](https://nodered.org).
|
||||
## Getting Started
|
||||
|
||||
For each intent you define, Rhasspy emits a JSON event that can do anything Home Assistant can do (toggle switches, call REST services, etc.). This means that Rhasspy will do very little out of the box compared to other voice assistants, but there are also *no limits* to what can be done.
|
||||
Ready to try Rhasspy? Follow the steps below and check out the [tutorials](tutorials.md).
|
||||
|
||||
1. Make sure you have the [necessary hardware](hardware.md)
|
||||
2. Choose an [installation method](installation.md)
|
||||
3. Access the [web interface](usage.md#web-interface) to download a profile
|
||||
4. Author your [custom voice commands](training.md) and train Rhasspy
|
||||
5. Connect Rhasspy to [Home Assistant](usage.md#home-assistant) or a [Node-RED](usage.md#node-red) flow
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you have problems, please stop by the [Rhasspy community site](https://community.rhasspy.org) or [open a GitHub issue](https://github.com/synesthesiam/rhasspy/issues).
|
||||
|
||||
## Supported Languages
|
||||
|
||||
@@ -56,124 +66,7 @@ Rhasspy supports the following languages:
|
||||
* Swedish (`sv`)
|
||||
* Catalan (`ca`)
|
||||
|
||||
Support for these languages comes directly from existing [CMU Sphinx](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/) and [Kaldi](https://montreal-forced-aligner.readthedocs.io/en/latest/pretrained_models.html) acoustic models.
|
||||
|
||||
It is possible to extend Rhasspy to new languages with only:
|
||||
|
||||
* A [phonetic dictionary](https://cmusphinx.github.io/wiki/tutorialdict/#using-g2p-seq2seq-to-extend-the-dictionary)
|
||||
* A trained [acoustic model](https://cmusphinx.github.io/wiki/tutorialam/)
|
||||
* A [grapheme to phoneme model](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
|
||||
The table below summarizes language support across the various supporting technologies that Rhasspy uses:
|
||||
|
||||
| Category | Name | Offline? | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | sv | ca |
|
||||
| -------- | ------ | -------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](wake-word.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | |
|
||||
| | [porcupine](wake-word.md#porcupine) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [snowboy](wake-word.md#snowboy) | *requires account* | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| | [precise](wake-word.md#mycroft-precise) | ✓ | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| **Speech to Text** | [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| | [kaldi](speech-to-text.md#kaldi) | ✓ | | | | | | | | | | | ✓ | | ✓ | |
|
||||
| **Intent Recognition** | [fsticuffs](intent-recognition.md#fsticuffs) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [adapt](intent-recognition.md#mycroft-adapt) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flair](intent-recognition.md#flair) | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | | | | | ✓ | | ✓ |
|
||||
| | [rasaNLU](intent-recognition.md#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](text-to-speech.md#flite) | ✓ | ✓ | | | | | | | | ✓ | | | | | |
|
||||
| | [picotts](text-to-speech.md#picotts) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [marytts](text-to-speech.md#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | | |
|
||||
| | [wavenet](text-to-speech.md#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ✓ | |
|
||||
|
||||
• - yes, but requires training/customization
|
||||
|
||||
## How It Works
|
||||
|
||||
Rhasspy starts off asleep, listening for a [wake word](wake-word.md). Once awoken, it listens for a [voice command](command-listener.md). After recording the command, it's transcribed with the [speech to text](speech-to-text.md) system into text, which is then run through an [intent recognizer](intent-recognition.md). Finally, the recognized intent is used to generate an event that can be [handled by Home Assistant or Node-RED](intent-handling.md).
|
||||
|
||||

|
||||
|
||||
## Customization
|
||||
|
||||
Every step of Rhasspy's processing pipeline can be customized, including using a remote Rhasspy server via its [HTTP API](usage.md#http-api) for [speech to text](speech-to-text.md#remote-http-server) and [intent recognition](intent-recognition.md#remote-http-server). Some useful Rhasspy API endpoints are:
|
||||
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as command
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
|
||||
Additionally, you can call out to a custom external program for [wake word detection](wake-word.md#command), [voice command listening](command-listener.md#command), [speech recognition](speech-to-text.md#command), [intent recognition](intent-recognition.md#command), and even [intent handling](intent-handling.md#command)! This means that you can use Rhasspy as a general voice command toolkit, with or without Home Assistant.
|
||||
|
||||
## RGB Light Example
|
||||
|
||||
Let's say you have an RGB light of some kind in your bedroom that's [hooked up already to Home Assistant](https://www.home-assistant.io/components/light.mqtt). You'd like to be able to say things like "*set the bedroom light to red*" to change its color. To start, let's write a [Home Assistant automation](https://www.home-assistant.io/docs/automation/action/) to help you out:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
...
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
Now you just need the trigger! Rhasspy will send events that can be caught with the [event trigger platform](https://www.home-assistant.io/docs/automation/trigger/#event-trigger). A different event will be sent for each *intent* that you define, with slot values corresponding to important parts of the command (like light name and color). Let's start by defining an intent in Rhasspy called `ChangeLightColor` that can be said a few different ways:
|
||||
|
||||
[ChangeLightColor]
|
||||
colors = (red | green | blue) {color}
|
||||
set [the] (bedroom){name} [to] <colors>
|
||||
|
||||
This is a [simplified JSGF grammar](doc/sentences/md) that will generate the following sentences:
|
||||
|
||||
* set the bedroom to red
|
||||
* set the bedroom to green
|
||||
* set the bedroom to blue
|
||||
* set the bedroom red
|
||||
* set the bedroom green
|
||||
* set the bedroom blue
|
||||
* set bedroom to red
|
||||
* set bedroom to green
|
||||
* set bedroom to blue
|
||||
* set bedroom red
|
||||
* set bedroom green
|
||||
* set bedroom blue
|
||||
|
||||
Rhasspy uses these sentences to create an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) for speech recognition, and also train an intent recognizer that can extract relevant parts of the command. The `{color}` tag in the `colors` rule will make Rhasspy put a `color` property in each event with the name of the recognized color (red, green, or blue). Likewise, the `{name}` tag on `bedroom` will add a `name` property to the event.
|
||||
|
||||
If trained on these sentences, Rhasspy will now recognize commands like "*set the bedroom light to red*" and send a `rhasspy_ChangeLightState` to Home Assistant with the following data:
|
||||
|
||||
{
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
You can now fill in the rest of the Home Assistant automation:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
platform: event
|
||||
event_type: rhasspy_ChangeLightState
|
||||
event_data:
|
||||
name: bedroom
|
||||
color: red
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
This will handle the specific case of setting the bedroom light to red, but not any other color. You can either add additional automations to handle these, or make use of [automation templating](https://www.home-assistant.io/docs/automation/templating/) to do it all at once.
|
||||
|
||||
Intended Audience
|
||||
---------------------
|
||||
## Intended Audience
|
||||
|
||||
Rhasspy is intended for advanced users that want to have a voice interface to Home Assistant, but value **privacy** and **freedom** above all else. There are many other voice assistants, but none (to my knowledge) that:
|
||||
|
||||
|
||||
@@ -2,20 +2,22 @@
|
||||
|
||||
Rhasspy should run in a variety of software environments, including:
|
||||
|
||||
* Within a [Docker](https://www.docker.com/) container
|
||||
* As a [Hass.io add-on](https://www.home-assistant.io/addons/)
|
||||
* Inside a [Python virtual environment](https://docs.python-guide.org/dev/virtualenvs/)
|
||||
* Within a [Docker](#docker) container
|
||||
* As a [Hass.io add-on](#hassio)
|
||||
* Inside a [Python virtual environment](#virtual-environment)
|
||||
* Running as a [service](#running-as-a-service)
|
||||
* Build [from source](#build-from-source)
|
||||
|
||||
### Docker
|
||||
## Docker
|
||||
|
||||
The easiest way to try Rhasspy is with Docker. To get started, make sure you have [Docker installed](https://docs.docker.com/install/):
|
||||
|
||||
curl -sSL https://get.docker.com | sh
|
||||
|
||||
|
||||
and that your user is part of the `docker` group:
|
||||
|
||||
sudo usermod -a -G docker $USER
|
||||
|
||||
|
||||
**Be sure to reboot** after adding yourself to the `docker` group!
|
||||
|
||||
Next, start the [Rhasspy Docker image](https://hub.docker.com/r/synesthesiam/rhasspy-server) in the background:
|
||||
@@ -27,9 +29,9 @@ Next, start the [Rhasspy Docker image](https://hub.docker.com/r/synesthesiam/rha
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
|
||||
This will start Rhasspy with the English profile (`en`) in the background (`-d`) on port 12101 (`-p`) and give Rhasspy access to your microphone (`--device`). Any changes you make to [your profile](profiles.md) will be saved to `~/.config/rhasspy`.
|
||||
|
||||
|
||||
Once it starts, Rhasspy's web interface should be accessible at [http://localhost:12101](http://localhost:12101). If something went wrong, try running Docker with `-it` instead of `-d` to see the output.
|
||||
|
||||
If you're using [docker compose](https://docs.docker.com/compose/), add the following to your `docker-compose.yml` file:
|
||||
@@ -45,7 +47,7 @@ If you're using [docker compose](https://docs.docker.com/compose/), add the foll
|
||||
- "/dev/snd:/dev/snd"
|
||||
command: --user-profiles /profiles --profile en
|
||||
|
||||
### Hass.io
|
||||
## Hass.io
|
||||
|
||||
The second easiest way to install Rhasspy is as a [Hass.io add-on](https://www.home-assistant.io/addons/). Follow the [installation instructions for Hass.io](https://www.home-assistant.io/hassio/installation/) before proceeding.
|
||||
|
||||
@@ -61,35 +63,92 @@ Before starting the add-on, make sure to give it access to your microphone and s
|
||||
|
||||

|
||||
|
||||
|
||||
### Virtual Environment
|
||||
## Virtual Environment
|
||||
|
||||
Rhasspy can be installed into a Python virtual environment, though there are a number of requirements. This may be desirable, however, if you have trouble getting Rhasspy to access your microphone from within a Docker container. To start, clone the repo somewhere:
|
||||
|
||||
git clone https://github.com/synesthesiam/rhasspy.git
|
||||
|
||||
|
||||
Then run the `download-dependencies.sh` and `create-venv.sh` scripts (assumes a Debian distribution):
|
||||
|
||||
cd rhasspy/
|
||||
./download-dependencies.sh
|
||||
./create-venv.sh
|
||||
|
||||
|
||||
Once the installation finishes (5-10 minutes on a Raspberry Pi 3), you can use the `run-venv.sh` script to start Rhasspy:
|
||||
|
||||
./run-venv.sh --profile en
|
||||
|
||||
|
||||
If all is well, the web interface will be available at [http://localhost:12101](http://localhost:12101)
|
||||
|
||||
### Software Requirements
|
||||
### Running as a Service
|
||||
|
||||
At its core, Rhasspy requires:
|
||||
Once installed, Rhasspy can be run as a [systemd service](https://systemd.io/). An [example unit file](https://github.com/synesthesiam/rhasspy/blob/master/etc/rhasspy.service) is available (thanks [UnderpantsGnome](https://github.com/UnderpantsGnome)):
|
||||
|
||||
```
|
||||
[Unit]
|
||||
Description=Rhasspy
|
||||
After=syslog.target network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/home/<USER>/path/to/rhasspy
|
||||
ExecStart=/bin/bash -lc './run-venv.sh --profile <LANGUAGE>'
|
||||
|
||||
RestartSec=1
|
||||
Restart=on-failure
|
||||
|
||||
StandardOutput=syslog
|
||||
StandardError=syslog
|
||||
|
||||
SyslogIdentifier=rhasspy
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
* Replace `/home/<USER>/path/to/rhasspy` with the full path to your Rhasspy installation (where `run-venv.sh` is).
|
||||
* Replace `<LANGUAGE>` with your profile language (e.g., `en`)
|
||||
|
||||
Create a file named `rhasspy.service` in the `/home/<USER>/.config/systemd/user` directory (you may need to create the directory itself). Once the file has been saved, run:
|
||||
|
||||
```bash
|
||||
systemctl --user daemon-reload
|
||||
```
|
||||
|
||||
Then, you can start Rhasspy with:
|
||||
|
||||
```bash
|
||||
systemctl --user start rhasspy
|
||||
```
|
||||
|
||||
If you'd like Rhasspy to start on boot, run:
|
||||
|
||||
```bash
|
||||
systemctl --user enable --now rhasspy
|
||||
```
|
||||
|
||||
## Build From Source
|
||||
|
||||
The `create-venv.sh` script uses [pre-compiled binaries](https://github.com/synesthesiam/rhasspy/releases/tag/v2.0) for Rhasspy's required tools:
|
||||
|
||||
* [OpenFST](https://www.openfst.org)
|
||||
* [Opengrm](http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary)
|
||||
* [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus)
|
||||
* [Kaldi](https://kaldi-asr.org)
|
||||
|
||||
The [build-from-source.sh](https://github.com/synesthesiam/rhasspy/blob/master/build-from-source.sh) script attempts to build all of these tools from source. The binary artifacts (command-line tools, shared libraries) are installed into the `bin` and `lib` directories of a Python virtual environment. The `run-venv.sh` script automatically adds these directories to `PATH` and `LD_LIBRARY_PATH` before starting Rhasspy.
|
||||
|
||||
### Swap Size
|
||||
|
||||
On low memory devices like the Raspberry Pi, building the tools above can quickly consume the entire RAM. Before building, it's highly recommended that you increase the available swap space by several gigabytes:
|
||||
|
||||
1. Edit `/etc/dphys-swapfile`
|
||||
2. Change `CONF_SWAPSIZE` to something large, like 2048 (2GB)
|
||||
3. Reboot
|
||||
|
||||
### Kaldi
|
||||
|
||||
You can skip building Kaldi if you plan to just [use Pocketsphinx](speech-to-text.md#pocketsphinx) for speech recognition.
|
||||
|
||||
* Linux
|
||||
* Python 3.6
|
||||
* [Flask](https://pypi.org/project/Flask/) web server, including
|
||||
* [flask-swagger-ui](https://pypi.org/project/flask-swagger-ui/) for HTTP API documentation
|
||||
* [Flask-Cors](https://pypi.org/project/Flask-Cors/) for [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) stuff
|
||||
* [Flask-Sockets](https://pypi.org/project/Flask-Sockets/) for websocket support
|
||||
* [pydash](https://pypi.org/project/pydash/) utility library
|
||||
|
||||
To actually use any components, however, requires a lot of [extra software](about.md#supporting-tools).
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
# Intent Handling
|
||||
|
||||
After a voice command has been transcribed and your intent has been successfully recognized, Rhasspy is ready to send a JSON event to Home Assistant or Node-RED.
|
||||
After a voice command has been transcribed and your intent has been successfully recognized, Rhasspy is ready to send a JSON event to another system like Home Assistant or Node-RED.
|
||||
|
||||
* [Home Assistant](#home-assistant)
|
||||
* [Remote Server](#remote-server)
|
||||
* [Command](#command)
|
||||
|
||||
Regardless of which intent handling system you choose, Rhasspy emits JSON events [over a websocket connection](usage.md#websocket-events).
|
||||
|
||||
@@ -112,10 +116,60 @@ Set `home_assistant.pem_file` to the full path to your <a href="http://docs.pyth
|
||||
|
||||
Use the environment variable `RHASSPY_PROFILE_DIR` to reference your current profile's directory. For example, `$RHASSPY_PROFILE_DIR/my.pem` will tell Rhasspy to use a file named `my.pem` in your profile directory when verifying your self-signed certificate.
|
||||
|
||||
## Remote Server
|
||||
|
||||
Rhasspy can POST the intent JSON to a remote URL.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"handle": {
|
||||
"system": "remote",
|
||||
"remote": {
|
||||
"url": "http://<address>:<port>/path/to/endpoint"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When an intent is recognized, Rhasspy will POST to `handle.remote.url` with the intent JSON. You should **return JSON** back, optionally with additional information. If `handle.forward_to_hass` is `true`, Rhasspy will look for a `hass_event` property of the returned JSON with the following structure:
|
||||
|
||||
```json
|
||||
{
|
||||
// rest of input JSON
|
||||
// ...
|
||||
"hass_event": {
|
||||
"event_type": "...",
|
||||
"event_data": {
|
||||
"key": "value",
|
||||
// ...
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Rhasspy will create the Home Assistant event based on this information. If it is **not** present, the remaining intent information will be used to construct the event as normal (i.e., `intent` and `entities`). If `handle.forward_to_hass` is `false`, the output of your program is not used.
|
||||
|
||||
### Speech
|
||||
|
||||
If the returned JSON contains a "speech" key like this:
|
||||
|
||||
```json
|
||||
{
|
||||
...
|
||||
"speech": {
|
||||
"text": "Some text to speak."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
then Rhasspy will forward `speech.text` to the configured [text to speech](text-to-speech.md) system.
|
||||
|
||||
See `rhasspy.intent_handler.RemoteIntentHandler` for details.
|
||||
|
||||
## Command
|
||||
|
||||
Once an intent is successfully recognized, Rhasspy will send an event to Home Assistant with the details. You can call a custom program instead of *or in addition to* this behavior.
|
||||
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
@@ -144,7 +198,7 @@ When an intent is recognized, Rhasspy will call your custom program with the int
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Rhasspy will create the Home Assistant event based on this information. If it is **not** present, the remaining intent information will be used to construct the event as normal (i.e., `intent` and `entities`). If `handle.forward_to_hass` is `false`, the output of your program is not used.
|
||||
|
||||
The following environment variables are available to your program:
|
||||
@@ -155,6 +209,21 @@ The following environment variables are available to your program:
|
||||
|
||||
See [handle.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/handle.sh) for an example program.
|
||||
|
||||
### Speech
|
||||
|
||||
If the returned JSON contains a "speech" key like this:
|
||||
|
||||
```json
|
||||
{
|
||||
...
|
||||
"speech": {
|
||||
"text": "Some text to speak."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
then Rhasspy will forward `speech.text` to the configured [text to speech](text-to-speech.md) system.
|
||||
|
||||
See `rhasspy.intent_handler.CommandIntentHandler` for details.
|
||||
|
||||
## Dummy
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Intent Recognition
|
||||
|
||||
After your voice command has been transcribed by the [speech to text](speech-to-text.md) system, the next step is to recognize your intent.
|
||||
After your voice command has been transcribed by the [speech to text](speech-to-text.md) system, the next step is to recognize your intent.
|
||||
The end result is a JSON event with information about the intent.
|
||||
|
||||
The following table summarizes the trade-offs of using each intent recognizer:
|
||||
@@ -61,7 +61,7 @@ Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"intent": {
|
||||
"system": "adapt",
|
||||
"system": "adapt",
|
||||
"adapt": {
|
||||
"stop_words": "stop_words.txt"
|
||||
}
|
||||
@@ -80,7 +80,7 @@ Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"intent": {
|
||||
"system": "flair",
|
||||
"system": "flair",
|
||||
"flair": {
|
||||
"data_dir": "flair_data",
|
||||
"max_epochs": 25,
|
||||
@@ -155,6 +155,12 @@ Because Home Assistant will already handle your intent (probably using an [inten
|
||||
|
||||
See `rhasspy.intent.HomeAssistantConversationRecognizer` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes intent recognitions/failures to `hermes/intent/<INTENT_NAME>` or `hermes/nlu/intentNotRecognized` ([Hermes protocol](https://docs.snips.ai/reference/hermes)).
|
||||
|
||||
This is enabled by default and controlled by the `mqtt.publish_intents` setting in your [profile](profiles.md).
|
||||
|
||||
## Command
|
||||
|
||||
Recognizes intents from text using a custom external program.
|
||||
@@ -190,7 +196,7 @@ When a voice command is successfully transcribed, your program will be called wi
|
||||
"text": "set the bedroom light to red"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
* `$RHASSPY_BASE_DIR` - path to the directory where Rhasspy is running from
|
||||
|
||||
@@ -40,226 +40,8 @@ If you need to install Rhasspy onto a machine that is not connected to the inter
|
||||
2. `fr-g2p.tar.gz`
|
||||
3. `fr-small.lm.gz`
|
||||
|
||||
If your user profile directory is `$HOME/.config/rhasspy/profiles`, then you should download/copy all three artifacts to `$HOME/.config/rhasspy/profiles/fr/download` on the offline machine. Now, when Rhasspy loads the `fr` profile and you click "Download", it will extract the files in the `download` directory without going out to the internet.
|
||||
|
||||
If you want to know precisely which files Rhasspy is looking for in a given profile, visit the `profiles` directory in [the source code](https://github.com/synesthesiam/rhasspy/tree/master/profiles) and examine these scripts in that profile's directory:
|
||||
|
||||
* `download-profile.sh`
|
||||
* Downloads and extracts all required binary artifacts. Uses cache in `download` directory unless `--delete` option is given.
|
||||
* `check-profile.sh`
|
||||
* Verifies that required binary artifacts are present. Returns non-zero exit code if download is required.
|
||||
If your user profile directory is `$HOME/.config/rhasspy/profiles`, then you should download/copy all three artifacts to `$HOME/.config/rhasspy/profiles/fr/download` on the offline machine. Now, when Rhasspy loads the `fr` profile and you click "Download", it will extract the files in the `download` directory without going out to the internet.
|
||||
|
||||
## Available Settings
|
||||
|
||||
All available profile sections and settings are listed below:
|
||||
|
||||
* `rhasspy` - configuration for Rhasspy assistant
|
||||
* `preload_profile` - true if speech/intent recognizers should be loaded immediately for default profile (default: `true`)
|
||||
* `listen_on_start` - true if Rhasspy should listen for wake word at startup (default: `true`)
|
||||
* `load_timeout_sec` - number of seconds to wait for internal actors before proceeding with start up
|
||||
* `home_assistant` - how to communicate with Home Assistant/Hass.io
|
||||
* `url` - Base URL of Home Assistant server (no `/api`)
|
||||
* `access_token` - long-lived access token for Home Assistant (Hass.io token is used automatically)
|
||||
* `api_password` - Password, if you have that enabled (deprecated)
|
||||
* `pem_file` - Full path to your <a href="http://docs.python-requests.org/en/latest/user/advanced/#ssl-cert-verification">CA_BUNDLE file or a directory with certificates of trusted CAs</a>
|
||||
* `event_type_format` - Python format string used to create event type from intent type (`{0}`)
|
||||
* `speech_to_text` - transcribing [voice commands to text](speech-to-text.md)
|
||||
* `system` - name of speech to text system (`pocketsphinx`, `remote`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for [Pocketsphinx](speech-to-text.md#pocketsphinx)
|
||||
* `compatible` - true if profile can use pocketsphinx for speech recognition
|
||||
* `acoustic_model` - directory with CMU 16Khz acoustic model
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `language_model` - text file with trigram [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) built from example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `base_language_model` - large general language model (read only)
|
||||
* `mllr_matrix` - MLLR matrix from [acoustic model tuning](https://cmusphinx.github.io/wiki/tutorialtuning/)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `kaldi` - configuration for [Kaldi](speech-to-text.md#kaldi)
|
||||
* `compatible` - true if profile can use Kaldi for speech recognition
|
||||
* `kaldi_dir` - absolute path to Kaldi root directory
|
||||
* `model_dir` - directory where Kaldi model is stored (relative to profile directory)
|
||||
* `graph` - directory where HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_graph` - directory where large general HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `remote` - configuration for [remote Rhasspy server](speech-to-text.md#remote-http-server)
|
||||
* `url` - URL to POST WAV data for transcription (e.g., `http://your-rhasspy-server:12101/api/speech-to-text`)
|
||||
* `command` - configuration for [external speech-to-text program](speech-to-text.md#command)
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `sentences_ini` - Ini file with example [sentences/JSGF templates](training.md#sentencesini) grouped by intent
|
||||
* `g2p_model` - finite-state transducer for phonetisaurus to guess word pronunciations
|
||||
* `g2p_casing` - casing to force for g2p model (`upper`, `lower`, or blank)
|
||||
* `dictionary_casing` - casing to force for dictionary words (`upper`, `lower`, or blank)
|
||||
* `grammars_dir` - directory to write generated JSGF grammars from sentences ini file
|
||||
* `fsts_dir` - directory to write generated finite state transducers from JSGF grammars
|
||||
* `intent` - transforming text commands to intents
|
||||
* `system` - intent recognition system (`fsticuffs`, `fuzzywuzzy`, `rasa`, `remote`, `adapt`, `command`, or `dummy`)
|
||||
* `fsticuffs` - configuration for [OpenFST-based](https://www.openfst.org) intent recognizer
|
||||
* `intent_fst` - path to generated finite state transducer with all intents combined
|
||||
* `ignore_unknown_words` - true if words not in the FST symbol table should be ignored
|
||||
* `fuzzy` - true if text is matched in a fuzzy manner, skipping words in `stop_words.txt`
|
||||
* `fuzzywuzzy` - configuration for simplistic [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) based intent recognizer
|
||||
* `examples_json` - JSON file with intents/example sentences
|
||||
* `min_confidence` - minimum confidence required for intent to be converted to a JSON event (0-1)
|
||||
* `remote` - configuration for remote Rhasspy server
|
||||
* `url` - URL to POST text to for intent recognition (e.g., `http://your-rhasspy-server:12101/api/text-to-intent`)
|
||||
* `rasa` - configuration for [Rasa NLU](https://rasa.com/) based intent recognizer
|
||||
* `url` - URL of remote Rasa NLU server (e.g., `http://localhost:5005/`)
|
||||
* `examples_markdown` - Markdown file to generate with intents/example sentences
|
||||
* `project_name` - name of project to generate during training
|
||||
* `adapt` - configuration for [Mycroft Adapt](https://github.com/MycroftAI/adapt) based intent recognizer
|
||||
* `stop_words` - text file with words to ignore in training sentences
|
||||
* `command` - configuration for external intent recognition program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `text_to_speech` - pronouncing words
|
||||
* `system` - text to speech system (`espeak`, `flite`, `picotts`, `marytts`, `command`, or `dummy`)
|
||||
* `espeak` - configuration for [eSpeak](http://espeak.sourceforge.net)
|
||||
* `phoneme_map` - text file mapping CMU phonemes to eSpeak phonemes
|
||||
* `flite` - configuration for [flite](http://www.festvox.org/flite)
|
||||
* `voice` - name of voice to use (e.g., `kal16`, `rms`, `awb`)
|
||||
* `picotts` - configuration for [PicoTTS](https://en.wikipedia.org/wiki/SVOX)
|
||||
* `language` - language to use (default if not present)
|
||||
* `marytts` - configuration for [MaryTTS](http://mary.dfki.de)
|
||||
* `url` - address:port of MaryTTS server (port is usually 59125)
|
||||
* `voice` - name of voice to use (e.g., `cmu-slt`). Default if not present.
|
||||
* `locale` - name of locale to use (e.g., `en-US`). Default if not present.
|
||||
* `wavenet` - configuration for Google's [WaveNet](https://cloud.google.com/text-to-speech/docs/wavenet)
|
||||
* `cache_dir` - path to directory in your profile where WAV files are cached
|
||||
* `credentials_json` - path to the JSON credentials file (generated online)
|
||||
* `gender` - gender of speaker (`MALE` or `FEMALE`)
|
||||
* `language_code` - language/locale e.g. `en-US`,
|
||||
* `sample_rate` - WAV sample rate (default: 22050)
|
||||
* `url` - URL of WaveNet endpoint
|
||||
* `voice` - voice to use (e.g., `Wavenet-C`)
|
||||
* `fallback_tts` - text to speech system to use when offline or error occurs (e.g., `espeak`)
|
||||
* `phoneme_examples` - text file with examples for each CMU phoneme
|
||||
* `training` - training speech/intent recognizers
|
||||
* `dictionary_number_duplicates` - true if duplicate words in dictionary should be suffixed by `(2)`, `(3)`, etc.
|
||||
* `tokenizer` - system used to break sentences into words (`regex` only for now)
|
||||
* `regex` - configuration for regex tokenizer
|
||||
* `replace` - list of dictionaries with patterns/replacements used on each example sentence
|
||||
* `split` - pattern used to break sentences into words
|
||||
* `unknown_words` - configuration for dealing with words not in base/custom dictionaries
|
||||
* `fail_when_present` - true if Rhasspy should halt training when unknown words are found
|
||||
* `guess_pronunciations` - true if [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) should be used to guess how an unknown word is pronounced
|
||||
* `speech_to_text` - training for speech decoder
|
||||
* `system` - speech to text training system (`auto`, `pocketsphinx`, `kaldi`, `command`, or `dummy`)
|
||||
* `command` - configuration for external speech-to-text training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `intent` - training for intent recognizer
|
||||
* `system` - intent recognizer training system (`auto`, `fsticuffs`, `fuzzywuzzy`, `rasa`, `adapt`, `command`, or `dummy`)
|
||||
* `command` - configuration for external intent recognizer training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `wake` - waking Rhasspy up for speech input
|
||||
* `system` - wake word recognition system (`pocketsphinx`, `snowboy`, `precise`, `porcupine`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for Pocketsphinx wake word recognizer
|
||||
* `keyphrase` - phrase to wake up on (3-4 syllables recommended)
|
||||
* `threshold` - sensitivity of detection (recommended range 1e-50 to 1e-5)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Pocketsphinx (default 960)
|
||||
* `snowboy` - configuration for [snowboy](https://snowboy.kitt.ai)
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `audio_gain` - audio gain (default 1)
|
||||
* `chunk_size` - number of bytes per chunk to feed to snowboy (default 960)
|
||||
* `precise` - configuration for [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* `engine_path` - path to the precise-engine binary
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `trigger_level` - number of events to trigger activation (default 3)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Precise (default 2048)
|
||||
* `porcupine` - configuration for [PicoVoice's Porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* `library_path` - path to `libpv_porcupine.so` for your platform/architecture
|
||||
* `model_path` - path to the `porcupine_params.pv` (lib/common)
|
||||
* `keyword_path` - path to the `.ppn` keyword file
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `command` - configuration for external wake word program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `microphone` - configuration for audio recording
|
||||
* `system` - audio recording system (`pyaudio`, `arecord`, `hermes`, `http`, or `dummy`)
|
||||
* `pyaudio` - configuration for [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) microphone
|
||||
* `device` - index of device to use or empty for default device
|
||||
* `frames_per_buffer` - number of frames to read at a time (default 480)
|
||||
* `arecord` - configuration for ALSA microphone
|
||||
* `device` - name of ALSA device (see `arecord -L`) to use or empty for default device
|
||||
* `chunk_size` - number of bytes to read at a time (default 960)
|
||||
* `http` - configuration for HTTP audio stream
|
||||
* `host` - hostname or IP address of HTTP audio server (default 127.0.0.1)
|
||||
* `port` - port to receive audio stream on (default 12333)
|
||||
* `stop_after` - one of "never", "text", or "intent" ([see documentation](audio-input.md#http-stream))
|
||||
* `gstreamer` - configuration for GStreamer audio recorder
|
||||
* `pipeline` - GStreamer pipeline (e.g., `FILTER ! FILTER ! ...`) without sink
|
||||
* `hermes` - configuration for MQTT "microphone" ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* Subscribes to WAV data from `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Requires MQTT to be enabled
|
||||
* `sounds` - configuration for feedback sounds from Rhasspy
|
||||
* `system` - which sound output system to use (`aplay`, `hermes`, or `dummy`)
|
||||
* `wake` - path to WAV file to play when Rhasspy wakes up
|
||||
* `recorded` - path to WAV file to play when a command finishes recording
|
||||
* `aplay` - configuration for ALSA speakers
|
||||
* `device` - name of ALSA device (see `aplay -L`) to use or empty for default device
|
||||
* `hermes` - configuration for MQTT "speakers" ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* WAV data published to `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Requires MQTT to be enabled
|
||||
* `command`
|
||||
* `system` - which voice command listener system to use (`webrtcvad`, `oneshot`, `hermes`, or `dummy`)
|
||||
* `webrtcvad` - configuration for [webrtcvad](https://github.com/wiseman/py-webrtcvad) system
|
||||
* `sample_rate` - sample rate of input audio
|
||||
* `chunk_size` - bytes per buffer (must be 10, 20, or 30 ms)
|
||||
* `vad_mode` - sensitivity of `webrtcvad` (0-3)
|
||||
* `min_sec` - minimum number of seconds in a command
|
||||
* `silence_sec` - number of seconds of silence after voice command before stopping
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `throwaway_buffers` - number of buffers to drop when recording starts
|
||||
* `speech_buffers` - number of buffers with speech before command starts
|
||||
* `oneshot` - configuration for voice command system that takes first audio frame as entire command
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `command` - configuration for external voice command program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `hermes` - configuration for MQTT-based voice command system that listens between `startListening` and `stopListening` commands ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `handle`
|
||||
* `system` - which intent handling system to use (`hass`, `command`, or `dummy`)
|
||||
* `forward_to_hass` - true if intents are always forwarded to Home Assistant (even if `system` is `command`)
|
||||
* `command` - configuration for external intent handling program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `mqtt` - configuration for MQTT ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `enabled` - true if MQTT client should be started
|
||||
* `host` - MQTT host
|
||||
* `port` - MQTT port
|
||||
* `username` - MQTT username (blank for anonymous)
|
||||
* `password` - MQTT password
|
||||
* `reconnect_sec` - number of seconds before client will reconnect
|
||||
* `site_id` - ID of site ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol))
|
||||
* `publish_intents` - true if intents are published to MQTT
|
||||
* `tuning` - configuration for acoustic model tuning
|
||||
* `system` - system for tuning (currently only `sphinxtrain`)
|
||||
* `sphinxtrain` - configuration for [sphinxtrain](https://github.com/cmusphinx/sphinxtrain) based acoustic model tuning
|
||||
* `mllr_matrix` - name of generated MLLR matrix (should match `speech_to_text.pocketsphinx.mllr_matrix`)
|
||||
* `download` - configuration for profile file downloading
|
||||
* `cache_dir` - directory in your profile where downloaded files are cached
|
||||
* `conditions` - profile settings that will trigger file downloads
|
||||
* keys are profile setting paths (e.g., `wake.system`)
|
||||
* values are dictionaries whose keys are profile settings values (e.g., `snowboy`)
|
||||
* settings may have the form `<=N` or `!X` to mean "less than or equal to N" or "not X"
|
||||
* leaf nodes are dictionaries whose keys are destination file paths and whose values reference the `files` dictionary
|
||||
* `files` - locations, etc. of files to download
|
||||
* keys are names of files
|
||||
* values are dictionaries with:
|
||||
* `url` - URL of file to download
|
||||
* `cache` - `false` if file should be downloaded directly into profile (skipping cache)
|
||||
See [the reference](reference.md#profile-settings) for all available profile settings.
|
||||
|
||||
@@ -0,0 +1,594 @@
|
||||
# Reference
|
||||
|
||||
* [Supported Languages](#supported-languages)
|
||||
* [HTTP API](#http-api)
|
||||
* [Websocket API](#websocket-api)
|
||||
* [MQTT API](#mqtt-api)
|
||||
* [Command Line](#command-line)
|
||||
* [Profile Settings](#profile-settings)
|
||||
|
||||
## Supported Languages
|
||||
|
||||
The table below lists which components are compatible with Rhasspy's supported languages.
|
||||
|
||||
| Category | Name | Offline? | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | sv | ca |
|
||||
| -------- | ------ | -------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| **Wake Word** | [pocketsphinx](wake-word.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | |
|
||||
| | [porcupine](wake-word.md#porcupine) | ✓ | ✓ | | | | | | | | | | | | | |
|
||||
| | [snowboy](wake-word.md#snowboy) | *requires account* | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| | [precise](wake-word.md#mycroft-precise) | ✓ | ✓ | • | • | • | • | • | • | • | • | • | • | • | • | • |
|
||||
| **Speech to Text** | [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| | [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | ✓ | | ✓ | | ✓ | | | | | ✓ | | ✓ | |
|
||||
| **Intent Recognition** | [fsticuffs](intent-recognition.md#fsticuffs) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [adapt](intent-recognition.md#mycroft-adapt) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flair](intent-recognition.md#flair) | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | | | | | ✓ | | ✓ |
|
||||
| | [rasaNLU](intent-recognition.md#rasanlu) | *needs extra software* | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| **Text to Speech** | [espeak](text-to-speech.md#espeak) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| | [flite](text-to-speech.md#flite) | ✓ | ✓ | | | | | | | | ✓ | | | | | |
|
||||
| | [picotts](text-to-speech.md#picotts) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | | | | | | | | |
|
||||
| | [marytts](text-to-speech.md#marytts) | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | | | | | | | |
|
||||
| | [wavenet](text-to-speech.md#google-wavenet) | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | ✓ | |
|
||||
|
||||
• - yes, but requires training/customization
|
||||
|
||||
## HTTP API
|
||||
|
||||
Rhasspy's HTTP endpoints are documented below. You can also visit `/api/` in your Rhasspy server (note the final slash) to try out each endpoint.
|
||||
|
||||
Application authors may want to use the [rhasspy-client](https://pypi.org/project/rhasspy-client/), which provides a high-level interface to a remote Rhasspy server.
|
||||
|
||||
### Endpoints
|
||||
|
||||
* `/api/custom-words`
|
||||
* GET custom word dictionary as plain text, or POST to overwrite it
|
||||
* See `custom_words.txt` in your profile directory
|
||||
* `/api/download-profile`
|
||||
* Force Rhasspy to re-download profile
|
||||
* `?delete=true` - clear download cache
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* Returns intent JSON when command is finished
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `?timeout=<seconds>` - override default command timeout
|
||||
* `?entity=<entity>&value=<value>` - set custom entity/value in recognized intent
|
||||
* `/api/listen-for-wake-word`
|
||||
* POST to wake Rhasspy up and return immediately
|
||||
* `/api/lookup`
|
||||
* POST word as plain text to look up or guess pronunciation
|
||||
* `?n=<number>` - return at most `n` guessed pronunciations
|
||||
* `/api/microphones`
|
||||
* GET list of available microphones
|
||||
* `/api/phonemes`
|
||||
* GET example phonemes from speech recognizer for your profile
|
||||
* See `phoneme_examples.txt` in your profile directory
|
||||
* `/api/play-wav`
|
||||
* POST to play WAV data
|
||||
* `/api/profile`
|
||||
* GET the JSON for your profile, or POST to overwrite it
|
||||
* `?layers=profile` to only see settings different from `defaults.json`
|
||||
* See `profile.json` in your profile directory
|
||||
* `/api/restart`
|
||||
* Restart Rhasspy server
|
||||
* `/api/sentences`
|
||||
* GET voice command templates or POST to overwrite
|
||||
* Set `Accept: application/json` to GET JSON with all sentence files
|
||||
* Set `Content-Type: application/json` to POST JSON with sentences for multiple files
|
||||
* See `sentences.ini` and `intents` directory in your profile
|
||||
* `/api/slots`
|
||||
* GET slot values as JSON or POST to add to/overwrite them
|
||||
* `?overwrite_all=true` to clear slots in JSON before writing
|
||||
* `/api/speakers`
|
||||
* GET list of available audio output devices
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* Returns intent JSON when command is finished
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/start-recording`
|
||||
* POST to have Rhasspy start recording a voice command
|
||||
* `/api/stop-recording`
|
||||
* POST to have Rhasspy stop recording and process recorded data as a voice command
|
||||
* Returns intent JSON when command has been processed
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/test-microphones`
|
||||
* GET list of available microphones and if they're working
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as command
|
||||
* Returns intent JSON when command has been processed
|
||||
* `?nohass=true` - stop Rhasspy from handling the intent
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
* `?play=false` - get WAV data instead of having Rhasspy speak
|
||||
* `?voice=<voice>` - override default TTS voice
|
||||
* `?language=<language>` - override default TTS language or locale
|
||||
* `?repeat=true` - have Rhasspy repeat the last sentence it spoke
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `?nocache=true` - re-train profile from scratch
|
||||
* `/api/unknown-words`
|
||||
* GET words that Rhasspy doesn't know in your sentences
|
||||
* See `unknown_words.txt` in your profile directory
|
||||
|
||||
## Websocket API
|
||||
|
||||
* `/api/events/intent`
|
||||
* Listen for recognized intents published as JSON
|
||||
* `/api/events/log`
|
||||
* Listen for log messages published as plain text
|
||||
|
||||
## MQTT API
|
||||
|
||||
Rhasspy implements part of the [Hermes](https://docs.snips.ai/reference/hermes) protocol. Various services of Rhasspy can be configured to pass along MQTT messages or to react to MQTT messages following the Hermes protocol.
|
||||
|
||||
* `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Rhasspy publishes audio in WAV format to this topic. By default it is 16 kHz, 16-bit mono for compatibility reasons, but other types are possible too.
|
||||
* `SITE_ID` is set in Rhasspy's `mqtt` configuration.
|
||||
* `REQUEST_ID` is generated using `uuid.uuid4` each time a sound is played.
|
||||
* `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Rhasspy listens to this topic for WAV data. Audio is automatically converted to 16 kHz, 16-bit mono audio and played.
|
||||
* `SITE_ID` is set in Rhasspy's `mqtt` configuration.
|
||||
* `hermes/asr/startListening`
|
||||
* Rhasspy wakes up and starts recording on receiving this topic.
|
||||
* The payload is a JSON object with a `siteId` key that holds Rhasspy's site ID.
|
||||
* `hermes/asr/stopListening`
|
||||
* Rhasspy stops recording and processes the voice command on receiving this topic.
|
||||
* The payload is a JSON object with a `siteId` key that holds Rhasspy's site ID.
|
||||
* `hermes/intent/<INTENT_NAME>`
|
||||
* Rhasspy publishes a message to this topic on recognition of an intent.
|
||||
* The payload is a JSON object with the recognized intent, entities and text.
|
||||
* `hermes/nlu/intentNotRecognized`
|
||||
* Rhasspy publishes a message to this topic when it doesn't recognize an intent.
|
||||
* `hermes/asr/textCaptured`
|
||||
* Rhasspy publishes a transcription to this topic each time a voice command is recognized.
|
||||
* `hermes/hotword/<WAKEWORD_ID>/detected`
|
||||
* Rhasspy wakes up when a message is received on this topic.
|
||||
|
||||
## Command Line
|
||||
|
||||
Rhasspy provides a powerful [command-line interface](usage.md#command-line) called `rhasspy-cli`.
|
||||
|
||||
For `rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>`, `<COMMAND>` can be:
|
||||
|
||||
* `info`
|
||||
* Print profile JSON to standard out
|
||||
* Add `--defaults` to only print settings from `defaults.json`
|
||||
* `wav2text`
|
||||
* Convert WAV file(s) to text
|
||||
* `wav2intent`
|
||||
* Convert WAV file(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `text2intent`
|
||||
* Convert text command(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `train`
|
||||
* Re-train your profile
|
||||
* `mic2wav`
|
||||
* Listen for a voice command and output WAV data
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2text`
|
||||
* Listen for a voice command and convert it to text
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2intent`
|
||||
* Listen for a voice command and output intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `word2phonemes`
|
||||
* Print the CMU phonemes for a word (possibly unknown)
|
||||
* Add `-n <COUNT>` to control the maximum number of guessed pronunciations
|
||||
* `word2wav`
|
||||
* Pronounce a word (possibly unknown) and output WAV data
|
||||
* `text2speech`
|
||||
* Speaks one or more sentences using Rhasspy's text to speech system
|
||||
* `text2wav`
|
||||
* Converts a single sentence to WAV using Rhasspy's text to speech system
|
||||
* `sleep`
|
||||
* Run Rhasspy and wait until wake word is spoken
|
||||
* `download`
|
||||
* Download necessary profile files from the internet
|
||||
|
||||
### Profile Operations
|
||||
|
||||
Print the complete JSON for the English profile with:
|
||||
|
||||
rhasspy-cli --profile en info
|
||||
|
||||
You can combine this with other commands, such as `jq` to get at specific pieces:
|
||||
|
||||
rhasspy-cli --profile en info | jq .wake.pocketsphinx.keyphrase
|
||||
|
||||
Output (JSON):
|
||||
|
||||
"okay rhasspy"
|
||||
|
||||
### Training
|
||||
|
||||
Retrain the English profile with:
|
||||
|
||||
rhasspy-cli --profile en train
|
||||
|
||||
Add `--debug` before `train` for more information.
|
||||
|
||||
### Speech to Text/Intent
|
||||
|
||||
Convert a WAV file to text from stdin:
|
||||
|
||||
rhasspy-cli --profile en wav2text < what-time-is-it.wav
|
||||
|
||||
Output (text):
|
||||
|
||||
what time is it
|
||||
|
||||
Convert multiple WAV files:
|
||||
|
||||
rhasspy-cli --profile en wav2text what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what-time-is-it.wav": "what time is it",
|
||||
"turn-on-the-living-room-lamp.wav": "turn on the living room lamp"
|
||||
}
|
||||
```
|
||||
|
||||
Convert multiple WAV files to intents **and** handle them:
|
||||
|
||||
rhasspy-cli --profile en wav2intent --handle what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what-time-is-it.wav": {
|
||||
"text": "what time is it",
|
||||
"intent": {
|
||||
"name": "GetTime",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": []
|
||||
},
|
||||
"turn-on-the-living-room-lamp.wav": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Text to Intent
|
||||
|
||||
Handle a command as if it was spoken:
|
||||
|
||||
rhasspy-cli --profile en text2intent --handle "turn off the living room lamp"
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn off the living room lamp": {
|
||||
"text": "turn off the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "off"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Record Your Voice
|
||||
|
||||
Save a voice command to a WAV:
|
||||
|
||||
rhasspy-cli --profile en mic2wav > my-voice-command.wav
|
||||
|
||||
You can listen to it with:
|
||||
|
||||
aplay my-voice-command.wav
|
||||
|
||||
### Test Your Wake Word
|
||||
|
||||
Start Rhasspy and wait for wake word:
|
||||
|
||||
rhasspy-cli --profile en sleep
|
||||
|
||||
Rhasspy should exit and print the wake word when it's spoken.
|
||||
|
||||
### Text to Speech
|
||||
|
||||
Have Rhasspy speak one or more sentences:
|
||||
|
||||
rhasspy-cli --profile en text2speech "We ride at dawn!"
|
||||
|
||||
Use a different text to speech system and voice:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'flite' \
|
||||
--set 'text_to_speech.flite.voice' 'slt' \
|
||||
text2speech "We ride at dawn!"
|
||||
|
||||
### Pronounce Words
|
||||
|
||||
Speak words Rhasspy doesn't know!
|
||||
|
||||
rhasspy-cli --profile en word2wav raxacoricofallapatorius | aplay
|
||||
|
||||
### Text to Speech to Text to Intent
|
||||
|
||||
Use the miracle of Unix pipes to have Rhasspy interpret voice commands from itself:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'picotts' \
|
||||
text2wav "turn on the living room lamp" | \
|
||||
rhasspy-cli --profile en wav2text | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn on the living room lamp": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on",
|
||||
"name": "living room lamp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Profile Settings
|
||||
|
||||
All available profile sections and settings are listed below:
|
||||
|
||||
* `rhasspy` - configuration for Rhasspy assistant
|
||||
* `preload_profile` - true if speech/intent recognizers should be loaded immediately for default profile (default: `true`)
|
||||
* `listen_on_start` - true if Rhasspy should listen for wake word at startup (default: `true`)
|
||||
* `load_timeout_sec` - number of seconds to wait for internal actors before proceeding with start up
|
||||
* `home_assistant` - how to communicate with Home Assistant/Hass.io
|
||||
* `url` - Base URL of Home Assistant server (no `/api`)
|
||||
* `access_token` - long-lived access token for Home Assistant (Hass.io token is used automatically)
|
||||
* `api_password` - Password, if you have that enabled (deprecated)
|
||||
* `pem_file` - Full path to your <a href="http://docs.python-requests.org/en/latest/user/advanced/#ssl-cert-verification">CA_BUNDLE file or a directory with certificates of trusted CAs</a>
|
||||
* `event_type_format` - Python format string used to create event type from intent type (`{0}`)
|
||||
* `speech_to_text` - transcribing [voice commands to text](speech-to-text.md)
|
||||
* `system` - name of speech to text system (`pocketsphinx`, `kaldi`, `remote`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for [Pocketsphinx](speech-to-text.md#pocketsphinx)
|
||||
* `compatible` - true if profile can use pocketsphinx for speech recognition
|
||||
* `acoustic_model` - directory with CMU 16 kHz acoustic model
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `language_model` - text file with trigram [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) built from example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `base_language_model` - large general language model (read only)
|
||||
* `mllr_matrix` - MLLR matrix from [acoustic model tuning](https://cmusphinx.github.io/wiki/tutorialtuning/)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `kaldi` - configuration for [Kaldi](speech-to-text.md#kaldi)
|
||||
* `compatible` - true if profile can use Kaldi for speech recognition
|
||||
* `kaldi_dir` - absolute path to Kaldi root directory
|
||||
* `model_dir` - directory where Kaldi model is stored (relative to profile directory)
|
||||
* `graph` - directory where HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_graph` - directory where large general HCLG.fst is located (relative to `model_dir`)
|
||||
* `base_dictionary` - large text file with word pronunciations (read only)
|
||||
* `custom_words` - small text file with words/pronunciations added by user
|
||||
* `dictionary` - text file with all words/pronunciations needed for example sentences
|
||||
* `open_transcription` - true if general language model should be used (custom voice commands ignored)
|
||||
* `unknown_words` - small text file with guessed word pronunciations (from phonetisaurus)
|
||||
* `mix_weight` - how much of the base language model to [mix in during training](training.md#language-model-mixing) (0-1)
|
||||
* `mix_fst` - path to save mixed ngram FST model
|
||||
* `remote` - configuration for [remote Rhasspy server](speech-to-text.md#remote-http-server)
|
||||
* `url` - URL to POST WAV data for transcription (e.g., `http://your-rhasspy-server:12101/api/speech-to-text`)
|
||||
* `command` - configuration for [external speech-to-text program](speech-to-text.md#command)
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `sentences_ini` - Ini file with example [sentences/JSGF templates](training.md#sentencesini) grouped by intent
|
||||
* `sentences_dir` - Directory with additional sentence templates (default: `intents`)
|
||||
* `g2p_model` - finite-state transducer for phonetisaurus to guess word pronunciations
|
||||
* `g2p_casing` - casing to force for g2p model (`upper`, `lower`, or blank)
|
||||
* `dictionary_casing` - casing to force for dictionary words (`upper`, `lower`, or blank)
|
||||
* `grammars_dir` - directory to write generated JSGF grammars from sentences ini file
|
||||
* `fsts_dir` - directory to write generated finite state transducers from JSGF grammars
|
||||
* `intent` - transforming text commands to intents
|
||||
* `system` - intent recognition system (`fsticuffs`, `fuzzywuzzy`, `rasa`, `remote`, `adapt`, `command`, or `dummy`)
|
||||
* `fsticuffs` - configuration for [OpenFST-based](https://www.openfst.org) intent recognizer
|
||||
* `intent_fst` - path to generated finite state transducer with all intents combined
|
||||
* `ignore_unknown_words` - true if words not in the FST symbol table should be ignored
|
||||
* `fuzzy` - true if text is matched in a fuzzy manner, skipping words in `stop_words.txt`
|
||||
* `fuzzywuzzy` - configuration for simplistic [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) based intent recognizer
|
||||
* `examples_json` - JSON file with intents/example sentences
|
||||
* `min_confidence` - minimum confidence required for intent to be converted to a JSON event (0-1)
|
||||
* `remote` - configuration for remote Rhasspy server
|
||||
* `url` - URL to POST text to for intent recognition (e.g., `http://your-rhasspy-server:12101/api/text-to-intent`)
|
||||
* `rasa` - configuration for [Rasa NLU](https://rasa.com/) based intent recognizer
|
||||
* `url` - URL of remote Rasa NLU server (e.g., `http://localhost:5005/`)
|
||||
* `examples_markdown` - Markdown file to generate with intents/example sentences
|
||||
* `project_name` - name of project to generate during training
|
||||
* `adapt` - configuration for [Mycroft Adapt](https://github.com/MycroftAI/adapt) based intent recognizer
|
||||
* `stop_words` - text file with words to ignore in training sentences
|
||||
* `command` - configuration for external intent recognition program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `text_to_speech` - pronouncing words
|
||||
* `system` - text to speech system (`espeak`, `flite`, `picotts`, `marytts`, `command`, or `dummy`)
|
||||
* `espeak` - configuration for [eSpeak](http://espeak.sourceforge.net)
|
||||
* `phoneme_map` - text file mapping CMU phonemes to eSpeak phonemes
|
||||
* `flite` - configuration for [flite](http://www.festvox.org/flite)
|
||||
* `voice` - name of voice to use (e.g., `kal16`, `rms`, `awb`)
|
||||
* `picotts` - configuration for [PicoTTS](https://en.wikipedia.org/wiki/SVOX)
|
||||
* `language` - language to use (default if not present)
|
||||
* `marytts` - configuration for [MaryTTS](http://mary.dfki.de)
|
||||
* `url` - address:port of MaryTTS server (port is usually 59125)
|
||||
* `voice` - name of voice to use (e.g., `cmu-slt`). Default if not present.
|
||||
* `locale` - name of locale to use (e.g., `en-US`). Default if not present.
|
||||
* `wavenet` - configuration for Google's [WaveNet](https://cloud.google.com/text-to-speech/docs/wavenet)
|
||||
* `cache_dir` - path to directory in your profile where WAV files are cached
|
||||
* `credentials_json` - path to the JSON credentials file (generated online)
|
||||
* `gender` - gender of speaker (`MALE` or `FEMALE`)
|
||||
* `language_code` - language/locale (e.g., `en-US`)
|
||||
* `sample_rate` - WAV sample rate (default: 22050)
|
||||
* `url` - URL of WaveNet endpoint
|
||||
* `voice` - voice to use (e.g., `Wavenet-C`)
|
||||
* `fallback_tts` - text to speech system to use when offline or error occurs (e.g., `espeak`)
|
||||
* `phoneme_examples` - text file with examples for each CMU phoneme
|
||||
* `training` - training speech/intent recognizers
|
||||
* `dictionary_number_duplicates` - true if duplicate words in dictionary should be suffixed by `(2)`, `(3)`, etc.
|
||||
* `tokenizer` - system used to break sentences into words (`regex` only for now)
|
||||
* `regex` - configuration for regex tokenizer
|
||||
* `replace` - list of dictionaries with patterns/replacements used on each example sentence
|
||||
* `split` - pattern used to break sentences into words
|
||||
* `unknown_words` - configuration for dealing with words not in base/custom dictionaries
|
||||
* `fail_when_present` - true if Rhasspy should halt training when unknown words are found
|
||||
* `guess_pronunciations` - true if [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) should be used to guess how an unknown word is pronounced
|
||||
* `speech_to_text` - training for speech decoder
|
||||
* `system` - speech to text training system (`auto`, `pocketsphinx`, `kaldi`, `command`, or `dummy`)
|
||||
* `command` - configuration for external speech-to-text training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `intent` - training for intent recognizer
|
||||
* `system` - intent recognizer training system (`auto`, `fsticuffs`, `fuzzywuzzy`, `rasa`, `adapt`, `command`, or `dummy`)
|
||||
* `command` - configuration for external intent recognizer training program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `wake` - waking Rhasspy up for speech input
|
||||
* `system` - wake word recognition system (`pocketsphinx`, `snowboy`, `precise`, `porcupine`, `command`, or `dummy`)
|
||||
* `pocketsphinx` - configuration for Pocketsphinx wake word recognizer
|
||||
* `keyphrase` - phrase to wake up on (3-4 syllables recommended)
|
||||
* `threshold` - sensitivity of detection (recommended range 1e-50 to 1e-5)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Pocketsphinx (default 960)
|
||||
* `snowboy` - configuration for [snowboy](https://snowboy.kitt.ai)
|
||||
* `model` - path to model file(s), separated by commas (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `audio_gain` - audio gain (default 1)
|
||||
* `apply_frontend` - true if ApplyFrontend should be set
|
||||
* `chunk_size` - number of bytes per chunk to feed to snowboy (default 960)
|
||||
* `model_settings` - settings for each snowboy model path (e.g., `snowboy/snowboy.umdl`)
|
||||
* `<MODEL_PATH>`
|
||||
* `sensitivity` - model sensitivity
|
||||
* `audio_gain` - audio gain
|
||||
* `apply_frontend` - true if ApplyFrontend should be set
|
||||
* `precise` - configuration for [Mycroft Precise](https://github.com/MycroftAI/mycroft-precise)
|
||||
* `engine_path` - path to the precise-engine binary
|
||||
* `model` - path to model file (in profile directory)
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `trigger_level` - number of events to trigger activation (default 3)
|
||||
* `chunk_size` - number of bytes per chunk to feed to Precise (default 2048)
|
||||
* `porcupine` - configuration for [PicoVoice's Porcupine](https://github.com/Picovoice/Porcupine)
|
||||
* `library_path` - path to `libpv_porcupine.so` for your platform/architecture
|
||||
* `model_path` - path to the `porcupine_params.pv` (lib/common)
|
||||
* `keyword_path` - path to the `.ppn` keyword file
|
||||
* `sensitivity` - model sensitivity (0-1, default 0.5)
|
||||
* `command` - configuration for external wake word program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `microphone` - configuration for audio recording
|
||||
* `system` - audio recording system (`pyaudio`, `arecord`, `hermes`, `gstreamer`, `http`, or `dummy`)
|
||||
* `pyaudio` - configuration for [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) microphone
|
||||
* `device` - index of device to use or empty for default device
|
||||
* `frames_per_buffer` - number of frames to read at a time (default 480)
|
||||
* `arecord` - configuration for ALSA microphone
|
||||
* `device` - name of ALSA device (see `arecord -L`) to use or empty for default device
|
||||
* `chunk_size` - number of bytes to read at a time (default 960)
|
||||
* `http` - configuration for HTTP audio stream
|
||||
* `host` - hostname or IP address of HTTP audio server (default 127.0.0.1)
|
||||
* `port` - port to receive audio stream on (default 12333)
|
||||
* `stop_after` - one of "never", "text", or "intent" ([see documentation](audio-input.md#http-stream))
|
||||
* `gstreamer` - configuration for GStreamer audio recorder
|
||||
* `pipeline` - GStreamer pipeline (e.g., `FILTER ! FILTER ! ...`) without sink
|
||||
* `hermes` - configuration for MQTT "microphone" ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* Subscribes to WAV data from `hermes/audioServer/<SITE_ID>/audioFrame`
|
||||
* Requires MQTT to be enabled
|
||||
* `sounds` - configuration for feedback sounds from Rhasspy
|
||||
* `system` - which sound output system to use (`aplay`, `hermes`, or `dummy`)
|
||||
* `wake` - path to WAV file to play when Rhasspy wakes up
|
||||
* `recorded` - path to WAV file to play when a command finishes recording
|
||||
* `aplay` - configuration for ALSA speakers
|
||||
* `device` - name of ALSA device (see `aplay -L`) to use or empty for default device
|
||||
* `hermes` - configuration for MQTT "speakers" ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* WAV data published to `hermes/audioServer/<SITE_ID>/playBytes/<REQUEST_ID>`
|
||||
* Requires MQTT to be enabled
|
||||
* `command`
|
||||
* `system` - which voice command listener system to use (`webrtcvad`, `oneshot`, `hermes`, or `dummy`)
|
||||
* `webrtcvad` - configuration for [webrtcvad](https://github.com/wiseman/py-webrtcvad) system
|
||||
* `sample_rate` - sample rate of input audio
|
||||
* `chunk_size` - bytes per buffer (must correspond to 10, 20, or 30 ms of audio)
|
||||
* `vad_mode` - sensitivity of `webrtcvad` (0-3)
|
||||
* `min_sec` - minimum number of seconds in a command
|
||||
* `silence_sec` - number of seconds of silence after voice command before stopping
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `throwaway_buffers` - number of buffers to drop when recording starts
|
||||
* `speech_buffers` - number of buffers with speech before command starts
|
||||
* `oneshot` - configuration for voice command system that takes first audio frame as entire command
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `command` - configuration for external voice command program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `hermes` - configuration for MQTT-based voice command system that listens between `startListening` and `stopListening` commands ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `timeout_sec` - maximum number of seconds before stopping
|
||||
* `handle`
|
||||
* `system` - which intent handling system to use (`hass`, `command`, `remote`, or `dummy`)
|
||||
* `forward_to_hass` - true if intents are always forwarded to Home Assistant (even if `system` is `command` or `remote`)
|
||||
* `command` - configuration for external intent handling program
|
||||
* `program` - path to executable
|
||||
* `arguments` - list of arguments to pass to program
|
||||
* `remote` - configuration for remote HTTP intent handler
|
||||
* `url` - URL to POST intent JSON to and receive response JSON from
|
||||
* `mqtt` - configuration for MQTT ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `enabled` - true if MQTT client should be started
|
||||
* `host` - MQTT host
|
||||
* `port` - MQTT port
|
||||
* `username` - MQTT username (blank for anonymous)
|
||||
* `password` - MQTT password
|
||||
* `reconnect_sec` - number of seconds before client will reconnect
|
||||
* `site_id` - ID of site ([Hermes protocol](https://docs.snips.ai/reference/hermes))
|
||||
* `publish_intents` - true if intents are published to MQTT
|
||||
* `download` - configuration for profile file downloading
|
||||
* `cache_dir` - directory in your profile where downloaded files are cached
|
||||
* `conditions` - profile settings that will trigger file downloads
|
||||
* keys are profile setting paths (e.g., `wake.system`)
|
||||
* values are dictionaries whose keys are profile settings values (e.g., `snowboy`)
|
||||
* settings may have the form `<=N` or `!X` to mean "less than or equal to N" or "not X"
|
||||
* leaf nodes are dictionaries whose keys are destination file paths and whose values reference the `files` dictionary
|
||||
* `files` - locations, etc. of files to download
|
||||
* keys are names of files
|
||||
* values are dictionaries with:
|
||||
* `url` - URL of file to download
|
||||
* `cache` - `false` if file should be downloaded directly into profile (skipping cache)
|
||||
@@ -7,7 +7,7 @@ The following table summarizes language support for the various speech to text s
|
||||
| System | en | de | es | fr | it | nl | ru | el | hi | zh | vi | pt | ca |
|
||||
| ------ | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- |
|
||||
| [pocketsphinx](speech-to-text.md#pocketsphinx) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ |
|
||||
| [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | | | | ✓ | | | | | ✓ | | |
|
||||
| [kaldi](speech-to-text.md#kaldi) | ✓ | ✓ | | ✓ | | ✓ | | | | | ✓ | | |
|
||||
|
||||
## Pocketsphinx
|
||||
|
||||
@@ -98,6 +98,39 @@ During speech recognition, 16-bit 16 kHz mono WAV data will be POST-ed to the en
|
||||
|
||||
See `rhasspy.stt.RemoteDecoder` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Publishes transcriptions to `hermes/asr/textCaptured` ([Hermes protocol](https://docs.snips.ai/reference/hermes)) each time a voice command is spoken.
|
||||
|
||||
This is enabled by default.
|
||||
|
||||
## Home Assistant STT Platform
|
||||
|
||||
Use an [STT platform](https://www.home-assistant.io/integrations/stt) on your Home Assistant server.
|
||||
This is the same way [Ada](https://github.com/home-assistant/ada) sends speech to Home Assistant.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"speech_to_text": {
|
||||
"system": "hass_stt",
|
||||
"hass_stt": {
|
||||
"platform": "...",
|
||||
"sample_rate": 16000,
|
||||
"bit_size": 16,
|
||||
"channels": 1,
|
||||
"language": "en-US"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The settings from your profile's `home_assistant` section are automatically used (URL, access token, etc.).
|
||||
|
||||
Rhasspy will convert audio to the configured format before streaming it to Home Assistant.
|
||||
In the future, this will be auto-detected from the STT platform API.
|
||||
|
||||
See `rhasspy.stt.HomeAssistantSTTIntegration` for details.
|
||||
|
||||
## Command
|
||||
|
||||
Calls a custom external program to do speech recognition.
|
||||
|
||||
@@ -89,8 +89,24 @@ To run the Docker image, simply execute:
|
||||
```bash
|
||||
docker run -it -p 59125:59125 synesthesiam/marytts:5.2
|
||||
```
|
||||
|
||||
and visit [http://localhost:59125](http://localhost:59125) after it starts. For more English voices, run the following commands in a Bash shell:
|
||||
|
||||
and visit [http://localhost:59125](http://localhost:59125) after it starts.
|
||||
|
||||
If you're using [docker compose](https://docs.docker.com/compose/), add the following to your docker-compose.yml file:
|
||||
|
||||
marytts:
|
||||
image: synesthesiam/marytts:5.2
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "59125:59125"
|
||||
|
||||
When using docker-compose, set `marytts.url` in your profile to be `http://marytts:59125`. This will allow rhasspy, from within
|
||||
its docker container, to resolve and connect to marytts (its sibling container).
|
||||
|
||||
|
||||
### Adding Voices
|
||||
|
||||
For more English voices, run the following commands in a Bash shell:
|
||||
|
||||
```bash
|
||||
mkdir -p marytts-5.2/download
|
||||
@@ -111,6 +127,37 @@ Change the first line to select the voice you'd like to add. It's not recommende
|
||||
|
||||
See `rhasspy.tts.MaryTTSSentenceSpeaker` for details.
|
||||
|
||||
### Audio Effects
|
||||
|
||||
MaryTTS is capable of applying several audio effects when producing speech. See the web interface at [http://localhost:59125](http://localhost:59125)
|
||||
to experiment with this.
|
||||
|
||||
|
||||
To use these effects within Rhasspy, set `text_to_speech.marytts.effects` within your profile, for example:
|
||||
|
||||
```json
|
||||
"text_to_speech": {
|
||||
"system": "marytts",
|
||||
"marytts": {
|
||||
"url": "http://localhost:59125",
|
||||
"effects": {
|
||||
"effect_Volume_selected": "on",
|
||||
"effect_Volume_parameters": "amount=0.9;",
|
||||
"effect_TractScaler_selected": "on",
|
||||
"effect_TractScaler_parameters": "amount:1.2;",
|
||||
"effect_F0Add_selected": "on",
|
||||
"effect_F0Add_parameters": "f0Add:-50.0;",
|
||||
"effect_Robot_selected": "on",
|
||||
"effect_Robot_parameters": "amount=50.0;"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You can determine the names of the parameters by examining the web interface [http://localhost:59125](http://localhost:59125)
|
||||
using your browser's Developer Tools.
|
||||
|
||||
|
||||
## Google WaveNet
|
||||
|
||||
Uses Google's [WaveNet](https://cloud.google.com/text-to-speech/docs/wavenet) text to speech system. This **requires a Google account and an internet connection to function**. Rhasspy will cache WAV files for previously spoken sentences, but you will be sending Google information for every new sentence that Rhasspy speaks.
|
||||
@@ -143,6 +190,25 @@ Contributed by [Romkabouter](https://github.com/Romkabouter).
|
||||
|
||||
See `rhasspy.tts.GoogleWaveNetSentenceSpeaker` for details.
|
||||
|
||||
## Home Assistant TTS Platform
|
||||
|
||||
Use a [TTS platform](https://www.home-assistant.io/integrations/tts) on your Home Assistant server.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"text_to_speech": {
|
||||
"system": "hass_tts",
|
||||
"hass_tts": {
|
||||
"platform": "..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The settings from your profile's `home_assistant` section are automatically used (URL, access token, etc.).
|
||||
|
||||
See `rhasspy.tts.HomeAssistantSentenceSpeaker` for details.
|
||||
|
||||
## Command
|
||||
|
||||
You can extend Rhasspy easily with your own external text to speech system. When a sentence needs to be spoken, Rhasspy will call your custom program with the text given on standard in. Your program should return the corresponding WAV data on standard out.
|
||||
|
||||
@@ -1,48 +1,224 @@
|
||||
# Training
|
||||
|
||||
Rhasspy is designed to recognize voice commands that [you provide](#sentencesini). These commands are categorized by **intent**, and may contain variable **slots** or **entities**, such as the color and name of a light.
|
||||
Rhasspy is designed to recognize voice commands [in a template language](#sentencesini). These commands are categorized by **intent**, and may contain [slots](#slots-lists) or [named entities](#tags), such as the color and name of a light.
|
||||
|
||||
During the training process, Rhasspy simultaneously trains *both* a speech and intent recognizer. The speech recognizer converts voice commands to text, and the intent recognizer converts text to JSON events. Combined, they enable a low power, offline system like a Raspberry Pi to understand and respond to your voice commands.
|
||||
|
||||
## How It Works
|
||||
|
||||
Recognizing voice commands typically involves two main steps:
|
||||
|
||||
1. Speech to text (transcription)
|
||||
2. Text to intent (recognition)
|
||||
|
||||
For step (1), Rhasspy uses [pocketsphinx](https://github.com/cmusphinx/pocketsphinx) or [Kaldi](https://kaldi-asr.org), and generates a custom [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) during the training process. Specifically, the steps are:
|
||||
|
||||
1. Convert the grammar from your [sentences.ini](#sentencesini) file to a [finite state transducer](https://www.openfst.org)
|
||||
2. (Optionally) generate all possible sentences that can be spoken with entities tagged (e.g., `name` is `bedroom light`, `color` is `red`)
|
||||
3. Use the [opengrm](http://www.opengrm.org/twiki/bin/view/GRM/NGramLibrary) toolkit to create a custom language model
|
||||
4. Train an intent recognizer with the tagged sentences
|
||||
|
||||
Additionally, a custom [CMU phonetic dictionary](https://cmusphinx.github.io/wiki/tutorialdict/) is generated with *only* the words in your voice commands (and wake word, if you're using a [pocketsphinx keyphrase](wake-word.md#pocketsphinx)). If the pronunciation of a word is not known, Rhasspy calls out to [phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) to get a guess, and then halts training. Once you've confirmed the pronunciations by adding them to your [custom words](#custom-words), training can continue.
|
||||
|
||||
For step (4), Rhasspy can use a [variety of intent recognition systems](intent-recognition.md). However, most are trained from the **tagged sentences** generated from [sentences.ini](#sentencesini), e.g., `turn [on](state) the [living room lamp](name)`. These sentences are transformed into JSON, like:
|
||||
|
||||
{
|
||||
"ChangeLightState": [
|
||||
{
|
||||
"text": "turn on the living room lamp",
|
||||
"entities": [
|
||||
{ "entity": "state", "value": "on" },
|
||||
{ "entity": "name", "value": "living room lamp" }
|
||||
]
|
||||
},
|
||||
...
|
||||
],
|
||||
...
|
||||
}
|
||||
|
||||
and provided as training material to the intent recognition system. The [fuzzywuzzy](intent-recognition.md#fuzzywuzzy) system, for example, simply saves the JSON file and, during recognition, finds the closest matching sentence according to the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance). The [default intent recognizer](intent-recognition.md#fsticuffs) interacts directly with the finite state transducer(s) generated in step (1) and, while less tolerant of errors than `fuzzywuzzy`, is significantly faster for large sets of voice commands (i.e., millions).
|
||||
|
||||
More sophisticated systems like [Rasa NLU](intent-recognition.md#rasanlu) use machine learning techniques to classify sentences by intent and assign slot (entity) values. These systems are much better at recognizing sentences not seen during training, but can take minutes to hours to train.
|
||||
* Intent Recognition
|
||||
* [Basic Syntax](#basic-syntax)
|
||||
* [Named Entities](#tags)
|
||||
* [Slots](#slots-lists)
|
||||
* Speech Recognition
|
||||
* [Custom Words](#custom-words)
|
||||
* [Language Model Mixing](#language-model-mixing)
|
||||
|
||||
## sentences.ini
|
||||
|
||||
Voice commands are recognized by Rhasspy from a set of sentences that you define in your [profile](profiles.md). These are stored in an [ini file](https://docs.python.org/3/library/configparser.html) whose "values" are simplified [JSGF grammars](https://www.w3.org/TR/jsgf/). The set of all sentences *generated* from these grammars is used to train an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) and an intent recognizer.
|
||||
Voice commands are stored in an [ini file](https://docs.python.org/3/library/configparser.html) whose "sections" are intents and "values" are sentence templates.
|
||||
|
||||
### Basic Syntax
|
||||
|
||||
To get started, simply list your intents (surrounded by brackets) and the possible ways of invoking them below:
|
||||
|
||||
```
|
||||
[TestIntent1]
|
||||
this is a sentence
|
||||
this is another sentence for the same intent
|
||||
|
||||
[TestIntent2]
|
||||
this is a sentence for a different intent
|
||||
```
|
||||
|
||||
If you say "this is a sentence" after hitting the `Train` button, it will generate a `TestIntent1`.
|
||||
|
||||
### Groups
|
||||
|
||||
You can group multiple words together using `(parentheses)` like:
|
||||
|
||||
```
|
||||
turn on the (living room lamp)
|
||||
```
|
||||
|
||||
Groups (sometimes called sequences) can be [tagged](#tags) and [substituted](#substitutions) like single words. They may also contain [alternatives](#alternatives).
|
||||
|
||||
### Optional Words
|
||||
|
||||
Within a sentence template, you can specify optional word(s) by surrounding them `[with brackets]`. For example:
|
||||
|
||||
```
|
||||
[an] example sentence [with] some optional words
|
||||
```
|
||||
|
||||
will match:
|
||||
|
||||
* `an example sentence with some optional words`
|
||||
* `example sentence with some optional words`
|
||||
* `an example sentence some optional words`
|
||||
* `example sentence some optional words`
|
||||
|
||||
### Alternatives
|
||||
|
||||
A set of items where only one is matched at a time is `(specified | like | this)`. For N items, there will be N matched sentences (unless you nest optional words, etc.). The template:
|
||||
|
||||
```
|
||||
set the light to (red | green | blue)
|
||||
```
|
||||
|
||||
will match:
|
||||
|
||||
* `set the light to red`
|
||||
* `set the light to green`
|
||||
* `set the light to blue`
|
||||
|
||||
### Tags
|
||||
|
||||
Named entities are marked in your sentence templates with `{tags}`. The name of the `{entity}` is between the curly braces, while the `(value of the){entity}` comes immediately before:
|
||||
|
||||
```
|
||||
[SetLightColor]
|
||||
set the light to (red | green | blue){color}
|
||||
```
|
||||
|
||||
With the `{color}` tag attached to `(red | green | blue)`, Rhasspy will match:
|
||||
|
||||
* `set the light to [red](color)`
|
||||
* `set the light to [green](color)`
|
||||
* `set the light to [blue](color)`
|
||||
|
||||
When the `SetLightColor` intent is recognized, the JSON event will contain a `color` property whose value is either "red", "green" or "blue".
|
||||
|
||||
#### Tag Synonyms
|
||||
|
||||
Tag/named entity values can be [substituted](#substitutions) using the colon (`:`) inside the `{curly:braces}` like:
|
||||
|
||||
```
|
||||
turn on the (living room lamp){name:light_1}
|
||||
```
|
||||
|
||||
Now the `name` property of the intent JSON event will contain "light_1" instead of "living room lamp".
|
||||
|
||||
### Substitutions
|
||||
|
||||
The colon (`:`) is used to put something different than what's spoken into the recognized intent JSON. The left-hand side of the `:` is what Rhasspy expects to hear, while the right-hand side is what gets put into the intent:
|
||||
|
||||
```
|
||||
turn on the (living room lamp):light_1
|
||||
```
|
||||
|
||||
In this example, the spoken phrase "living room lamp" will be replaced by "light_1" in the recognized intent. Substitutions work for single words, [groups](#groups), [alternatives](#alternatives), and [tags](#tags):
|
||||
|
||||
```
|
||||
turn on the living room lamp:light
|
||||
(turn | switch):switch on the living room lamp
|
||||
turn (on){action:activate} the living room lamp
|
||||
```
|
||||
|
||||
See [tag synonyms](#tag-synonyms) for more details on tag substitution.
|
||||
|
||||
You can leave the left-hand or right-hand side (or both!) of the `:` empty:
|
||||
|
||||
```
|
||||
these: words: will: be: dropped:
|
||||
:these :will :be :added
|
||||
```
|
||||
|
||||
When the right-hand side is empty (`dropped:`), the spoken word will not appear in the intent. An empty left-hand side (`:added`) means the word is *not* spoken, but will appear in the intent.
|
||||
|
||||
Leaving **both** sides empty does nothing unless you attach a [tag](#tags) to it. This allows you to embed a named entity in a voice command without matching specific words:
|
||||
|
||||
```
|
||||
turn on the living room lamp (:){domain:light}
|
||||
```
|
||||
|
||||
An intent from the example above will contain a `domain` entity whose value is `light`.
|
||||
|
||||
### Rules
|
||||
|
||||
Rules allow you to reuse parts of your sentence templates. They're defined by `rule_name = ...` alongside other sentences and referenced by `<rule_name>`. For example:
|
||||
|
||||
```
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
```
|
||||
|
||||
which is equivalent to:
|
||||
|
||||
```
|
||||
set the light to (red | green | blue)
|
||||
```
|
||||
|
||||
You can **share rules** across intents by referencing them as `<IntentName.rule_name>` like:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
[GetLightColor]
|
||||
is the light <SetLightColor.colors>
|
||||
|
||||
The second intent (`GetLightColor`) references the `colors` rule from `SetLightColor`. Rule references without a dot must exist in the current intent.
|
||||
|
||||
### Slots Lists
|
||||
|
||||
Large [alternatives](#alternatives) can become unwieldy quickly. For example, say you have a list of movie names:
|
||||
|
||||
```
|
||||
movies = ("Primer" | "Moon" | "Chronicle" | "Timecrimes" | "Mulholland Drive" | ... )
|
||||
```
|
||||
|
||||
Rather than keep this list in `sentences.ini`, you may put each movie name on a separate line in a file named `slots/movies` (no file extension) and reference it as `$movies`. Rhasspy automatically loads all files in the `slots` directory of your [profile](profiles.md) and makes them available as slots lists.
|
||||
|
||||
For the example above, the file `slots/movies` should contain:
|
||||
|
||||
```
|
||||
Primer
|
||||
Moon
|
||||
Chronicle
|
||||
Timecrimes
|
||||
Mulholland Drive
|
||||
```
|
||||
|
||||
Now you can simply use the placeholder `$movies` in your sentence templates:
|
||||
|
||||
```
|
||||
[PlayMovie]
|
||||
play ($movies){movie_name}
|
||||
```
|
||||
|
||||
When matched, the `PlayMovie` intent JSON will contain a `movie_name` property with either "Primer", "Moon", etc.
|
||||
|
||||
Make sure to **re-train** Rhasspy whenever you update your slot values.
|
||||
|
||||
#### Slot Synonyms
|
||||
|
||||
Slot values are themselves sentence templates! So you can use all of the familiar syntax from above. Slot "synonyms" can be created simply using [substitutions](#substitutions). So a file named `slots/rooms` may contain:
|
||||
|
||||
```
|
||||
[the:] (den | playroom | downstairs):den
|
||||
```
|
||||
|
||||
which is referenced by `$rooms` and will match:
|
||||
|
||||
* the den
|
||||
* den
|
||||
* the playroom
|
||||
* playroom
|
||||
* the downstairs
|
||||
* downstairs
|
||||
|
||||
This will always output just "den" because `[the:]` optionally matches "the" and then drops the word.
|
||||
|
||||
### Special Cases
|
||||
|
||||
If one of your sentences happens to start with an optional word (e.g., `[the]`), this can lead to a problem:
|
||||
|
||||
[SomeIntent]
|
||||
[the] problem sentence
|
||||
|
||||
Python's [configparser](https://docs.python.org/3/library/configparser.html) will interpret `[the]` as a new section header, which will produce a new intent, grammar, etc. Rhasspy handles this special case by using a backslash escape sequence (`\[`):
|
||||
|
||||
[SomeIntent]
|
||||
\[the] problem sentence
|
||||
|
||||
Now `[the]` will be properly interpreted as a sentence under `[SomeIntent]`. You only need to escape a `[` if it's the **very first** character in your sentence.
|
||||
|
||||
### Motivation
|
||||
|
||||
@@ -67,162 +243,6 @@ Compared to JSON, YAML, etc., there is minimal syntactic overhead for the purpos
|
||||
|
||||
Each of these shortcomings is addressed by considering the space between intent headings (`[Intent 1]`, etc.) as a **grammar** that represents many possible voice commands. The possible sentences, stripped of their tags, are used as input to [opengrm](https://www.opengrm.org) to produce a standard ARPA language model for [pocketsphinx](https://github.com/cmusphinx/pocketsphinx) or [Kaldi](https://kaldi-asr.org). The tagged sentences are then used to train an intent recognizer.
|
||||
|
||||
### Optional Words
|
||||
|
||||
Within a sentence, you can specify optional word(s) by surrounding them `[with brackets]`. These will generate at least two sentences: one with the optional word(s), and one without. So the following sentence template:
|
||||
|
||||
[an] example sentence [with] some optional words
|
||||
|
||||
will generate 4 concrete sentences:
|
||||
|
||||
1. `an example sentence with some optional words`
|
||||
2. `example sentence with some optional words`
|
||||
3. `an example sentence some optional words`
|
||||
4. `example sentence some optional words`
|
||||
|
||||
### Alternatives
|
||||
|
||||
A set of items, where only one is present at a time, is `(specified | like | this)`. For N items, there will be N sentences generated (unless you nest optional words, etc.). The template:
|
||||
|
||||
set the light to (red | green | blue)
|
||||
|
||||
will generate:
|
||||
|
||||
1. `set the light to red`
|
||||
2. `set the light to green`
|
||||
3. `set the light to blue`
|
||||
|
||||
### Rules
|
||||
|
||||
Rules allow you to reuse common phrases, alternatives, etc. Rules are defined by `rule_name = ...` alongside your sentences and referenced by `<rule_name>`. The template above with colors could be rewritten as:
|
||||
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
which will generate the same 3 sentences as above. Importantly, you can **share rules** across intents by prefixing the rule's name with the intent name followed by a dot:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue)
|
||||
set the light to <colors>
|
||||
|
||||
[GetLightColor]
|
||||
is the light <SetLightColor.colors>
|
||||
|
||||
The second intent (`GetLightColor`) references the `colors` rule from `SetLightColor`.
|
||||
|
||||
### Tags
|
||||
|
||||
The example templates above will generate sentences for training the speech recognizer, but using them to train the intent recognizer will not be satisfactory. The `SetLightColor` intent, when recognized, will result in a Home Assistant event called `rhasspy_SetLightColor`. But the actual *color* will not be provided because the intent recognizer is not aware that a `color` slot should exist (and has the values `red`, `green`, and `blue`).
|
||||
|
||||
Luckily, JSGF has a [tag feature](https://www.w3.org/TR/jsgf/#15057) that lets you annotate portions of sentences/rules. Rhasspy assumes that the tags themselves are *slot/entity names* and the tagged portions of the sentence are *slot/entity values*. The `SetLightColor` example can be extended with tags like this:
|
||||
|
||||
[SetLightColor]
|
||||
colors = (red | green | blue){color}
|
||||
set the light to <colors>
|
||||
|
||||
With the `{color}` tag attached to the `(red | green | blue)` alternative set, each color name will carry the tag. This is the same as typing `((red){color} | (green){color} | (blue){color})`, but less verbose. Rhasspy will now generate the following **tagged sentences**:
|
||||
|
||||
1. `set the light to [red](color)`
|
||||
2. `set the light to [green](color)`
|
||||
3. `set the light to [blue](color)`
|
||||
|
||||
When the `SetLightColor` intent is recognized now, the corresponding JSON event (`rhasspy_SetLightColor` in Home Assistant) will have the following properties:
|
||||
|
||||
{
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
|
||||
A Home Assistant [automation](https://www.home-assistant.io/docs/automation) can use the slot values to take an appropriate action, such as [setting an RGB light's color](https://www.home-assistant.io/docs/automation/action/) to `[255,0,0]` (red).
|
||||
|
||||
#### Tag Synonyms
|
||||
|
||||
There are times where you want to match a particular part of your sentence with a tag, but want the actual *value* of the tag to be something different than the matched text. This is needed if you want to talk about entities in Home Assistant, for example, with phrases like "the living room lamp", but want to pass the appropriate entity id (say `lamp_1`) to Home Assistant instead.
|
||||
|
||||
Normally, you would tag part of a sentence like this:
|
||||
|
||||
[ChangeLightState]
|
||||
turn on the (living room lamp){name}
|
||||
|
||||
When this intent is activated, Rhasspy will send a JSON event (named `rhasspy_ChangeLightState` in Home Assistant) with:
|
||||
|
||||
{
|
||||
"name": "living room lamp"
|
||||
}
|
||||
|
||||
You can catch this event in a Home Assistant automation, match the `name` "living room lamp", and do something with the `lamp_1` entity. That's fine for one instance, but would require a separate rule for every `name`! Instead, let's add a tag **synonym**:
|
||||
|
||||
[ChangeLightState]
|
||||
turn on the (living room lamp){name:lamp_1}
|
||||
|
||||
The tag label and synonym are separated by a ":". When this sentence is spoken and the intent is activated, the same `rhasspy_ChangeLightState` event will be sent to Home Assistant, but with the following data:
|
||||
|
||||
{
|
||||
"name": "lamp_1"
|
||||
}
|
||||
|
||||
Now in your Home Assistant automation, you could use [templating](https://www.home-assistant.io/docs/automation/templating/) to plug the `name` directly into the `entity_id` field of an action. One rule to rule them all.
|
||||
|
||||
This same technique could be used to replace number words with digits, like:
|
||||
|
||||
[SetTimer]
|
||||
set a timer for (ten){number:10} seconds
|
||||
|
||||
which would generate an event like this when recognized:
|
||||
|
||||
{
|
||||
"number": "10"
|
||||
}
|
||||
|
||||
### Slots Lists
|
||||
|
||||
In the `SetLightColor` example above, the color names are stored in `sentences.ini` as a rule:
|
||||
|
||||
colors = (red | green | blue)
|
||||
|
||||
This is convenient when the list of colors is small, changes infrequently, and does not depend on an external service.
|
||||
But what if this was a list of movie names that were stored on your [Kodi Home Theater](https://kodi.tv)?
|
||||
|
||||
movies = ("Primer" | "Moon" | "Chronicle" | "Timecrimes" | "Mulholland Drive" | ... )
|
||||
|
||||
It would be much easier if this list was stored externally, but could be *referenced* in the appropriate places in the grammar.
|
||||
This is possible in Rhasspy by placing text files in the `speech_to_text.slots_dir` directory specified in your [profile](profiles.md) ("slots" by default).
|
||||
|
||||
If you're using the English (`en`) profile, for example, create the file `profiles/en/slots/movies` and add the following content:
|
||||
|
||||
Primer
|
||||
Moon
|
||||
Chronicle
|
||||
Timecrimes
|
||||
Mulholland Drive
|
||||
|
||||
This list of movies can now be referenced as `$movies` in your `sentences.ini` file! Something like:
|
||||
|
||||
[PlayMovie]
|
||||
play ($movies){movie_name}
|
||||
|
||||
will generate `rhasspy_PlayMovie` events like:
|
||||
|
||||
{
|
||||
"movie_name": "Primer"
|
||||
}
|
||||
|
||||
If you update the `movies` file, make sure to re-train Rhasspy in order to pick up the new movie names.
|
||||
|
||||
### Special Cases
|
||||
|
||||
If one of your sentences happens to start with an optional word (e.g., `[the]`), this can lead to a problem:
|
||||
|
||||
[SomeIntent]
|
||||
[the] problem sentence
|
||||
|
||||
Python's [configparser](https://docs.python.org/3/library/configparser.html) will interpret `[the]` as a new section header, which will produce a new intent, grammar, etc. Rhasspy handles this special case by using a backslash escape sequence (`\[`):
|
||||
|
||||
[SomeIntent]
|
||||
\[the] problem sentence
|
||||
|
||||
Now `[the]` will be properly interpreted as a sentence under `[SomeIntent]`. You only need to escape a `[` if it's the **very first** character in your sentence.
|
||||
|
||||
## Custom Words
|
||||
|
||||
Rhasspy looks for words you've defined outside of your profile's base dictionary (typically `base_dictionary.txt`) in a custom words file (typically `custom_words.txt`). This is just a [CMU phonetic dictionary](https://cmusphinx.github.io/wiki/tutorialdict/) with words/pronunciations separated by newlines:
|
||||
@@ -232,170 +252,11 @@ Rhasspy looks for words you've defined outside of your profile's base dictionary
|
||||
|
||||
You can use the [Words tab](usage.md#words-tab) in Rhasspy's web interface to generate this dictionary. During training, Rhasspy will merge `custom_words.txt` into your `dictionary.txt` file so the [speech to text](speech-to-text.md) system knows how the words in your voice commands are pronounced.
|
||||
|
||||
## Speech to Text
|
||||
|
||||
By default, Rhasspy generates training sentences from your [sentences.ini](#sentencesini) file, and then trains a custom language model using [opengrm](https://www.opengrm.org). You can call a **custom program** instead if you want to use a different language modeling toolkit or your custom speech to text system needs special training.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"training": {
|
||||
"speech_to_text": {
|
||||
"system": "command",
|
||||
"command": {
|
||||
"program": "/path/to/program",
|
||||
"arguments": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When training, your program will be called with all of the training sentences grouped by intent in JSON to standard in. No output is expected from your program besides a successful exit code. **NOTE**: Rhasspy will not generate `dictionary.txt` or `language_model.txt` if you use a custom program.
|
||||
|
||||
The input JSON is an object where each key is the name of an intent and the values are lists of training sentence objects. Each sentence object has the text of the sentence, all tagged entities, and the tokens of the sentence.
|
||||
|
||||
Example input:
|
||||
|
||||
{
|
||||
"GetTime": [
|
||||
{
|
||||
"sentence": "what time is it",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"what",
|
||||
"time",
|
||||
"is",
|
||||
"it"
|
||||
]
|
||||
},
|
||||
{
|
||||
"sentence": "tell me the time",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"tell",
|
||||
"me",
|
||||
"the",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
],
|
||||
"ChangeLightColor": [
|
||||
{
|
||||
"sentence": "set the bedroom light to red",
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom light"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"set",
|
||||
"the",
|
||||
"bedroom",
|
||||
"light",
|
||||
"to",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
See [train-stt.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/train-stt.sh) for an example program.
|
||||
|
||||
## Intent Recognition
|
||||
|
||||
During training, Rhasspy uses the sentences generated from [sentences.ini](#sentencesini) as training material for the selected intent recognition system. If your intent recognition system requires some special training, you can call a **custom program** here.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
```json
|
||||
"training": {
|
||||
"intent": {
|
||||
"system": "command",
|
||||
"command": {
|
||||
"program": "/path/to/program",
|
||||
"arguments": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
During training, Rhasspy will call your program with the training sentences grouped by intent in JSON printed to standard in. No output is expected, besides a successful exit code.
|
||||
|
||||
The input JSON is an object where each key is the name of an intent and the values are lists of training sentence objects. Each sentence object has the text of the sentence, all tagged entities, and the tokens of the sentence.
|
||||
|
||||
Example input:
|
||||
|
||||
```json
|
||||
{
|
||||
"GetTime": [
|
||||
{
|
||||
"sentence": "what time is it",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"what",
|
||||
"time",
|
||||
"is",
|
||||
"it"
|
||||
]
|
||||
},
|
||||
{
|
||||
"sentence": "tell me the time",
|
||||
"entities": [],
|
||||
"tokens": [
|
||||
"tell",
|
||||
"me",
|
||||
"the",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
],
|
||||
"ChangeLightColor": [
|
||||
{
|
||||
"sentence": "set the bedroom light to red",
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom light"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"set",
|
||||
"the",
|
||||
"bedroom",
|
||||
"light",
|
||||
"to",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
* `$RHASSPY_BASE_DIR` - path to the directory where Rhasspy is running from
|
||||
* `$RHASSPY_PROFILE` - name of the current profile (e.g., "en")
|
||||
* `$RHASSPY_PROFILE_DIR` - directory of the current profile (where `profile.json` is)
|
||||
|
||||
See [train-intent.sh](https://github.com/synesthesiam/rhasspy/blob/master/bin/mock-commands/train-intent.sh) for an example program.
|
||||
|
||||
|
||||
## Language Model Mixing
|
||||
|
||||
Rhasspy is designed to only respond to the voice commands you specify in [sentences.ini](training.md#sentencesini), but both the Pocketsphinx and Kaldi speech to text systems are capable of transcribing open ended speech. While this will never be as good as a cloud-based system, Rhasspy offers it as an option.
|
||||
Rhasspy is designed to only respond to the voice commands you specify in [sentences.ini](training.md#sentencesini), but both the Pocketsphinx and Kaldi speech to text systems are capable of transcribing open ended speech. While this will never be as good as a cloud-based system, Rhasspy [offers it as an option](speech-to-text.md#open-transcription).
|
||||
|
||||
Open ended speech is achieved in Rhasspy by the inclusion of `base_dictionary.txt` and `base_language_model.txt` files in every profile. The former is a dictionary containing the pronunciations of all possible words. The latter is a large language model trained on a very large corpus of text in the profile's language (usually books and web pages).
|
||||
|
||||
During training, Rhasspy can **mix** this large, open ended language model with the one generated specifically for your voice commands. You specify a **mixture weight**, which controls how much of an influence the large language model has; a mixture weight of 0 makes Rhasspy sensitive *only* to your voice commands, which is the default.
|
||||
A middle ground between open transcription and custom voice commands is **language model mixing**. During training, Rhasspy can mix a (large) pre-built language model with the custom-generated one. You specify a **mixture weight** (0-1), which controls how much of an influence the large language model has; a mixture weight of 0 makes Rhasspy sensitive *only* to your voice commands, which is the default.
|
||||
|
||||

|
||||
|
||||
@@ -468,15 +329,6 @@ $ echo 'would you please turn on the living room lamp' | \
|
||||
"value": "on"
|
||||
}
|
||||
],
|
||||
"tokens": [
|
||||
"turn",
|
||||
"on",
|
||||
"the",
|
||||
"living",
|
||||
"room",
|
||||
"lamp"
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
@@ -486,7 +338,6 @@ $ echo 'would you please turn on the living room lamp' | \
|
||||
|
||||
But this works only because the default intent recognizer ([fsticuffs](intent-recognition.md#fsticuffs)) ignores unknown words by default, so "would you please" is not interpreted. Changing "lamp" to "light" in the input sentence will reveal the problem:
|
||||
|
||||
|
||||
```
|
||||
$ echo 'would you please turn on the living room light' | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
@@ -499,7 +350,6 @@ $ echo 'would you please turn on the living room light | \
|
||||
"confidence": 0
|
||||
},
|
||||
"entities": [],
|
||||
"speech_confidence": 1,
|
||||
"slots": {}
|
||||
}
|
||||
}
|
||||
@@ -535,7 +385,6 @@ $ echo 'would you please turn on the living room light' | \
|
||||
"value": "on"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on"
|
||||
}
|
||||
@@ -545,4 +394,4 @@ $ echo 'would you please turn on the living room light' | \
|
||||
|
||||
This works well for our toy example, but will not scale well when there are thousands of voice commands represented in `sentences.ini` or if the words used are significantly different than in the training set ("light" and "lamp" are close enough for `fuzzywuzzy`).
|
||||
|
||||
A machine learning-based intent recognizer, like [flar](intent-recognition.md#flair), would be a better choice for open ended speech.
|
||||
A machine learning-based intent recognizer, like [flair](intent-recognition.md#flair) or [Rasa](intent-recognition.md#rasanlu), would be a better choice for open ended speech.
|
||||
|
||||
@@ -0,0 +1,224 @@
|
||||
# Tutorials
|
||||
|
||||
* [RGB Light Example](#rgb-light-example)
|
||||
* [Client/Server Setup](#clientserver-setup)
|
||||
|
||||
## RGB Light Example
|
||||
|
||||
Let's say you have an RGB light of some kind in your bedroom that's [hooked up already to Home Assistant](https://www.home-assistant.io/components/light.mqtt). You'd like to be able to say things like "*set the bedroom light to red*" to change its color. To start, let's write a [Home Assistant automation](https://www.home-assistant.io/docs/automation/action/) to help you out:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
...
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
Now you just need the trigger! Rhasspy will send events that can be caught with the [event trigger platform](https://www.home-assistant.io/docs/automation/trigger/#event-trigger). A different event will be sent for each *intent* that you define, with slot values corresponding to important parts of the command (like light name and color). Let's start by defining an intent in Rhasspy called `ChangeLightState` that can be said a few different ways:
|
||||
|
||||
[ChangeLightState]
|
||||
colors = (red | green | blue) {color}
|
||||
set [the] (bedroom){name} [to] <colors>
|
||||
|
||||
This is a [simplified JSGF grammar](training.md#sentencesini) that will generate the following sentences:
|
||||
|
||||
* set the bedroom to red
|
||||
* set the bedroom to green
|
||||
* set the bedroom to blue
|
||||
* set the bedroom red
|
||||
* set the bedroom green
|
||||
* set the bedroom blue
|
||||
* set bedroom to red
|
||||
* set bedroom to green
|
||||
* set bedroom to blue
|
||||
* set bedroom red
|
||||
* set bedroom green
|
||||
* set bedroom blue
|
||||
|
||||
Rhasspy uses these sentences to create an [ARPA language model](https://cmusphinx.github.io/wiki/arpaformat/) for speech recognition, and also train an intent recognizer that can extract relevant parts of the command. The `{color}` tag in the `colors` rule will make Rhasspy put a `color` property in each event with the name of the recognized color (red, green, or blue). Likewise, the `{name}` tag on `bedroom` will add a `name` property to the event.
|
||||
|
||||
If trained on these sentences, Rhasspy will now recognize commands like "*set the bedroom light to red*" and send a `rhasspy_ChangeLightState` event to Home Assistant with the following data:
|
||||
|
||||
{
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
|
||||
You can now fill in the rest of the Home Assistant automation:
|
||||
|
||||
automation:
|
||||
# Change the light in the bedroom to red.
|
||||
trigger:
|
||||
platform: event
|
||||
event_type: rhasspy_ChangeLightState
|
||||
event_data:
|
||||
name: bedroom
|
||||
color: red
|
||||
action:
|
||||
service: light.turn_on
|
||||
data:
|
||||
rgb_color: [255, 0, 0]
|
||||
entity_id: light.bedroom
|
||||
|
||||
This will handle the specific case of setting the bedroom light to red, but not any other color. You can either add additional automations to handle these, or make use of [automation templating](https://www.home-assistant.io/docs/automation/templating/) to do it all at once.
|
||||
|
||||
## Client/Server Setup
|
||||
|
||||
Contributed by [jaburges](https://community.home-assistant.io/u/jaburges)
|
||||
|
||||
* Hardware used:
|
||||
* Raspberry Pi 3B w/ 8GB SD card
|
||||
* [Seeed 4 Mic Array](https://www.amazon.com/seeed-Studio-ReSpeaker-4-Mic-Raspberry/dp/B076SSR1W1)
|
||||
* Software used:
|
||||
* [Raspbian Buster Lite](https://downloads.raspberrypi.org/raspbian_lite_latest)
|
||||
* [Etcher](https://www.balena.io/etcher/)
|
||||
* Docker ([install Docker](installation.md#docker))
|
||||
|
||||
### Server Steps
|
||||
|
||||
1. Assuming you already have Docker running, create a directory for Rhasspy and a subdirectory called `profiles`.
|
||||
2. Pull and Run docker image:
|
||||
|
||||
docker run -p 12101:12101 \
|
||||
--restart unless-stopped \
|
||||
--name rhasspy \
|
||||
-v "/<PATH_TO>/rhasspy/profiles:/profiles" \
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
3. Go to server URL `http://<Server_IP>:12101` (you may be asked to download files)
|
||||
4. Go to settings and check configuration (and save along the way):
|
||||
|
||||
[Rhasspy]
|
||||
Listen for wake word on Startup = UNchecked
|
||||
|
||||
[Home Assistant]
|
||||
Do not use Home Assistant (note you obviously can instead of Node-Red)
|
||||
|
||||
[Wake Word]
|
||||
No Wake word on this device
|
||||
|
||||
[Voice Detection]
|
||||
No voice communication on this device
|
||||
|
||||
[Speech Recognition]
|
||||
Do Speech recognition with pocketsphinx
|
||||
|
||||
[Intent Recognition]
|
||||
Do intent recognition with fuzzywuzzy
|
||||
|
||||
[Text to Speech]
|
||||
No Text to speech on this device
|
||||
|
||||
[Audio Recording]
|
||||
No recording on this device
|
||||
|
||||
[Audio Playing]
|
||||
No Playback on this device
|
||||
|
||||
5. Check Slots, and Sentences tabs and make sure to hit `Train` and then `Restart`
|
||||
|
||||
### Client Steps
|
||||
|
||||
1. Flash an 8GB MicroSD card with [Buster](https://downloads.raspberrypi.org/raspbian_lite_latest) using [Etcher](https://www.balena.io/etcher/).
|
||||
2. Remove and re-insert MicroSD card and add files to the root directory (for headless setup - meaning no screen needed). You only need `wpa_supplicant` if you plan to use WiFi.
|
||||
* a file simply called `ssh`
|
||||
* `wpa_supplicant.conf` ([example here](https://pastebin.com/cDhyhQLs))
|
||||
3. Insert the MicroSD card in the Pi, use a proper Power Supply and check your router for the IP address it gets.
|
||||
4. SSH into the Pi using that IP address (I use [Putty](https://the.earth.li/~sgtatham/putty/latest/w64/putty-64bit-0.73-installer.msi)) using pi default user/pass = pi/raspberry.
|
||||
You are going to want to change that in the future!
|
||||
5. Install git:
|
||||
|
||||
sudo apt install git
|
||||
|
||||
6. Install Seeed mic array based on info [here](https://github.com/respeaker/seeed-voicecard)
|
||||
|
||||
git clone https://github.com/respeaker/seeed-voicecard
|
||||
cd seeed-voicecard
|
||||
sudo ./install.sh
|
||||
sudo reboot
|
||||
|
||||
7. Plug in Seeed speaker and check install was successful against expected result here 5:
|
||||
|
||||
arecord -L
|
||||
|
||||
8. Install docker:
|
||||
|
||||
curl -sSL https://get.docker.com | sh
|
||||
|
||||
9. Modify user permissions to access docker without using `sudo` all the time ;)
|
||||
|
||||
sudo usermod -a -G docker pi
|
||||
|
||||
10. Close SSH, and relaunch SSH connection to use new permissions.
|
||||
11. Create directories for Rhasspy Docker image to use:
|
||||
|
||||
cd /home/pi
|
||||
mkdir rhasspy
|
||||
cd rhasspy
|
||||
mkdir profiles
|
||||
|
||||
12. Pull and run docker image:
|
||||
|
||||
docker run -p 12101:12101 \
|
||||
--restart unless-stopped \
|
||||
--name rhasspy \
|
||||
-v "/home/pi/rhasspy/profiles:/profiles" \
|
||||
--device /dev/snd:/dev/snd \
|
||||
synesthesiam/rhasspy-server:latest \
|
||||
--user-profiles /profiles \
|
||||
--profile en
|
||||
|
||||
13. Go to Client URL `http://<Pi_IP_address>:12101` (you will be asked to download some files)
|
||||
(At time of writing I put Wakeword, voice detection and recognition on the client)
|
||||
14. Under settings ensure the following is selected, Save along the way. You will need to Train once also.
|
||||
|
||||
[Rhasspy]
|
||||
Listen for wake word on Startup = checked
|
||||
|
||||
[Home Assistant]
|
||||
Do not use Home Assistant (note you obviously can instead of Node-Red)
|
||||
|
||||
[Wake Word]
|
||||
Use snowboy (this should trigger a download of more files)
|
||||
|
||||
[Voice Detection]
|
||||
Use webrtcvad and listen for silence
|
||||
|
||||
[Speech Recognition]
|
||||
Use Remote Rhasspy server for speech recognition:
|
||||
URL = http://<SERVER_IP>:12101/api/speech-to-text
|
||||
|
||||
[Intent Recognition]
|
||||
Use Remote Rhasspy server for intent recognition:
|
||||
URL = http://<SERVER_IP>:12101/api/text-to-intent
|
||||
|
||||
[Text to Speech]
|
||||
No Text to speech on this device
|
||||
|
||||
[Audio Recording]
|
||||
Use PyAudio (default)
|
||||
Input Device = seeed-4mic-voicecard (you can test this if you want)
|
||||
|
||||
[Audio Playing]
|
||||
No Playback on this device
|
||||
|
||||
### Node-Red Config
|
||||
|
||||
1. Import [this flow](https://github.com/synesthesiam/rhasspy/blob/cda3a02775865d49b52d32a3af7264b7cbd69472/examples/nodered/time-light-flow.js) from the Rhasspy examples
|
||||
2. Attach a debug node to the websocket in and configure it to show full msg object.
|
||||
3. I edited light text node to take this:
|
||||
|
||||
{
|
||||
"domain": "light",
|
||||
"service": "turn_{{slots.state}}",
|
||||
"entity_id": "{{slots.name}}"
|
||||
}
|
||||
|
||||
4. Add a call service node after the light text and leave it blank. Deploy and Enjoy offline voice assistant.
|
||||
|
||||
Pick a light (that is a light domain not a switch), and say "Snowboy, turn bedroom light off" :)
|
||||
@@ -1,11 +1,31 @@
|
||||
# Usage
|
||||
|
||||
You can interact with Rhasspy in different ways besides just your voice. Rhasspy includes a [web interface](#web-inteface), typically hosted on port 12101. There is also an [HTTP API](#http-api) that lets you programmatically manipulate Rhasspy from external programs or services. A [command-line interface](#command-line) is available as well to allow for Rhasspy to be easily included in shell scripts. Lastly, Rhasspy subscribes and publishes to specific [MQTT topics](#mqtt) in accordance with (a portion of) the [Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol).
|
||||
You can interact with Rhasspy in more ways than your voice:
|
||||
|
||||
* [Web Interface](#web-interface)
|
||||
* [Home Assistant](#home-assistant)
|
||||
* [Node-RED with Websockets](#node-red)
|
||||
* [MQTT and Snips](#mqtt-and-snips)
|
||||
* [HTTP API](#http-api)
|
||||
* [Command Line](#command-line)
|
||||
|
||||
## Web Interface
|
||||
|
||||
A browser-based interface for Rhasspy is available on port 12101 by default ([http://localhost:12101](http://localhost:12101) if running locally). From this interface, you can test voice commands, add new voice commands, re-train, and edit your profile.
|
||||
|
||||
### Top Bar
|
||||
|
||||
The top bar of the web interface lets you perform some global actions on Rhasspy, regardless of which tab you have selected.
|
||||
|
||||

|
||||
|
||||
* Click the Rhasspy logo to reload the page
|
||||
* Click the version number to test the [HTTP API](#http-api)
|
||||
* The green `Train` button will re-train your profile
|
||||
* Use the `Clear Cache` drop down to train from scratch
|
||||
* The yellow `Wake` button will wake Rhasspy up and start listening for a voice command
|
||||
* The red `Restart` button forces Rhasspy to restart
|
||||
|
||||
### Speech Tab
|
||||
|
||||
Test voice and text commands.
|
||||
@@ -14,17 +34,28 @@ Test voice and text commands.
|
||||
|
||||
* Record a voice command with `Hold to Record` or `Tap to Record`
|
||||
* Upload a WAV file with a voice command
|
||||
* Enter a text command and execute it
|
||||
* Enter a text command and either execute it (`Get Intent`) or `Speak` the sentence
|
||||
* Uncheck `Send to Home Assistant` if you **don't** want Rhasspy to send events to Home Assistant
|
||||
|
||||
### Sentences Tab
|
||||
|
||||
Add new voice commands to Rhasspy.
|
||||
Add new voice commands to Rhasspy using the [template syntax](training.md#sentencesini).
|
||||
|
||||

|
||||
|
||||
See documentation on [sentences.ini](training.md#sentencesini) for more information.
|
||||
Make sure to re-train after saving!
|
||||
* Edits `sentences.ini` by default
|
||||
* Use the `Add File` button to create additional sentence template files
|
||||
* These should be prefixed by the `sentences_dir` in your [profile](profiles.md). For example, `intents/more-commands.ini`
|
||||
* The drop down can be used to switch editing between different template files
|
||||
|
||||
### Slots Tab
|
||||
|
||||
Edit your [slots lists](training.md#slots-lists) as JSON (keys = slot names, values = lists of slot values).
|
||||
|
||||

|
||||
|
||||
* New slot values will overwrite previous ones
|
||||
* Delete a slot by providing an empty list for its JSON key
|
||||
|
||||
### Words Tab
|
||||
|
||||
@@ -57,83 +88,11 @@ Direct interface for editing your [profile](profiles.md).
|
||||
|
||||

|
||||
|
||||
## HTTP API
|
||||
### Log Tab
|
||||
|
||||
Rhasspy features a comprehensive HTTP API available at `/api`, documented with [OpenAPI 3](https://github.com/OAI/OpenAPI-Specification) (Swagger). Some notable endpoints are:
|
||||
Streams Rhasspy's log output over a websocket.
|
||||
|
||||
* `/api/profile`
|
||||
* GET the JSON for your profile, or POST to overwrite it
|
||||
* `/api/listen-for-command`
|
||||
* POST to wake Rhasspy up and start listening for a voice command
|
||||
* `/api/start-recording`
|
||||
* POST to have Rhasspy start recording a voice command
|
||||
* `/api/stop-recording`
|
||||
* POST to have Rhasspy stop recording and process recorded data as a voice command
|
||||
* `/api/train`
|
||||
* POST to re-train your profile
|
||||
* `/api/speech-to-intent`
|
||||
* POST a WAV file and have Rhasspy process it as a voice command
|
||||
* `/api/text-to-intent`
|
||||
* POST text and have Rhasspy process it as command
|
||||
* `/api/text-to-speech`
|
||||
* POST text and have Rhasspy speak it
|
||||
* `/api/slots`
|
||||
* POST JSON to update [slot values](training.md#slots-lists)
|
||||
|
||||
See `public/swagger.yaml` in Rhasspy's repository for all available endpoints, or visit `/api` on your Rhasspy web server (e.g., [http://localhost:12101/api](http://localhost:12101/api)).
|
||||
|
||||
## Secure Hosting with HTTPS
|
||||
|
||||
If you need to access Rhasspy's web interface/API through HTTPS (formally SSL), you can provide a certificate and key file via command-line parameters or the Hass.io configuration.
|
||||
|
||||
If you're running Rhasspy via Docker or in a virtual environment, add `--ssl <CERT_FILE> <KEY_FILE>` to the command-line arguments where `<CERT_FILE>` is your SSL certificate and `<KEY_FILE>` is your SSL key file.
|
||||
|
||||
You can generate a self-signed certificate with the following command:
|
||||
|
||||
openssl req -x509 -newkey rsa:4096 -nodes -out cert.pem -keyout key.pem -days 365
|
||||
|
||||
After answering the series of questions, you should have `cert.pem` and `key.pem` in your current directory. Then run Rhasspy with:
|
||||
|
||||
<RHASSPY COMMAND> --ssl cert.pem key.pem
|
||||
|
||||
The web interface will now be available at [https://localhost:12101](https://localhost:12101) and the web socket events at `wss://localhost:12101/api/events/intent`
|
||||
|
||||
In Hass.io, you will need to set the following options via the web interface or in your JSON configuration:
|
||||
|
||||
* `ssl`: `true`
|
||||
* `certfile`: `cert.pem`
|
||||
* `keyfile`: `key.pem`
|
||||
|
||||
## WebSocket Events
|
||||
|
||||
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://rhasspy:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
|
||||
You can listen to these events in a [Node-RED](https://nodered.org) flow, and easily add offline, private voice commands to your home automation set up!
|
||||
|
||||
For the `ChangLightState` intent from the [RGB Light Example](index.md#rgb-light-example), Rhasspy will emit a JSON event like this over the websocket:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "set the bedroom light to red",
|
||||
"intent": {
|
||||
"name": "ChangeLightColor",
|
||||
"confidence": 1
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"slots": {
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
}
|
||||
```
|
||||

|
||||
|
||||
## Home Assistant
|
||||
|
||||
@@ -164,6 +123,13 @@ automation:
|
||||
|
||||
You've now added offline, private voice commands to your Home Assistant. Happy automating!
|
||||
|
||||
### Getting the Spoken Text
|
||||
|
||||
The Home Assistant event will contain two extra slots besides the ones you specify:
|
||||
|
||||
* `_text` - spoken voice command text with [substitutions](training.md#substitutions)
|
||||
* `_raw_text` - literal transcription of voice command
|
||||
|
||||
## Node-RED
|
||||
|
||||
Rhasspy can interact directly with [Node-RED](https://nodered.org) through [websockets](usage.md#websocket-events).
|
||||
@@ -174,23 +140,90 @@ Make sure to also set send/receive to "entire message".
|
||||
|
||||
More example flows are available [on Github](https://github.com/synesthesiam/rhasspy/tree/master/examples/nodered).
|
||||
|
||||
### WebSocket Events
|
||||
|
||||
Whenever a voice command is recognized, Rhasspy emits JSON events over a websocket connection available at `ws://rhasspy:12101/api/events/intent` (replace `ws://` with `wss://` if you're using [secure hosting](usage.md#secure-hosting-with-https)).
|
||||
You can listen to these events in a [Node-RED](https://nodered.org) flow, and easily add offline, private voice commands to your home automation set up!
|
||||
|
||||
For the `ChangeLightState` intent from the [RGB Light Example](index.md#rgb-light-example), Rhasspy will emit a JSON event like this over the websocket:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "set the bedroom light to red",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "bedroom"
|
||||
},
|
||||
{
|
||||
"entity": "color",
|
||||
"value": "red"
|
||||
}
|
||||
],
|
||||
"slots": {
|
||||
"name": "bedroom",
|
||||
"color": "red"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## MQTT and Snips
|
||||
|
||||
Rhasspy is able to interoperate with Snips.AI services using the [Hermes protocol](https://docs.snips.ai/reference/hermes) over [MQTT](http://mqtt.org). The following components are Snips/Hermes compatible:
|
||||
|
||||
* [Microphone input](audio-input.md#mqtthermes)
|
||||
* [Wake word](wake-word.md#mqtthermes)
|
||||
* [Speech to text](speech-to-text.md#mqtthermes)
|
||||
* [Intent recognition](intent-recognition.md#mqtthermes)
|
||||
* [Audio output](audio-output.md#mqtthermes)
|
||||
|
||||
## HTTP API
|
||||
|
||||
Rhasspy features a comprehensive HTTP API available at `/api/`, documented with [OpenAPI 3](https://github.com/OAI/OpenAPI-Specification) (Swagger). See the [HTTP API reference](reference.md#http-api) for more details.
|
||||
|
||||
### Secure Hosting with HTTPS
|
||||
|
||||
If you need to access Rhasspy's web interface/API through HTTPS (formerly SSL), you can provide a certificate and key file via command-line parameters or the Hass.io configuration.
|
||||
|
||||
If you're running Rhasspy via Docker or in a virtual environment, add `--ssl <CERT_FILE> <KEY_FILE>` to the command-line arguments where `<CERT_FILE>` is your SSL certificate and `<KEY_FILE>` is your SSL key file.
|
||||
|
||||
You can generate a self-signed certificate with the following command:
|
||||
|
||||
openssl req -x509 -newkey rsa:4096 -nodes -out cert.pem -keyout key.pem -days 365
|
||||
|
||||
After answering the series of questions, you should have `cert.pem` and `key.pem` in your current directory. Then run Rhasspy with:
|
||||
|
||||
<RHASSPY COMMAND> --ssl cert.pem key.pem
|
||||
|
||||
The web interface will now be available at [https://localhost:12101](https://localhost:12101) and the web socket events at `wss://localhost:12101/api/events/intent`
|
||||
|
||||
In Hass.io, you will need to set the following options via the web interface or in your JSON configuration:
|
||||
|
||||
* `ssl`: `true`
|
||||
* `certfile`: `cert.pem`
|
||||
* `keyfile`: `key.pem`
|
||||
|
||||
## Command Line
|
||||
|
||||
You can access portions of Rhasspy's functionality without running a web server through the command-line interface.
|
||||
The `rhasspy` Python module runs this interface in its `__main__`, so it's accessible from Rhasspy's source code directory by running:
|
||||
|
||||
python3 -m rhasspy <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
This will only work inside a properly set up [virtual environment](installation.md#virtual-environment), however.
|
||||
If you run Rhasspy through [Docker](installation.md#docker), the [rhasspy-cli](https://github.com/synesthesiam/rhasspy/blob/master/bin/rhasspy-cli) script should be used instead:
|
||||
|
||||
wget https://github.com/synesthesiam/rhasspy/blob/master/bin/rhasspy-cli
|
||||
chmod +x rhasspy-cli
|
||||
./rhasspy-cli --help
|
||||
|
||||
|
||||
Put this script in your `~/bin` directory so that you can refer to it as `rhasspy-cli` from any directory.
|
||||
By default, it will look for profiles in `$XDG_CONFIG_HOME/rhasspy/profiles`, which is probably `~/.config/rhasspy/profiles` (see [XDG specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) for more information).
|
||||
|
||||
|
||||
**Beware**: the `rhasspy-cli` script runs under your user account and grants Rhasspy **write access to your home directory**.
|
||||
This is needed to save files during the training process, and to avoid those files being owned by `root`.
|
||||
The [rhasspy-cli-ro](https://github.com/synesthesiam/rhasspy/blob/master/bin/rhasspy-cli-ro) script can be used for read only operations, such as speech to text or intent handling, but cannot make any changes to your file system.
|
||||
@@ -200,240 +233,13 @@ The [rhasspy-cli-ro](https://github.com/synesthesiam/rhasspy/blob/master/bin/rha
|
||||
The `rhasspy-cli` script takes a command and a set of arguments:
|
||||
|
||||
rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
Adding `--debug` before the command will print additional information to the console:
|
||||
|
||||
rhasspy-cli --debug --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>
|
||||
|
||||
|
||||
You can override profile settings with `--set` like this:
|
||||
|
||||
rhasspy-cli --profile <PROFILE_NAME> --set <SETTING_NAME> <SETTING_VALUE> ... <COMMAND> <ARGUMENTS>
|
||||
|
||||
### Available Commands
|
||||
|
||||
For `rhasspy-cli --profile <PROFILE_NAME> <COMMAND> <ARGUMENTS>`, `<COMMAND>` can be:
|
||||
|
||||
* `info`
|
||||
* Print profile JSON to standard out
|
||||
* Add `--defaults` to only print settings from `defaults.json`
|
||||
* `wav2text`
|
||||
* Convert WAV file(s) to text
|
||||
* `wav2intent`
|
||||
* Convert WAV file(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `text2intent`
|
||||
* Convert text command(s) to intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* `train`
|
||||
* Re-train your profile
|
||||
* `mic2wav`
|
||||
* Listen for a voice command and output WAV data
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2text`
|
||||
* Listen for a voice command and convert it to text
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `mic2intent`
|
||||
* Listen for a voice command and output intent JSON
|
||||
* Add `--handle` to have Rhasspy send events to Home Assistant
|
||||
* Add `--timeout <SECONDS>` to stop recording after some number of seconds
|
||||
* `word2phonemes`
|
||||
* Print the CMU phonemes for a word (possibly unknown)
|
||||
* Add `-n <COUNT>` to control the maximum number of guessed pronunciations
|
||||
* `word2wav`
|
||||
* Pronounce a word (possibly unknown) and output WAV data
|
||||
* `text2speech`
|
||||
* Speaks one or more sentences using Rhasspy's text to speech system
|
||||
* `text2wav`
|
||||
* Converts a single sentence to WAV using Rhasspy's text to speech system
|
||||
* `sleep`
|
||||
* Run Rhasspy and wait until wake word is spoken
|
||||
* `download`
|
||||
* Download necessary profile files from the internet
|
||||
|
||||
### Profile Operations
|
||||
|
||||
Print the complete JSON for the English profile with:
|
||||
|
||||
rhasspy-cli --profile en info
|
||||
|
||||
You can combine this with other commands, such as `jq` to get at specific pieces:
|
||||
|
||||
rhasspy-cli info --profile en | jq .wake.pocketsphinx.keyphrase
|
||||
|
||||
Output (JSON):
|
||||
|
||||
"okay rhasspy"
|
||||
|
||||
### Training
|
||||
|
||||
Retrain the English profile with:
|
||||
|
||||
rhasspy-cli --profile en train
|
||||
|
||||
Add `--debug` before `train` for more information.
|
||||
|
||||
### Speech to Text/Intent
|
||||
|
||||
Convert a WAV file to text from stdin:
|
||||
|
||||
rhasspy-cli --profile en wav2text < what-time-is-it.wav
|
||||
|
||||
Output (text):
|
||||
|
||||
what time is it
|
||||
|
||||
Convert multiple WAV files:
|
||||
|
||||
rhasspy-cli --profile en wav2text what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON)
|
||||
|
||||
```json
|
||||
{
|
||||
"what-time-is-it.wav": "what time is it",
|
||||
"turn-on-the-living-room-lamp.wav": "turn on the living room lamp"
|
||||
}
|
||||
```
|
||||
|
||||
Convert multiple WAV file(s) to intents **and** handle them:
|
||||
|
||||
rhasspy-cli --profile en wav2intent --handle what-time-is-it.wav turn-on-the-living-room-lamp.wav
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"what_time_is_it.wav": {
|
||||
"text": "what time is it",
|
||||
"intent": {
|
||||
"name": "GetTime",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": []
|
||||
},
|
||||
"turn_on_living_room_lamp.wav": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Text to Intent
|
||||
|
||||
Handle a command as if it was spoken:
|
||||
|
||||
rhasspy-cli --profile en text2intent --handle "turn off the living room lamp"
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn off the living room lamp": {
|
||||
"text": "turn off the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "off"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Record Your Voice
|
||||
|
||||
Save a voice command to a WAV:
|
||||
|
||||
rhasspy-cli --profile en mic2wav > my-voice-command.wav
|
||||
|
||||
You can listen to it with:
|
||||
|
||||
aplay my-voice-command.wav
|
||||
|
||||
### Test Your Wake Word
|
||||
|
||||
Start Rhasspy and wait for wake word:
|
||||
|
||||
rhasspy-cli --profile en sleep
|
||||
|
||||
Rhasspy should exit and print the wake word when it's spoken.
|
||||
|
||||
### Text to Speech
|
||||
|
||||
Have Rhasspy speak one or more sentences:
|
||||
|
||||
rhasspy-cli --profile en text2speech "We ride at dawn!"
|
||||
|
||||
Use a different text to speech system and voice:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'flite' \
|
||||
--set 'text_to_speech.flite.voice' 'slt' \
|
||||
text2speech "We ride at dawn!"
|
||||
|
||||
### Pronounce Words
|
||||
|
||||
Speak words Rhasspy doesn't know!
|
||||
|
||||
rhasspy-cli --profile en word2wav raxacoricofallapatorius | aplay
|
||||
|
||||
### Text to Speech to Text to Intent
|
||||
|
||||
Use the miracle of Unix pipes to have Rhasspy interpret voice commands from itself:
|
||||
|
||||
rhasspy-cli --profile en \
|
||||
--set 'text_to_speech.system' 'picotts' \
|
||||
text2wav "turn on the living room lamp" | \
|
||||
rhasspy-cli --profile en wav2text | \
|
||||
rhasspy-cli --profile en text2intent
|
||||
|
||||
|
||||
Output (JSON):
|
||||
|
||||
```json
|
||||
{
|
||||
"turn on the living room lamp": {
|
||||
"text": "turn on the living room lamp",
|
||||
"intent": {
|
||||
"name": "ChangeLightState",
|
||||
"confidence": 1.0
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": "state",
|
||||
"value": "on"
|
||||
},
|
||||
{
|
||||
"entity": "name",
|
||||
"value": "living room lamp"
|
||||
}
|
||||
],
|
||||
"speech_confidence": 1,
|
||||
"slots": {
|
||||
"state": "on",
|
||||
"name": "living room lamp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
See the [command-line reference](reference.md#command-line) for available commands.
|
||||
|
||||
@@ -34,7 +34,7 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
There are a lot of [keyword files](https://github.com/Picovoice/Porcupine/tree/master/resources/keyword_files) available for download. Use the `linux` platform if you're on desktop/laptop (`amd64`) and the `raspberrypi` platform if you're using a Raspberry Pi (`armhf`/`aarch64`). The `.ppn` files should go in the `porcupine` directory inside your profile (referenced by `keyword_path`).
|
||||
|
||||
If you want to create a custom wake word, you will need to run the [Porcupine Optimizer](https://github.com/Picovoice/Porcupine/tree/master/tools/optimizer). **NOTE**: the generated keyword file is only valid for 30 days, though you can always just re-run the optimizer.
|
||||
@@ -43,7 +43,7 @@ See `rhasspy.wake.PorcupineWakeListener` for details.
|
||||
|
||||
## Snowboy
|
||||
|
||||
Listens for a wake word with [snowboy](https://snowboy.kitt.ai). This system has the good performance out of the box, but requires an online service to train.
|
||||
Listens for one or more wake words with [snowboy](https://snowboy.kitt.ai). This system has good performance out of the box, but requires an online service to train.
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -54,10 +54,10 @@ Add to your [profile](profiles.md):
|
||||
"wakeword_id": "default"
|
||||
},
|
||||
"snowboy": {
|
||||
"model": "model-name-in-profile.(u|p)mdl",
|
||||
"model": "snowboy/snowboy.umdl",
|
||||
"audio_gain": 1,
|
||||
"sensitivity": 0.5,
|
||||
"chunk_size": 960
|
||||
"sensitivity": "0.5",
|
||||
"apply_frontend": false
|
||||
}
|
||||
},
|
||||
|
||||
@@ -65,10 +65,41 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
If your hotword model has multiple embedded hotwords (such as `jarvis.umdl`), the "sensitivity" parameter should contain sensitivities for each embedded hotword separated by commas (e.g., "0.5,0.5").
|
||||
|
||||
Visit [the snowboy website](https://snowboy.kitt.ai) to train your own wake word model (requires linking to a GitHub/Google/Facebook account). This *personal* model will end with `.pmdl`, and should go in your profile directory. Then, set `wake.snowboy.model` to the name of that file.
|
||||
|
||||
You also have the option of using a pre-train *universal* model (`.umdl`) from [Kitt.AI](https://github.com/Kitt-AI/snowboy/tree/master/resources/models). I've received errors using anything but `snowboy.umdl`, but YMMV.
|
||||
You also have the option of using a pre-trained *universal* model (`.umdl`) from [Kitt.AI](https://github.com/Kitt-AI/snowboy/tree/master/resources/models).
|
||||
|
||||
### Multiple Wake Words
|
||||
|
||||
You can have `snowboy` listen for multiple wake words with different models, each with their own settings. You will need to download each model file to the `snowboy` directory in your profile.
|
||||
|
||||
For example, to use both the `snowboy.umdl` and `jarvis.umdl` models, add this to your profile:
|
||||
|
||||
```json
|
||||
"wake": {
|
||||
"system": "snowboy",
|
||||
"snowboy": {
|
||||
"model": "snowboy/snowboy.umdl,snowboy/jarvis.umdl",
|
||||
"model_settings": {
|
||||
"snowboy/snowboy.umdl": {
|
||||
"sensitivity": "0.5",
|
||||
"audio_gain": 1,
|
||||
"apply_frontend": false
|
||||
},
|
||||
"snowboy/jarvis.umdl": {
|
||||
"sensitivity": "0.5,0.5",
|
||||
"audio_gain": 1,
|
||||
"apply_frontend": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Make sure to include all models you want in the `model` setting (separated by commas). Each model may have different settings in `model_settings`. If a setting is not present, the default values under `snowboy` will be used.
|
||||
|
||||
See `rhasspy.wake.SnowboyWakeListener` for details.
|
||||
|
||||
@@ -92,7 +123,7 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Set `wake.pocketsphinx.keyphrase` to whatever you like, though 3-4 syllables is recommended. Make sure to [train](training.md) and restart Rhasspy whenever you change the keyphrase.
|
||||
|
||||
The `wake.pocketsphinx.threshold` should be in the range 1e-50 to 1e-5. The smaller the number, the less likely the keyphrase is to be observed. At least one person has written a script to [automatically tune the threshold](https://medium.com/@PankajB96/automatic-tuning-of-keyword-spotting-thresholds-a27256869d31).
|
||||
@@ -120,14 +151,14 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Follow [the instructions from Mycroft AI](https://github.com/MycroftAI/mycroft-precise/wiki/Training-your-own-wake-word#how-to-train-your-own-wake-word) to train your own wake word model. When you're finished, place **both** the `.pb` and `.pb.params` files in your profile directory, and set `wake.precise.model` to the name of the `.pb` file.
|
||||
|
||||
|
||||
See `rhasspy.wake.PreciseWakeListener` for details.
|
||||
|
||||
## MQTT/Hermes
|
||||
|
||||
Subscribes to the `hermes/hotword/<WAKEWORD_ID>/detected` topic, and wakes Rhasspy up when a message is received ([Hermes protocol](https://docs.snips.ai/ressources/hermes-protocol)). This allows Rhasspy to use the wake word functionality in [Snips.AI](https://snips.ai/).
|
||||
Subscribes to the `hermes/hotword/<WAKEWORD_ID>/detected` topic, and wakes Rhasspy up when a message is received ([Hermes protocol](https://docs.snips.ai/reference/hermes)). This allows Rhasspy to use the wake word functionality in [Snips.AI](https://snips.ai/).
|
||||
|
||||
Add to your [profile](profiles.md):
|
||||
|
||||
@@ -153,7 +184,7 @@ Add to your [profile](profiles.md):
|
||||
"site_id": "default"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Adjust the `mqtt` configuration to connect to your MQTT broker.
|
||||
Set `mqtt.site_id` to match your Snips.AI siteId and `wake.hermes.wakeword_id` to match your Snips.AI wakewordId.
|
||||
|
||||
@@ -178,7 +209,7 @@ Add to your [profile](profiles.md):
|
||||
"listen_on_start": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
When Rhasspy starts, your program will be called with the given arguments. Once your program detects the wake word, it should print it to standard out and exit. Rhasspy will call your program again when it goes back to sleep. If the empty string is printed, Rhasspy will **not** wake up and your program will be called again.
|
||||
|
||||
The following environment variables are available to your program:
|
||||
|
||||
@@ -13,7 +13,7 @@ DEFINE_boolean 'precise' true 'Install Mycroft Precise'
|
||||
DEFINE_boolean 'kaldi' true 'Install Kaldi'
|
||||
DEFINE_boolean 'offline' false "Don't download anything"
|
||||
DEFINE_boolean 'all-cpu' false 'Download dependencies for all CPU architectures'
|
||||
DEFINE_string 'cpu-arch' "${cpu_arch}" 'CPU architecture (x86_64, armv7l, arm64v8)'
|
||||
DEFINE_string 'cpu-arch' "${cpu_arch}" 'CPU architecture (x86_64, armv7l, arm64v8, armv6l)'
|
||||
|
||||
FLAGS "$@" || exit $?
|
||||
eval set -- "${FLAGS_ARGV}"
|
||||
@@ -47,14 +47,14 @@ fi
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
function maybe_download {
|
||||
if [[ ! -f "$2" ]]; then
|
||||
if [[ ! -z "${offline}" ]]; then
|
||||
if [[ ! -s "$2" ]]; then
|
||||
if [[ -n "${offline}" ]]; then
|
||||
echo "Need to download $1 but offline."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "$2")"
|
||||
curl -sSfL -o "$2" "$1"
|
||||
curl -sSfL -o "$2" "$1" || { echo "Can't download $1"; exit 1; }
|
||||
echo "$1 => $2"
|
||||
fi
|
||||
}
|
||||
@@ -65,9 +65,10 @@ declare -A CPU_TO_FRIENDLY
|
||||
CPU_TO_FRIENDLY["x86_64"]="amd64"
|
||||
CPU_TO_FRIENDLY["armv7l"]="armhf"
|
||||
CPU_TO_FRIENDLY["arm64v8"]="aarch64"
|
||||
CPU_TO_FRIENDLY["armv6l"]="armv6l"
|
||||
|
||||
# CPU architecture
|
||||
if [[ ! -z "${all_cpu}" ]]; then
|
||||
if [[ -n "${all_cpu}" ]]; then
|
||||
CPU_ARCHS=("x86_64" "armv7l" "arm64v8")
|
||||
FRIENDLY_ARCHS=("amd64" "armhf" "aarch64")
|
||||
else
|
||||
@@ -79,10 +80,9 @@ fi
|
||||
# Rhasspy
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}";
|
||||
do
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"; do
|
||||
rhasspy_files=("rhasspy-tools_${FRIENDLY_ARCH}.tar.gz" "rhasspy-web-dist.tar.gz")
|
||||
for rhasspy_file_name in "${rhasspy_files}"; do
|
||||
for rhasspy_file_name in "${rhasspy_files[@]}"; do
|
||||
rhasspy_file="${download_dir}/${rhasspy_file_name}"
|
||||
rhasspy_file_url="https://github.com/synesthesiam/rhasspy/releases/download/v2.0/${rhasspy_file_name}"
|
||||
maybe_download "${rhasspy_file_url}" "${rhasspy_file}"
|
||||
@@ -110,8 +110,7 @@ maybe_download "${snowboy_url}" "${snowboy_file}"
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_precise}" ]]; then
|
||||
for CPU_ARCH in "${CPU_ARCHS}";
|
||||
do
|
||||
for CPU_ARCH in "${CPU_ARCHS[@]}"; do
|
||||
case $CPU_ARCH in
|
||||
x86_64|armv7l)
|
||||
precise_file="${download_dir}/precise-engine_0.3.0_${CPU_ARCH}.tar.gz"
|
||||
@@ -126,8 +125,7 @@ fi
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if [[ -z "${no_kaldi}" ]]; then
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS}"
|
||||
do
|
||||
for FRIENDLY_ARCH in "${FRIENDLY_ARCHS[@]}"; do
|
||||
# Install pre-built package
|
||||
kaldi_file="${download_dir}/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
kaldi_url="https://github.com/synesthesiam/kaldi-docker/releases/download/v1.0/kaldi_${FRIENDLY_ARCH}.tar.gz"
|
||||
|
||||
@@ -43,7 +43,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -53,7 +53,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
default_view:
|
||||
view: yes
|
||||
view: true
|
||||
entities:
|
||||
- group.inside
|
||||
- group.garage
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
# ATLAS specific Linux ARM configuration
|
||||
|
||||
ifndef DOUBLE_PRECISION
|
||||
$(error DOUBLE_PRECISION not defined.)
|
||||
endif
|
||||
ifndef OPENFSTINC
|
||||
$(error OPENFSTINC not defined.)
|
||||
endif
|
||||
ifndef OPENFSTLIBS
|
||||
$(error OPENFSTLIBS not defined.)
|
||||
endif
|
||||
ifndef ATLASINC
|
||||
$(error ATLASINC not defined.)
|
||||
endif
|
||||
ifndef ATLASLIBS
|
||||
$(error ATLASLIBS not defined.)
|
||||
endif
|
||||
|
||||
CXXFLAGS = -std=c++11 -I.. -isystem $(OPENFSTINC) -O1 $(EXTRA_CXXFLAGS) \
|
||||
-Wall -Wno-sign-compare -Wno-unused-local-typedefs \
|
||||
-Wno-deprecated-declarations -Winit-self \
|
||||
-DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \
|
||||
-DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \
|
||||
-ftree-vectorize -pthread \
|
||||
-g # -O0 -DKALDI_PARANOID
|
||||
|
||||
ifeq ($(KALDI_FLAVOR), dynamic)
|
||||
CXXFLAGS += -fPIC
|
||||
endif
|
||||
|
||||
# Compiler specific flags
|
||||
COMPILER = $(shell $(CXX) -v 2>&1)
|
||||
ifeq ($(findstring clang,$(COMPILER)),clang)
|
||||
# Suppress annoying clang warnings that are perfectly valid per spec.
|
||||
CXXFLAGS += -Wno-mismatched-tags
|
||||
endif
|
||||
|
||||
LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
|
||||
LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl
|
||||
@@ -0,0 +1,19 @@
|
||||
[Unit]
|
||||
Description=Rhasspy
|
||||
After=syslog.target network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/home/<USER>/path/to/rhasspy
|
||||
ExecStart=/bin/bash -lc './run-venv.sh --profile <LANGUAGE>'
|
||||
|
||||
RestartSec=1
|
||||
Restart=on-failure
|
||||
|
||||
StandardOutput=syslog
|
||||
StandardError=syslog
|
||||
|
||||
SyslogIdentifier=rhasspy
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -40,7 +40,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -50,7 +50,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -75,7 +75,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -85,7 +85,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -42,7 +42,7 @@ switch:
|
||||
command_on: "echo 'Living room lamp ON'"
|
||||
command_off: "echo 'Living room lamp OFF'"
|
||||
garage_light:
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_on: "echo 'Garage light ON'"
|
||||
command_off: "echo 'Garage light OFF'"
|
||||
|
||||
# Doors
|
||||
@@ -52,7 +52,7 @@ binary_sensor:
|
||||
command: "bash -c 'sec=$(date +%s); [[ $(($sec % 2)) -eq 0 ]] && echo open || echo closed'"
|
||||
payload_on: "closed"
|
||||
payload_off: "open"
|
||||
|
||||
|
||||
# Temperature
|
||||
sensor:
|
||||
- platform: command_line
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
if [[ -z "$(which phonetisaurus-train)" ]]; then
|
||||
if [[ -z "$(command -v phonetisaurus-train)" ]]; then
|
||||
echo "Phonetisaurus not installed!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -4,6 +4,7 @@ nav:
|
||||
- Home: index.md
|
||||
- Hardware: hardware.md
|
||||
- Installation: installation.md
|
||||
- Tutorials: tutorials.md
|
||||
- Usage: usage.md
|
||||
- Profiles: profiles.md
|
||||
- Training: training.md
|
||||
@@ -15,5 +16,7 @@ nav:
|
||||
- Intent Recognition: intent-recognition.md
|
||||
- Intent Handling: intent-handling.md
|
||||
- Text to Speech: text-to-speech.md
|
||||
- Reference: reference.md
|
||||
- Development: development.md
|
||||
- License: license.md
|
||||
- About: about.md
|
||||
|
||||
@@ -58,4 +58,22 @@ ignore_missing_imports = True
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-google.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-networkx.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-num2words.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-doit.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-json5.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-quart_cors.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-swagger_ui.*]
|
||||
ignore_missing_imports = True
|
||||
@@ -7,6 +7,7 @@
|
||||
"kaldi": {
|
||||
"base_dictionary": "kaldi/base_dictionary.txt",
|
||||
"base_language_model": "kaldi/base_language_model.txt",
|
||||
"base_language_model_fst": "kaldi/base_language_model.fst",
|
||||
"compatible": true,
|
||||
"custom_words": "kaldi/custom_words.txt",
|
||||
"dictionary": "kaldi/dictionary.txt",
|
||||
|
||||
@@ -28,14 +28,18 @@
|
||||
"program": ""
|
||||
},
|
||||
"forward_to_hass": false,
|
||||
"system": "dummy"
|
||||
"system": "dummy",
|
||||
"remote": {
|
||||
"url": "http://my-server:port/endpoint"
|
||||
},
|
||||
},
|
||||
"home_assistant": {
|
||||
"access_token": "",
|
||||
"api_password": "",
|
||||
"event_type_format": "rhasspy_{0}",
|
||||
"pem_file": "",
|
||||
"url": "http://hassio/homeassistant/"
|
||||
"url": "http://hassio/homeassistant/",
|
||||
"handle_type": "event"
|
||||
},
|
||||
"intent": {
|
||||
"adapt": {
|
||||
@@ -81,11 +85,13 @@
|
||||
"microphone": {
|
||||
"arecord": {
|
||||
"chunk_size": 960,
|
||||
"device": ""
|
||||
"device": "",
|
||||
"keep_device_open": true
|
||||
},
|
||||
"pyaudio": {
|
||||
"device": "",
|
||||
"frames_per_buffer": 480
|
||||
"frames_per_buffer": 480,
|
||||
"keep_device_open": true
|
||||
},
|
||||
"stdin": {
|
||||
"auto_start": true,
|
||||
@@ -170,7 +176,15 @@
|
||||
"remote": {
|
||||
"url": "http://my-server:12101/api/speech-to-text"
|
||||
},
|
||||
"hass_stt": {
|
||||
"platform": "",
|
||||
"sample_rate": 16000,
|
||||
"bit_size": 16,
|
||||
"channels": 1,
|
||||
"language": "en-US"
|
||||
},
|
||||
"sentences_ini": "sentences.ini",
|
||||
"sentences_dir": "intents",
|
||||
"slots_dir": "slots",
|
||||
"system": "dummy"
|
||||
},
|
||||
@@ -197,6 +211,9 @@
|
||||
"url": "https://texttospeech.googleapis.com/v1/text:synthesize",
|
||||
"voice": "Wavenet-C",
|
||||
"fallback_tts": "espeak"
|
||||
},
|
||||
"hass_tts": {
|
||||
"platform": ""
|
||||
}
|
||||
},
|
||||
"training": {
|
||||
@@ -256,7 +273,8 @@
|
||||
"audio_gain": 1,
|
||||
"chunk_size": 960,
|
||||
"model": "snowboy/snowboy.umdl",
|
||||
"sensitivity": 0.5
|
||||
"sensitivity": 0.5,
|
||||
"model_settings": {}
|
||||
},
|
||||
"porcupine": {
|
||||
"library_path": "porcupine/libpv_porcupine.so",
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
"kaldi": {
|
||||
"base_dictionary": "kaldi/base_dictionary.txt",
|
||||
"base_language_model": "kaldi/base_language_model.txt",
|
||||
"base_language_model_fst": "kaldi/base_language_model.fst",
|
||||
"compatible": true,
|
||||
"custom_words": "kaldi/custom_words.txt",
|
||||
"dictionary": "kaldi/dictionary.txt",
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
2 j
|
||||
9 j
|
||||
9: ?
|
||||
9~ V
|
||||
@ E
|
||||
@: ?
|
||||
A a
|
||||
A: ?
|
||||
A~ n
|
||||
E E
|
||||
E: s
|
||||
E~ I
|
||||
H j
|
||||
I ?
|
||||
J n
|
||||
N N
|
||||
O 0
|
||||
O~ n
|
||||
R r
|
||||
R: ?
|
||||
S tS
|
||||
SIL ?
|
||||
U ?
|
||||
Z dZ
|
||||
a a
|
||||
a: ?
|
||||
aU ?
|
||||
b b
|
||||
d d
|
||||
dZ dZ
|
||||
e eI
|
||||
e: ?
|
||||
f f
|
||||
g g
|
||||
h ?
|
||||
i I
|
||||
i: ?
|
||||
j @
|
||||
k k
|
||||
l l
|
||||
m m
|
||||
n n
|
||||
o oU
|
||||
o: ?
|
||||
p p
|
||||
pf ?
|
||||
r r
|
||||
s s
|
||||
t t
|
||||
tS tS
|
||||
ts t
|
||||
u u:
|
||||
u: ?
|
||||
v v
|
||||
w OI
|
||||
x ?
|
||||
y j
|
||||
y: ?
|
||||
z s
|
||||
{ ?
|
||||
@@ -0,0 +1,44 @@
|
||||
2 bleu b l 2
|
||||
9 club k l 9 b
|
||||
9~ aucun o k 9~
|
||||
@ ceci s @ s i
|
||||
A base b A z
|
||||
A~ andy A~ n d i
|
||||
E aies E
|
||||
E: têtes t E: t
|
||||
E~ bien b j E~
|
||||
H fuir f H i R
|
||||
I avril A v R I l
|
||||
J gagne g a J
|
||||
N king k i N
|
||||
O bord b O R
|
||||
O~ bons b O~
|
||||
R agir a Z i R
|
||||
S chef S E f
|
||||
Z ange A~ Z
|
||||
a abri a b R i
|
||||
a: marc m a: k
|
||||
b aube o b
|
||||
d aide E d
|
||||
dZ jack dZ a k
|
||||
e aidé E d e
|
||||
f afin a f E~
|
||||
g goût g u
|
||||
i agit a Z i
|
||||
j ayez E j e
|
||||
k acte a k t
|
||||
l allo a l o
|
||||
m aime E m
|
||||
n anna a n a
|
||||
o allô a l o
|
||||
p pain p E~
|
||||
r prison p r i z O~
|
||||
s alex a l E k s
|
||||
t bite b i t
|
||||
tS match m a tS
|
||||
ts cents s E n ts
|
||||
u chou S u
|
||||
v avec a v E k
|
||||
w coin k w E~
|
||||
y buts b y t
|
||||
z aise E z
|
||||
@@ -1,63 +1,98 @@
|
||||
{
|
||||
"language": "fr",
|
||||
"name": "fr",
|
||||
|
||||
"speech_to_text": {
|
||||
"system": "pocketsphinx",
|
||||
"dictionary_casing": "lower"
|
||||
},
|
||||
"intent": {
|
||||
"wavenet": {
|
||||
"language_code": "fr-FR"
|
||||
},
|
||||
"flair": {
|
||||
"embeddings": [
|
||||
"lm-fr-charlm-backward.pt", "lm-fr-charlm-forward.pt"
|
||||
]
|
||||
}
|
||||
},
|
||||
"download": {
|
||||
"conditions": {
|
||||
"speech_to_text.system": {
|
||||
"pocketsphinx": {
|
||||
"acoustic_model": "cmusphinx-fr-5.2.tar.gz:cmusphinx-fr-5.2",
|
||||
"base_dictionary.txt": "fr-g2p.tar.gz:base_dictionary.txt",
|
||||
"g2p.fst": "fr-g2p.tar.gz:g2p.fst"
|
||||
}
|
||||
},
|
||||
|
||||
"speech_to_text.pocketsphinx.mix_weight": {
|
||||
">0": {
|
||||
"base_language_model.txt": "fr-small.lm.gz:fr-small.lm"
|
||||
}
|
||||
},
|
||||
|
||||
"intent.system": {
|
||||
"flair": {
|
||||
"flair/cache/embeddings/lm-fr-charlm-backward.pt": "lm-fr-charlm-backward.pt",
|
||||
"flair/cache/embeddings/lm-fr-charlm-forward.pt": "lm-fr-charlm-forward.pt"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"files": {
|
||||
"cmusphinx-fr-5.2.tar.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/cmusphinx-fr-5.2.tar.gz"
|
||||
},
|
||||
"fr-small.lm.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/fr-small.lm.gz"
|
||||
},
|
||||
"fr-g2p.tar.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/fr-g2p.tar.gz"
|
||||
},
|
||||
"lm-fr-charlm-backward.pt": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/lm-fr-charlm-backward.pt",
|
||||
"cache": false
|
||||
},
|
||||
"lm-fr-charlm-forward.pt": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/lm-fr-charlm-forward.pt",
|
||||
"cache": false
|
||||
}
|
||||
}
|
||||
"language": "fr",
|
||||
"name": "fr",
|
||||
"speech_to_text": {
|
||||
"system": "pocketsphinx",
|
||||
"dictionary_casing": "lower",
|
||||
"kaldi": {
|
||||
"base_dictionary": "kaldi/base_dictionary.txt",
|
||||
"base_language_model": "kaldi/base_language_model.txt",
|
||||
"base_language_model_fst": "kaldi/base_language_model.fst",
|
||||
"compatible": true,
|
||||
"custom_words": "kaldi/custom_words.txt",
|
||||
"dictionary": "kaldi/dictionary.txt",
|
||||
"graph": "graph",
|
||||
"language_model": "kaldi/language_model.txt",
|
||||
"model_dir": "kaldi/model",
|
||||
"unknown_words": "kaldi/unknown_words.txt",
|
||||
"mix_fst": "kaldi/mixed.fst",
|
||||
"g2p_model": "kaldi/g2p.fst",
|
||||
"phoneme_examples": "kaldi/phoneme_examples.txt",
|
||||
"phoneme_map": "kaldi/espeak_phonemes.txt"
|
||||
}
|
||||
},
|
||||
"intent": {
|
||||
"wavenet": {
|
||||
"language_code": "fr-FR"
|
||||
},
|
||||
"flair": {
|
||||
"embeddings": [
|
||||
"lm-fr-charlm-backward.pt",
|
||||
"lm-fr-charlm-forward.pt"
|
||||
]
|
||||
}
|
||||
},
|
||||
"download": {
|
||||
"conditions": {
|
||||
"speech_to_text.system": {
|
||||
"pocketsphinx": {
|
||||
"acoustic_model": "cmusphinx-fr-5.2.tar.gz:cmusphinx-fr-5.2",
|
||||
"base_dictionary.txt": "fr-g2p.tar.gz:base_dictionary.txt",
|
||||
"g2p.fst": "fr-g2p.tar.gz:g2p.fst"
|
||||
},
|
||||
"kaldi": {
|
||||
"kaldi": "fr_kaldi-zamia.tar.gz:kaldi"
|
||||
}
|
||||
},
|
||||
"speech_to_text.pocketsphinx.mix_weight": {
|
||||
">0": {
|
||||
"base_language_model.txt": "fr-small.lm.gz:fr-small.lm"
|
||||
}
|
||||
},
|
||||
"speech_to_text.kaldi.mix_weight": {
|
||||
">0": {
|
||||
"kaldi/base_language_model.txt": "generic_fr_lang_model_small-r20191016.arpa.tar.gz:generic_fr_lang_model_small-r20191016.arpa"
|
||||
}
|
||||
},
|
||||
"speech_to_text.kaldi.open_transcription": {
|
||||
"True": {
|
||||
"kaldi/model/base_graph": "fr_kaldi-zamia-base_graph.tar.gz:base_graph"
|
||||
}
|
||||
},
|
||||
"intent.system": {
|
||||
"flair": {
|
||||
"flair/cache/embeddings/lm-fr-charlm-backward.pt": "lm-fr-charlm-backward.pt",
|
||||
"flair/cache/embeddings/lm-fr-charlm-forward.pt": "lm-fr-charlm-forward.pt"
|
||||
}
|
||||
}
|
||||
},
|
||||
"files": {
|
||||
"cmusphinx-fr-5.2.tar.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/cmusphinx-fr-5.2.tar.gz"
|
||||
},
|
||||
"fr-small.lm.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/fr-small.lm.gz"
|
||||
},
|
||||
"fr-g2p.tar.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/fr-g2p.tar.gz"
|
||||
},
|
||||
"lm-fr-charlm-backward.pt": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/lm-fr-charlm-backward.pt",
|
||||
"cache": false
|
||||
},
|
||||
"lm-fr-charlm-forward.pt": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/lm-fr-charlm-forward.pt",
|
||||
"cache": false
|
||||
},
|
||||
"generic_fr_lang_model_small-r20191016.arpa.xz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/generic_fr_lang_model_small-r20191016.arpa.xz"
|
||||
},
|
||||
"fr_kaldi-zamia.tar.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/fr_kaldi-zamia.tar.gz"
|
||||
},
|
||||
"fr_kaldi-zamia-base_graph.tar.gz": {
|
||||
"url": "https://github.com/synesthesiam/rhasspy-profiles/releases/download/v1.0-fr/fr_kaldi-zamia-base_graph.tar.gz"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,25 @@
|
||||
openapi: "3.0.0"
|
||||
info:
|
||||
title: 'Rhasspy Voice Assistant'
|
||||
version: '2.0'
|
||||
version: '2.4'
|
||||
description: 'API for Rhasspy Voice Assistant Toolkit'
|
||||
schemes:
|
||||
- http
|
||||
paths:
|
||||
/api/version:
|
||||
get:
|
||||
summary: 'Get Rhasspy version'
|
||||
produces:
|
||||
- text/plain
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
schema:
|
||||
type: string
|
||||
/api/profiles:
|
||||
get:
|
||||
summary: 'Get available profiles'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
@@ -19,15 +29,15 @@ paths:
|
||||
/api/profile:
|
||||
get:
|
||||
summary: 'Get profile settings'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
parameters:
|
||||
- in: query
|
||||
name: layers
|
||||
description: ''
|
||||
enum: [all, defaults, profile]
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: layers
|
||||
description: ''
|
||||
enum: [all, defaults, profile]
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
@@ -35,7 +45,7 @@ paths:
|
||||
type: object
|
||||
post:
|
||||
summary: 'Overwrite profile settings'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
requestBody:
|
||||
description: 'JSON to write to profile'
|
||||
@@ -45,12 +55,12 @@ paths:
|
||||
schema:
|
||||
type: object
|
||||
parameters:
|
||||
- in: query
|
||||
name: layers
|
||||
description: ''
|
||||
enum: [default, profile]
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: layers
|
||||
description: ''
|
||||
enum: [default, profile]
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
@@ -59,7 +69,7 @@ paths:
|
||||
/api/listen-for-wake:
|
||||
post:
|
||||
summary: 'Make Rhasspy listen for the wake word'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
@@ -69,30 +79,30 @@ paths:
|
||||
/api/listen-for-command:
|
||||
post:
|
||||
summary: 'Make Rhasspy listen for a command'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
parameters:
|
||||
- in: query
|
||||
name: nohass
|
||||
description: 'True if intent should NOT be sent to Home Assistant'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: entity
|
||||
description: 'Set slot named entity in recognized intent to value'
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: value
|
||||
description: 'Set slot named entity in recognized intent to value'
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: timeout
|
||||
description: 'Number of seconds before empty intent is returned'
|
||||
schema:
|
||||
type: float
|
||||
- in: query
|
||||
name: nohass
|
||||
description: 'True if intent should NOT be sent to Home Assistant'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: entity
|
||||
description: 'Set slot named entity in recognized intent to value'
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: value
|
||||
description: 'Set slot named entity in recognized intent to value'
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: timeout
|
||||
description: 'Number of seconds before empty intent is returned'
|
||||
schema:
|
||||
type: float
|
||||
responses:
|
||||
'200':
|
||||
description: Intent
|
||||
@@ -101,7 +111,7 @@ paths:
|
||||
/api/microphones:
|
||||
get:
|
||||
summary: 'Get list of available microphones'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
@@ -111,7 +121,7 @@ paths:
|
||||
/api/test-microphones:
|
||||
get:
|
||||
summary: 'Get list of available microphones and if they are working'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
@@ -121,7 +131,7 @@ paths:
|
||||
/api/speakers:
|
||||
get:
|
||||
summary: 'Get list of available speakers'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
@@ -131,7 +141,7 @@ paths:
|
||||
/api/pronounce:
|
||||
post:
|
||||
summary: 'Pronounce a word or set of phonemes'
|
||||
produces:
|
||||
produces:
|
||||
- text/plain
|
||||
- audio/wav
|
||||
requestBody:
|
||||
@@ -142,24 +152,24 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
parameters:
|
||||
- in: query
|
||||
name: pronounce_type
|
||||
description: 'Input type'
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
enum: [word, phonemes]
|
||||
- in: query
|
||||
name: download
|
||||
description: 'Return WAV file instead of using speakers'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: voice
|
||||
description: 'Voice/language to use (defaults to profile language)'
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: pronounce_type
|
||||
description: 'Input type'
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
enum: [word, phonemes]
|
||||
- in: query
|
||||
name: download
|
||||
description: 'Return WAV file instead of using speakers'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: voice
|
||||
description: 'Voice/language to use (defaults to profile language)'
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
@@ -174,7 +184,7 @@ paths:
|
||||
/api/lookup:
|
||||
post:
|
||||
summary: 'Look up a word in the dictionary or guess pronunciation'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
requestBody:
|
||||
description: 'Word to look up'
|
||||
@@ -184,12 +194,12 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
parameters:
|
||||
- in: query
|
||||
name: n
|
||||
description: 'Number of pronunciations to generate'
|
||||
schema:
|
||||
type: integer
|
||||
default: 5
|
||||
- in: query
|
||||
name: n
|
||||
description: 'Number of pronunciations to generate'
|
||||
schema:
|
||||
type: integer
|
||||
default: 5
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
@@ -200,7 +210,7 @@ paths:
|
||||
/api/phonemes:
|
||||
get:
|
||||
summary: 'Get examples of phonemes for a profile'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
@@ -212,7 +222,7 @@ paths:
|
||||
/api/sentences:
|
||||
get:
|
||||
summary: 'Get example sentences for profile'
|
||||
produces:
|
||||
produces:
|
||||
- text/plain
|
||||
responses:
|
||||
'200':
|
||||
@@ -240,7 +250,7 @@ paths:
|
||||
/api/custom-words:
|
||||
get:
|
||||
summary: 'Get custom words for profile'
|
||||
produces:
|
||||
produces:
|
||||
- text/plain
|
||||
responses:
|
||||
'200':
|
||||
@@ -268,7 +278,7 @@ paths:
|
||||
/api/unknown-words:
|
||||
get:
|
||||
summary: 'Get unknown words for profile'
|
||||
produces:
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
@@ -327,12 +337,12 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
parameters:
|
||||
- in: query
|
||||
name: nohass
|
||||
description: 'True if intent should NOT be sent to Home Assistant'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: nohass
|
||||
description: 'True if intent should NOT be sent to Home Assistant'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
@@ -351,12 +361,28 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
parameters:
|
||||
- in: query
|
||||
name: repeat
|
||||
description: 'True if Rhasspy should repeat the last spoken sentence'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: repeat
|
||||
description: 'True if Rhasspy should repeat the last spoken sentence'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: voice
|
||||
description: 'Override default voice'
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: language
|
||||
description: 'Override default language'
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: play
|
||||
description: 'Speak if true, return WAV data if false'
|
||||
schema:
|
||||
type: boolean
|
||||
default: true
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
@@ -397,12 +423,12 @@ paths:
|
||||
type: string
|
||||
format: binary
|
||||
parameters:
|
||||
- in: query
|
||||
name: nohass
|
||||
description: 'True if intent should NOT be sent to Home Assistant'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: nohass
|
||||
description: 'True if intent should NOT be sent to Home Assistant'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
@@ -424,12 +450,12 @@ paths:
|
||||
post:
|
||||
summary: 'Stop recording WAV file, transcribe, extract intent, and send to Home Assistant'
|
||||
parameters:
|
||||
- in: query
|
||||
name: nohass
|
||||
description: 'True if intent should NOT be sent to Home Assistant'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: nohass
|
||||
description: 'True if intent should NOT be sent to Home Assistant'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
responses:
|
||||
'200':
|
||||
description: Intent
|
||||
@@ -441,16 +467,69 @@ paths:
|
||||
post:
|
||||
summary: 'Force Rhasspy to download files for current profile'
|
||||
parameters:
|
||||
- in: query
|
||||
name: delete
|
||||
description: 'True if download cache should be deleted'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
- in: query
|
||||
name: delete
|
||||
description: 'True if download cache should be deleted'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
responses:
|
||||
'200':
|
||||
description: Log
|
||||
description: OK
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
/api/slots:
|
||||
get:
|
||||
summary: 'Get all slot values'
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
schema:
|
||||
type: object
|
||||
post:
|
||||
summary: 'Add to or overwrite slot values'
|
||||
requestBody:
|
||||
description: 'JSON with slot names and value lists'
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
parameters:
|
||||
- in: query
|
||||
name: overwrite_all
|
||||
description: 'True if slots in JSON should be overwritten'
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
/api/version:
|
||||
get:
|
||||
summary: 'Get Rhasspy version'
|
||||
produces:
|
||||
- text/plain
|
||||
responses:
|
||||
'200':
|
||||
description: Version
|
||||
schema:
|
||||
type: string
|
||||
/api/intents:
|
||||
get:
|
||||
summary: 'Get all defined intents'
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
'200':
|
||||
description: intents
|
||||
schema:
|
||||
type: object
|
||||
|
||||
@@ -24,7 +24,10 @@ disable=
|
||||
unnecessary-pass,
|
||||
unused-argument,
|
||||
invalid-name,
|
||||
broad-except
|
||||
broad-except,
|
||||
no-self-use,
|
||||
c-extension-no-member,
|
||||
too-many-nested-blocks
|
||||
|
||||
[FORMAT]
|
||||
expected-line-ending-format=LF
|
||||
@@ -1,17 +1,20 @@
|
||||
pydash
|
||||
requests
|
||||
paho-mqtt
|
||||
PyAudio
|
||||
webrtcvad
|
||||
fuzzywuzzy[speedup]
|
||||
adapt-parser
|
||||
google-cloud-texttospeech
|
||||
bs4
|
||||
html5lib
|
||||
adapt-parser==0.3.4
|
||||
aiohttp==3.6.2
|
||||
doit==0.31.1
|
||||
fuzzywuzzy[speedup]==0.17.0
|
||||
google-cloud-texttospeech==0.5.0
|
||||
html5lib==1.0.1
|
||||
json5==0.8.5
|
||||
multidict==4.6.1
|
||||
networkx>=2.0
|
||||
doit
|
||||
num2words==0.5.10
|
||||
openfst==1.6.9
|
||||
num2words
|
||||
quart
|
||||
quart-cors
|
||||
aiohttp
|
||||
paho-mqtt==1.5.0
|
||||
PyAudio==0.2.11
|
||||
pydash==4.7.6
|
||||
quart==0.6.15
|
||||
quart-cors==0.1.3
|
||||
requests==2.22.0
|
||||
rhasspy-nlu==0.1.3
|
||||
swagger-ui-py==0.1.7
|
||||
webrtcvad==2.0.10
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
adapt-parser==0.3.4
|
||||
aiohttp==3.6.2
|
||||
doit==0.31.1
|
||||
fuzzywuzzy[speedup]==0.17.0
|
||||
google-cloud-texttospeech==0.5.0
|
||||
html5lib==1.0.1
|
||||
json5==0.8.5
|
||||
multidict==4.6.1
|
||||
networkx>=2.0
|
||||
num2words==0.5.10
|
||||
openfst==1.6.9
|
||||
paho-mqtt==1.5.0
|
||||
PyAudio==0.2.11
|
||||
pydash==4.7.6
|
||||
quart==0.6.15
|
||||
quart-cors==0.1.3
|
||||
requests==2.22.0
|
||||
rhasspy-nlu==0.1.3
|
||||
swagger-ui-py==0.1.7
|
||||
webrtcvad==2.0.10
|
||||
|
||||
flake8==3.7.9
|
||||
pylint==2.4.4
|
||||
pyinstaller==3.5
|
||||
mkdocs==1.0.4
|
||||
@@ -0,0 +1,17 @@
|
||||
aiohttp==3.6.2
|
||||
doit==0.31.1
|
||||
fuzzywuzzy[speedup]==0.17.0
|
||||
html5lib==1.0.1
|
||||
multidict==4.6.1
|
||||
networkx>=2.0
|
||||
num2words==0.5.10
|
||||
openfst==1.6.9
|
||||
paho-mqtt==1.5.0
|
||||
PyAudio==0.2.11
|
||||
pydash==4.7.6
|
||||
quart==0.6.15
|
||||
quart-cors==0.1.3
|
||||
requests==2.22.0
|
||||
rhasspy-nlu==0.1.1
|
||||
swagger-ui-py==0.1.7
|
||||
webrtcvad==2.0.10
|
||||
@@ -1,77 +1,27 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Rhasspy command-line interface"""
|
||||
import argparse
|
||||
import asyncio
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
|
||||
# Configure logging
|
||||
import logging.config
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import wave
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
from typing import Any
|
||||
|
||||
import pydash
|
||||
|
||||
from rhasspy.actor import ActorSystem, Configured, ConfigureEvent, RhasspyActor
|
||||
from rhasspy.audio_player import DummyAudioPlayer
|
||||
from rhasspy.audio_recorder import AudioData, StartStreaming, StopStreaming
|
||||
from rhasspy.audio_recorder import AudioData
|
||||
from rhasspy.core import RhasspyCore
|
||||
from rhasspy.dialogue import DialogueManager
|
||||
from rhasspy.profiles import Profile
|
||||
from rhasspy.utils import buffer_to_wav, maybe_convert_wav
|
||||
from rhasspy.wake import (
|
||||
ListenForWakeWord,
|
||||
PocketsphinxWakeListener,
|
||||
StopListeningForWakeWord,
|
||||
WakeWordDetected,
|
||||
WakeWordNotDetected,
|
||||
)
|
||||
from rhasspy.utils import buffer_to_wav
|
||||
from rhasspy.wake import WakeWordDetected
|
||||
|
||||
logger = logging.getLogger("rhasspy")
|
||||
|
||||
|
||||
try:
|
||||
# Need to import here because they screw with logging
|
||||
import flair
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
logging.config.dictConfig(
|
||||
{
|
||||
"version": 1,
|
||||
"disable_existing_loggers": True,
|
||||
"formatters": {
|
||||
"rhasspy.format": {"format": "%(levelname)s:%(name)s:%(message)s"}
|
||||
},
|
||||
"handlers": {
|
||||
"rhasspy.handler": {
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "rhasspy.format",
|
||||
"stream": "ext://sys.stderr",
|
||||
}
|
||||
},
|
||||
"loggers": {
|
||||
"rhasspy": {"handlers": ["rhasspy.handler"], "propagate": False},
|
||||
"flair": {
|
||||
"handlers": ["rhasspy.handler"],
|
||||
"level": "INFO",
|
||||
"propagate": False,
|
||||
},
|
||||
},
|
||||
"root": {"handlers": ["rhasspy.handler"]},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Globals
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -82,6 +32,7 @@ mic_stdin_running = False
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
"""Main method"""
|
||||
global mic_stdin_running, mic_stdin_thread
|
||||
|
||||
# Parse command-line arguments
|
||||
@@ -127,15 +78,6 @@ async def main() -> None:
|
||||
"--defaults", action="store_true", help="Only print default settings"
|
||||
)
|
||||
|
||||
sentences_parser = sub_parsers.add_parser(
|
||||
"sentences", help="Print profile sentences.ini"
|
||||
)
|
||||
|
||||
# validate
|
||||
# validate_parser = sub_parsers.add_parser(
|
||||
# "validate", help="Validate profile against schema"
|
||||
# )
|
||||
|
||||
# wav2text
|
||||
wav2text_parser = sub_parsers.add_parser(
|
||||
"wav2text", help="WAV file to text transcription"
|
||||
@@ -166,39 +108,6 @@ async def main() -> None:
|
||||
"--no-cache", action="store_true", help="Clear training cache"
|
||||
)
|
||||
|
||||
# record
|
||||
# record_parser = sub_parsers.add_parser('record', help='Record test phrases for profile')
|
||||
# record_parser.add_argument('--directory', help='Directory to write WAV files and intent JSON files')
|
||||
|
||||
# record-wake
|
||||
# record_wake_parser = sub_parsers.add_parser('record-wake', help='Record wake word examples for profile')
|
||||
# record_wake_parser.add_argument('--directory', help='Directory to write WAV files')
|
||||
# record_wake_parser.add_argument('--negative', action='store_true', help='Record negative examples (not the wake word)')
|
||||
|
||||
# tune
|
||||
# tune_parser = sub_parsers.add_parser('tune', help='Tune speech acoustic model for profile')
|
||||
# tune_parser.add_argument('--directory', help='Directory with WAV files and intent JSON files')
|
||||
|
||||
# tune-wake
|
||||
# tune_wake_parser = sub_parsers.add_parser('tune-wake', help='Tune wake acoustic model for profile')
|
||||
# tune_wake_parser.add_argument('--directory', help='Directory with WAV files')
|
||||
|
||||
# test
|
||||
# test_parser = sub_parsers.add_parser('test', help='Test speech/intent recognizers for profile')
|
||||
# test_parser.add_argument('directory', help='Directory with WAV files and intent JSON files')
|
||||
|
||||
# test-wake
|
||||
# test_wake_parser = sub_parsers.add_parser(
|
||||
# "test-wake", help="Test wake word examples for profile"
|
||||
# )
|
||||
# test_wake_parser.add_argument("directory", help="Directory with WAV files")
|
||||
# test_wake_parser.add_argument(
|
||||
# "--threads", type=int, default=4, help="Number of threads to use"
|
||||
# )
|
||||
# test_wake_parser.add_argument(
|
||||
# "--system", type=str, default=None, help="Override wake word system"
|
||||
# )
|
||||
|
||||
# mic2wav
|
||||
mic2wav_parser = sub_parsers.add_parser("mic2wav", help="Voice command to WAV data")
|
||||
mic2wav_parser.add_argument(
|
||||
@@ -295,7 +204,7 @@ async def main() -> None:
|
||||
text2speech_parser.add_argument("sentences", nargs="*", help="Sentences to speak")
|
||||
|
||||
# sleep
|
||||
sleep_parser = sub_parsers.add_parser("sleep", help="Wait for wake word")
|
||||
sub_parsers.add_parser("sleep", help="Wait for wake word")
|
||||
|
||||
# download
|
||||
download_parser = sub_parsers.add_parser("download", help="Download profile files")
|
||||
@@ -304,25 +213,21 @@ async def main() -> None:
|
||||
)
|
||||
|
||||
# check
|
||||
check_parser = sub_parsers.add_parser(
|
||||
"check", help="Check downloaded profile files"
|
||||
)
|
||||
sub_parsers.add_parser("check", help="Check downloaded profile files")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.debug:
|
||||
logging.root.setLevel(logging.DEBUG)
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
else:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
profiles_dirs = [args.system_profiles, args.user_profiles]
|
||||
logger.debug(profiles_dirs)
|
||||
|
||||
default_settings = Profile.load_defaults(args.system_profiles)
|
||||
|
||||
# Create rhasspy core
|
||||
from rhasspy.core import RhasspyCore
|
||||
|
||||
core = RhasspyCore(args.profile, args.system_profiles, args.user_profiles)
|
||||
|
||||
# Add profile settings from the command line
|
||||
@@ -330,10 +235,10 @@ async def main() -> None:
|
||||
for key, value in args.set:
|
||||
try:
|
||||
value = json.loads(value)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.debug("Profile: {0}={1}".format(key, value))
|
||||
logger.debug("Profile: %s=%s", key, value)
|
||||
extra_settings[key] = value
|
||||
core.profile.set(key, value)
|
||||
|
||||
@@ -345,18 +250,6 @@ async def main() -> None:
|
||||
else:
|
||||
# Print profile settings
|
||||
json.dump(core.profile.json, sys.stdout, indent=4)
|
||||
# elif args.command == "validate":
|
||||
# from cerberus import Validator
|
||||
|
||||
# schema_path = os.path.join(os.path.dirname(__file__), "profile_schema.json")
|
||||
# with open(schema_path, "r") as schema_file:
|
||||
# v = Validator(json.load(schema_file))
|
||||
# if v.validate(core.profile.json):
|
||||
# print("VALID")
|
||||
# else:
|
||||
# print("INVALID")
|
||||
# for err in v._errors:
|
||||
# print(err)
|
||||
elif args.command == "sentences":
|
||||
sentences_path = core.profile.read_path(
|
||||
core.profile.get("speech_to_text.sentences_ini", "sentences.ini")
|
||||
@@ -390,12 +283,6 @@ async def main() -> None:
|
||||
"text2intent": text2intent,
|
||||
"wav2intent": wav2intent,
|
||||
"train": train_profile,
|
||||
# 'record': record,
|
||||
# 'record-wake': record_wake,
|
||||
# 'tune': tune,
|
||||
# 'tune-wake': tune_wake,
|
||||
# 'test': test,
|
||||
# "test-wake": test_wake,
|
||||
"mic2text": mic2text,
|
||||
"mic2intent": mic2intent,
|
||||
"mic2wav": mic2wav,
|
||||
@@ -409,16 +296,17 @@ async def main() -> None:
|
||||
"check": check,
|
||||
}
|
||||
|
||||
if not args.command in ["test-wake"]:
|
||||
# Automatically start core
|
||||
await core.start()
|
||||
# Automatically start core
|
||||
await core.start()
|
||||
|
||||
if not args.no_check and (args.command not in ["check", "download"]):
|
||||
# Verify that profile has necessary files
|
||||
missing_files = core.check_profile()
|
||||
if len(missing_files) > 0:
|
||||
logger.fatal(
|
||||
f"Missing required files for {profile.name}: {missing_files.keys()}. Please run download command and try again."
|
||||
"Missing required files for %s: %s. Please run download command and try again.",
|
||||
profile.name,
|
||||
missing_files.keys(),
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
@@ -446,6 +334,7 @@ async def main() -> None:
|
||||
|
||||
|
||||
async def wav2text(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Transcribe WAV file(s)"""
|
||||
if len(args.wav_files) > 0:
|
||||
# Read WAV paths from argument list
|
||||
transcriptions = {}
|
||||
@@ -470,7 +359,7 @@ async def wav2text(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def text2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
# Parse sentences from command line or stdin
|
||||
"""Parse sentences from command line or stdin"""
|
||||
intents = {}
|
||||
sentences = args.sentences if len(args.sentences) > 0 else sys.stdin
|
||||
for sentence in sentences:
|
||||
@@ -492,6 +381,7 @@ async def text2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def wav2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Recognize intent from WAV file(s)"""
|
||||
if len(args.wav_files) > 0:
|
||||
# Read WAV paths from argument list
|
||||
transcriptions = {}
|
||||
@@ -530,453 +420,18 @@ async def wav2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def train_profile(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Train Rhasspy profile"""
|
||||
result = await core.train(reload_actors=False, no_cache=args.no_cache)
|
||||
print(result)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# record: record phrases for testing/tuning
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# def record(core:RhasspyCore, profile:Profile, args:Any) -> None:
|
||||
# dir_path = args.directory or profile.write_dir('record')
|
||||
# dir_name = os.path.split(dir_path)[1]
|
||||
# os.makedirs(dir_path, exist_ok=True)
|
||||
|
||||
# tagged_path = profile.read_path(profile.get('training.tagged_sentences'))
|
||||
# assert os.path.exists(tagged_path), 'Missing tagged sentences (%s). Need to train?' % tagged_path
|
||||
|
||||
# # Load and parse tagged sentences
|
||||
# intent_sentences = []
|
||||
# intent_name = ''
|
||||
# with open(tagged_path, 'r') as tagged_file:
|
||||
# for line in tagged_file:
|
||||
# line = line.strip()
|
||||
# if len(line) == 0:
|
||||
# continue # skip blank lines
|
||||
|
||||
# if line.startswith('# intent:'):
|
||||
# intent_name = line.split(':', maxsplit=1)[1]
|
||||
# elif line.startswith('-'):
|
||||
# tagged_sentence = line[1:].strip()
|
||||
# sentence, entities = extract_entities(tagged_sentence)
|
||||
# intent_sentences.append((intent_name, sentence, entities))
|
||||
|
||||
# assert len(intent_sentences) > 0, 'No tagged sentences available'
|
||||
# print('Loaded %s sentence(s)' % len(intent_sentences))
|
||||
|
||||
# # Record WAV files
|
||||
# audio_recorder = core.get_audio_recorder()
|
||||
# wav_prefix = dir_name
|
||||
# wav_num = 0
|
||||
# try:
|
||||
# while True:
|
||||
# intent_name, sentence, entities = random.choice(intent_sentences)
|
||||
# print('Speak the following sentence. Press ENTER to start (CTRL+C to quit).')
|
||||
# print(sentence)
|
||||
# input()
|
||||
# audio_recorder.start_recording(True, False)
|
||||
# print('Recording. Press ENTER to stop (CTRL+C to quit).')
|
||||
# input()
|
||||
# wav_data = audio_recorder.stop_recording(True, False)
|
||||
|
||||
# # Determine WAV file name
|
||||
# wav_path = os.path.join(dir_path, '%s-%03d.wav' % (wav_prefix, wav_num))
|
||||
# while os.path.exists(wav_path):
|
||||
# wav_num += 1
|
||||
# wav_path = os.path.join(dir_path, '%s-%03d.wav' % (wav_prefix, wav_num))
|
||||
|
||||
# # Write WAV data
|
||||
# with open(wav_path, 'wb') as wav_file:
|
||||
# wav_file.write(wav_data)
|
||||
|
||||
# # Write intent (with transcription)
|
||||
# intent_path = os.path.join(dir_path, '%s-%03d.wav.json' % (wav_prefix, wav_num))
|
||||
# with open(intent_path, 'w') as intent_file:
|
||||
# # Use Rasa NLU format
|
||||
# intent = {
|
||||
# 'text': sentence,
|
||||
# 'intent': { 'name': intent_name },
|
||||
# 'entities': [
|
||||
# { 'entity': entity, 'value': value }
|
||||
# for entity, value in entities
|
||||
# ]
|
||||
# }
|
||||
|
||||
# json.dump(intent, intent_file, indent=4)
|
||||
|
||||
# print('')
|
||||
# except KeyboardInterrupt:
|
||||
# print('Done')
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# record-wake: record wake word examples
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# def record_wake(core:RhasspyCore, profile:Profile, args:Any) -> None:
|
||||
# keyphrase = profile.get('wake.pocketsphinx.keyphrase', '')
|
||||
# assert len(keyphrase) > 0, 'No wake word'
|
||||
|
||||
# wav_prefix = keyphrase.replace(' ', '-')
|
||||
# base_dir_path = args.directory or profile.write_dir('record')
|
||||
|
||||
# if args.negative:
|
||||
# dir_path = os.path.join(base_dir_path, wav_prefix, 'not-wake-word')
|
||||
# else:
|
||||
# dir_path = os.path.join(base_dir_path, wav_prefix, 'wake-word')
|
||||
|
||||
# os.makedirs(dir_path, exist_ok=True)
|
||||
|
||||
# # Record WAV files
|
||||
# audio_recorder = core.get_audio_recorder()
|
||||
# wav_num = 0
|
||||
# try:
|
||||
# while True:
|
||||
# # Determine WAV file name
|
||||
# wav_path = os.path.join(dir_path, '%s-%02d.wav' % (wav_prefix, wav_num))
|
||||
# while os.path.exists(wav_path):
|
||||
# wav_num += 1
|
||||
# wav_path = os.path.join(dir_path, '%s-%02d.wav' % (wav_prefix, wav_num))
|
||||
|
||||
# if args.negative:
|
||||
# print('Speak anything EXCEPT the wake word. Press ENTER to start (CTRL+C to quit).')
|
||||
# print('NOT %s (%s)' % (keyphrase, wav_num))
|
||||
# else:
|
||||
# print('Speak your wake word. Press ENTER to start (CTRL+C to quit).')
|
||||
# print('%s (%s)' % (keyphrase, wav_num))
|
||||
|
||||
# input()
|
||||
# audio_recorder.start_recording(True, False)
|
||||
# print('Recording. Press ENTER to stop (CTRL+C to quit).')
|
||||
# input()
|
||||
# wav_data = audio_recorder.stop_recording(True, False)
|
||||
|
||||
# # Write WAV data
|
||||
# with open(wav_path, 'wb') as wav_file:
|
||||
# wav_file.write(wav_data)
|
||||
|
||||
# print('')
|
||||
# except KeyboardInterrupt:
|
||||
# print('Done')
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# tune: fine tune speech acoustic model
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# def tune(core:RhasspyCore, profile:Profile, args:Any) -> None:
|
||||
# dir_path = args.directory or profile.read_path('record')
|
||||
# assert os.path.exists(dir_path), 'Directory does not exist'
|
||||
# wav_paths = [os.path.join(dir_path, name)
|
||||
# for name in os.listdir(dir_path)
|
||||
# if name.endswith('.wav')]
|
||||
|
||||
# # Load intents for each WAV
|
||||
# wav_intents = {}
|
||||
# for wav_path in wav_paths:
|
||||
# intent_path = wav_path + '.json'
|
||||
# if os.path.exists(intent_path):
|
||||
# with open(intent_path, 'r') as intent_file:
|
||||
# wav_intents[wav_path] = json.load(intent_file)
|
||||
|
||||
# # Do tuning
|
||||
# tuner = core.get_speech_tuner(profile.name)
|
||||
# tuner.preload()
|
||||
|
||||
# print('Tuning speech system with %s WAV file(s)' % len(wav_intents))
|
||||
# tune_start = time.time()
|
||||
# tuner.tune(wav_intents)
|
||||
# print('Finished tuning in %s second(s)' % (time.time() - tune_start))
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# tune-wake: fine tune wake acoustic model
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# def tune_wake(core:RhasspyCore, profile:Profile, args:Any) -> None:
|
||||
# keyphrase = profile.get('wake.pocketsphinx.keyphrase', '')
|
||||
# assert len(keyphrase) > 0, 'No wake word'
|
||||
|
||||
# wav_prefix = keyphrase.replace(' ', '-')
|
||||
# base_dir_path = args.directory or profile.read_path('record')
|
||||
|
||||
# # Path to positive examples
|
||||
# true_path = os.path.join(base_dir_path, wav_prefix, 'wake-word')
|
||||
# if os.path.exists(true_path):
|
||||
# true_wav_paths = [os.path.join(true_path, name)
|
||||
# for name in os.listdir(true_path)
|
||||
# if name.endswith('.wav')]
|
||||
# else:
|
||||
# true_wav_paths = []
|
||||
|
||||
# # Path to negative examples
|
||||
# false_path = os.path.join(base_dir_path, wav_prefix, 'not-wake-word')
|
||||
# if os.path.exists(false_path):
|
||||
# false_wav_paths = [os.path.join(false_path, name)
|
||||
# for name in os.listdir(false_path)
|
||||
# if name.endswith('.wav')]
|
||||
# else:
|
||||
# false_wav_paths = []
|
||||
|
||||
# # Do tuning
|
||||
# mllr_path = profile.write_path(
|
||||
# profile.get('wake.pocketsphinx.mllr_matrix'))
|
||||
|
||||
# tuner = SphinxTrainSpeechTuner(profile)
|
||||
# tuner.preload()
|
||||
|
||||
# # Add "transcriptions"
|
||||
# wav_intents = {}
|
||||
# for wav_path in true_wav_paths:
|
||||
# wav_intents[wav_path] = { 'text': keyphrase }
|
||||
|
||||
# for wav_path in false_wav_paths:
|
||||
# wav_intents[wav_path] = { 'text': '' }
|
||||
|
||||
# print('Tuning wake word system with %s positive and %s negative example(s)' % (len(true_wav_paths), len(false_wav_paths)))
|
||||
# tune_start = time.time()
|
||||
# tuner.tune(wav_intents, mllr_path=mllr_path)
|
||||
# print('Finished tuning in %s second(s)' % (time.time() - tune_start))
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# test: test speech/intent recognizers
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# def test(core:RhasspyCore, profile:Profile, args:Any) -> None:
|
||||
# dir_path = args.directory or profile.read_path('record')
|
||||
# assert os.path.exists(dir_path), 'Directory does not exist'
|
||||
# wav_paths = [os.path.join(dir_path, name)
|
||||
# for name in os.listdir(dir_path)
|
||||
# if name.endswith('.wav')]
|
||||
|
||||
# # Load intents for each WAV
|
||||
# wav_intents = {}
|
||||
# for wav_path in wav_paths:
|
||||
# intent_path = wav_path + '.json'
|
||||
# if os.path.exists(intent_path):
|
||||
# with open(intent_path, 'r') as intent_file:
|
||||
# wav_intents[wav_path] = json.load(intent_file)
|
||||
|
||||
# # Transcribe and match intent names/entities
|
||||
# decoder = core.get_speech_decoder(profile.name)
|
||||
# decoder.preload()
|
||||
|
||||
# recognizer = core.get_intent_recognizer(profile.name)
|
||||
# recognizer.preload()
|
||||
|
||||
# # TODO: parallelize
|
||||
# results = {}
|
||||
# for wav_path, expected_intent in wav_intents.items():
|
||||
# # Transcribe
|
||||
# decode_start = time.time()
|
||||
# with open(wav_path, 'rb') as wav_file:
|
||||
# actual_sentence = decoder.transcribe_wav(wav_file.read())
|
||||
|
||||
# decode_sec = time.time() - decode_start
|
||||
|
||||
# # Recognize
|
||||
# recognize_start = time.time()
|
||||
# actual_intent = recognizer.recognize(actual_sentence)
|
||||
# recognize_sec = time.time() - recognize_start
|
||||
|
||||
# wav_name = os.path.split(wav_path)[1]
|
||||
# results[wav_name] = {
|
||||
# 'profile': profile.name,
|
||||
# 'expected': expected_intent,
|
||||
# 'actual': actual_intent,
|
||||
# 'speech': {
|
||||
# 'system': profile.get('speech_to_text.system'),
|
||||
# 'time_sec': decode_sec
|
||||
# },
|
||||
# 'intent': {
|
||||
# 'system': profile.get('intent.system'),
|
||||
# 'time_sec': recognize_sec
|
||||
# }
|
||||
# }
|
||||
|
||||
# json.dump(results, sys.stdout, indent=4)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# test-wake: test wake word examples
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
# def test_wake(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
# base_dir_path = args.directory
|
||||
# wake_system = args.system or profile.get("wake.system", "pocketsphinx")
|
||||
|
||||
# # Path to positive examples
|
||||
# true_path = os.path.join(base_dir_path, "wake-word")
|
||||
# true_wav_paths: List[str] = []
|
||||
# if os.path.exists(true_path):
|
||||
# true_wav_paths = [
|
||||
# os.path.join(true_path, name)
|
||||
# for name in os.listdir(true_path)
|
||||
# if name.endswith(".wav")
|
||||
# ]
|
||||
|
||||
# # Path to negative examples
|
||||
# false_path = os.path.join(base_dir_path, "not-wake-word")
|
||||
# false_wav_paths: List[str] = []
|
||||
# if os.path.exists(false_path):
|
||||
# false_wav_paths = [
|
||||
# os.path.join(false_path, name)
|
||||
# for name in os.listdir(false_path)
|
||||
# if name.endswith(".wav")
|
||||
# ]
|
||||
|
||||
# # Spin up actors
|
||||
# kwargs: Dict[str, Any] = {}
|
||||
# if not args.debug:
|
||||
# kwargs = {"logDefs": {"version": 1, "loggers": {"": {}}}}
|
||||
|
||||
# system = ActorSystem("multiprocTCPBase", **kwargs)
|
||||
# detected_paths: Set[str] = set()
|
||||
|
||||
# try:
|
||||
# test_actor = system.createActor(TestWakeActor)
|
||||
# all_wav_paths = true_wav_paths + false_wav_paths
|
||||
|
||||
# start_time = time.time()
|
||||
# with system.private() as private:
|
||||
# private.tell(test_actor, ConfigureEvent(profile, transitions=False))
|
||||
# result = private.listen()
|
||||
# assert isinstance(result, Configured)
|
||||
|
||||
# private.tell(test_actor, (wake_system, args.threads, all_wav_paths))
|
||||
|
||||
# # Collect WAV paths that had a positive detection
|
||||
# detected_paths = private.listen()
|
||||
|
||||
# end_time = time.time()
|
||||
# finally:
|
||||
# system.shutdown()
|
||||
|
||||
# # Compute statistics
|
||||
# expected_true = len(true_wav_paths)
|
||||
# expected_false = len(false_wav_paths)
|
||||
|
||||
# true_positives = 0
|
||||
# false_positives = 0
|
||||
# true_negatives = 0
|
||||
# false_negatives = 0
|
||||
|
||||
# should_be_true = True
|
||||
# for wav_path in itertools.chain(true_wav_paths, [None], false_wav_paths):
|
||||
# # Switch between true and false examples
|
||||
# if wav_path is None:
|
||||
# should_be_true = not should_be_true
|
||||
# continue
|
||||
|
||||
# detected = wav_path in detected_paths
|
||||
# if detected:
|
||||
# if should_be_true:
|
||||
# true_positives += 1
|
||||
# status = ""
|
||||
# else:
|
||||
# false_positives += 1
|
||||
# status = ":("
|
||||
# else:
|
||||
# if should_be_true:
|
||||
# false_negatives += 1
|
||||
# status = ":("
|
||||
# else:
|
||||
# true_negatives += 1
|
||||
# status = ""
|
||||
|
||||
# # Report
|
||||
# result = {
|
||||
# "system": wake_system,
|
||||
# "settings": profile.get("wake.%s" % wake_system, {}),
|
||||
# "detected": list(detected_paths),
|
||||
# "not_detected": list(set(all_wav_paths) - set(detected_paths)),
|
||||
# "time_sec": end_time - start_time,
|
||||
# "statistics": {
|
||||
# "true_positives": true_positives,
|
||||
# "true_negatives": true_negatives,
|
||||
# "false_positives": false_positives,
|
||||
# "false_negatives": false_negatives,
|
||||
# },
|
||||
# }
|
||||
|
||||
# json.dump(result, sys.stdout, indent=4)
|
||||
|
||||
|
||||
# # -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
# class TestWakeActor(RhasspyActor):
|
||||
# def __init__(self):
|
||||
# RhasspyActor.__init__(self)
|
||||
# self.actors: List[RhasspyActor] = []
|
||||
# self.wav_paths: List[str] = []
|
||||
# self.wav_paths_left: List[str] = []
|
||||
# self.detected_paths: Set[str] = set()
|
||||
|
||||
# def in_started(self, message: Any, sender: RhasspyActor) -> None:
|
||||
# if isinstance(message, tuple):
|
||||
# # Start up
|
||||
# self.parent = sender
|
||||
# wake_system, num_actors, self.wav_paths = message
|
||||
# self.wav_paths_left = list(self.wav_paths)
|
||||
|
||||
# # Create actors
|
||||
# wake_class = DialogueManager.get_wake_class(wake_system)
|
||||
# for i in range(num_actors):
|
||||
# actor = self.createActor(wake_class)
|
||||
# self.send(
|
||||
# actor,
|
||||
# ConfigureEvent(
|
||||
# profile=self.profile,
|
||||
# preload=True,
|
||||
# recorder=self.myAddress,
|
||||
# transitions=False,
|
||||
# not_detected=True,
|
||||
# ),
|
||||
# )
|
||||
|
||||
# self.transition("loaded")
|
||||
|
||||
# def in_loaded(self, message: Any, sender: RhasspyActor) -> None:
|
||||
# if isinstance(message, Configured):
|
||||
# self.send(sender, ListenForWakeWord())
|
||||
# elif isinstance(message, StartStreaming):
|
||||
# if len(self.wav_paths) > 0:
|
||||
# self.send_random_wav(sender)
|
||||
# elif isinstance(message, WakeWordDetected):
|
||||
# # Detected
|
||||
# wav_path = message.audio_data_info["path"]
|
||||
# # print('!', end='', flush=True)
|
||||
# self.detected_paths.add(wav_path)
|
||||
# if wav_path in self.wav_paths_left:
|
||||
# self.wav_paths_left.remove(wav_path)
|
||||
|
||||
# if len(self.wav_paths) > 0:
|
||||
# self.send_random_wav(sender)
|
||||
|
||||
# elif isinstance(message, WakeWordNotDetected):
|
||||
# # Not detected
|
||||
# wav_path = message.audio_data_info["path"]
|
||||
# # print('.', end='', flush=True)
|
||||
# self.wav_paths_left.remove(wav_path)
|
||||
# if len(self.wav_paths) > 0:
|
||||
# self.send_random_wav(sender)
|
||||
|
||||
# if len(self.wav_paths_left) == 0:
|
||||
# self.send(self.parent, self.detected_paths)
|
||||
|
||||
# def send_random_wav(self, receiver):
|
||||
# index = random.randint(0, len(self.wav_paths) - 1)
|
||||
# wav_path = self.wav_paths.pop(index)
|
||||
# with open(wav_path, "rb") as wav_file:
|
||||
# audio_data = maybe_convert_wav(wav_file.read())
|
||||
# self.send(receiver, AudioData(audio_data, path=wav_path))
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# mic2wav: record voice command and output WAV data
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def mic2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Record voice command from microphone"""
|
||||
# Listen until silence
|
||||
wav_data = buffer_to_wav((await core.record_command(args.timeout)).data)
|
||||
|
||||
@@ -990,6 +445,7 @@ async def mic2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def mic2text(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Record voice command and transcribe"""
|
||||
# Listen until silence
|
||||
wav_data = buffer_to_wav((await core.record_command(args.timeout)).data)
|
||||
|
||||
@@ -1006,6 +462,7 @@ async def mic2text(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
def read_audio_stdin(core: RhasspyCore, chunk_size: int = 960):
|
||||
"""Record audio chunks from stdin"""
|
||||
global mic_stdin_running
|
||||
while mic_stdin_running:
|
||||
audio_data = sys.stdin.buffer.read(chunk_size)
|
||||
@@ -1013,6 +470,7 @@ def read_audio_stdin(core: RhasspyCore, chunk_size: int = 960):
|
||||
|
||||
|
||||
async def mic2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Record voice command, transcribe, and recognize intent"""
|
||||
# Listen until silence
|
||||
wav_data = buffer_to_wav((await core.record_command(args.timeout)).data)
|
||||
|
||||
@@ -1035,6 +493,7 @@ async def mic2intent(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def word2phonemes(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Get pronunciation(s) for word(s)"""
|
||||
words = args.words if len(args.words) > 0 else sys.stdin
|
||||
|
||||
# Get pronunciations for all words
|
||||
@@ -1052,6 +511,7 @@ async def word2phonemes(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def word2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Speak a word's pronunciation"""
|
||||
# Get pronunciation for word
|
||||
all_pronunciations = (
|
||||
await core.get_word_pronunciations([args.word], n=1)
|
||||
@@ -1081,12 +541,14 @@ def _send_frame(
|
||||
width: int,
|
||||
channels: int,
|
||||
) -> None:
|
||||
"""Send a single audio frame via MQTT"""
|
||||
with io.BytesIO() as mqtt_buffer:
|
||||
with wave.open(mqtt_buffer, mode="wb") as mqtt_file:
|
||||
mqtt_file: wave.Wave_write = wave.open(mqtt_buffer, mode="wb")
|
||||
with mqtt_file:
|
||||
mqtt_file.setframerate(rate)
|
||||
mqtt_file.setsampwidth(width)
|
||||
mqtt_file.setnchannels(channels)
|
||||
mqtt_file.writeframesraw(audio_data)
|
||||
mqtt_file.writeframes(audio_data)
|
||||
|
||||
# Send audio frame WAV
|
||||
mqtt_payload = mqtt_buffer.getvalue()
|
||||
@@ -1094,6 +556,7 @@ def _send_frame(
|
||||
|
||||
|
||||
async def wav2mqtt(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Publish WAV to MQTT as audio frames"""
|
||||
# hermes/audioServer/<SITE_ID>/audioFrame
|
||||
topic = "hermes/audioServer/%s/audioFrame" % args.site_id
|
||||
|
||||
@@ -1113,7 +576,7 @@ async def wav2mqtt(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
num_chunks = int(
|
||||
(args.silence_before * rate * width * channels) / chunk_size
|
||||
)
|
||||
for i in range(num_chunks):
|
||||
for _ in range(num_chunks):
|
||||
_send_frame(
|
||||
core, topic, bytes(chunk_size), rate, width, channels
|
||||
)
|
||||
@@ -1134,7 +597,7 @@ async def wav2mqtt(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
num_chunks = int(
|
||||
(args.silence_after * rate * width * channels) / chunk_size
|
||||
)
|
||||
for i in range(num_chunks):
|
||||
for _ in range(num_chunks):
|
||||
_send_frame(
|
||||
core, topic, bytes(chunk_size), rate, width, channels
|
||||
)
|
||||
@@ -1153,6 +616,7 @@ async def wav2mqtt(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def text2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Speak a sentence and output WAV data"""
|
||||
result = await core.speak_sentence(args)
|
||||
sys.stdout.buffer.write(result.wav_data)
|
||||
|
||||
@@ -1163,6 +627,7 @@ async def text2wav(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def text2speech(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Speak sentences"""
|
||||
sentences = args.sentences
|
||||
if len(sentences) == 0:
|
||||
sentences = sys.stdin
|
||||
@@ -1178,6 +643,7 @@ async def text2speech(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def sleep(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Wait for wake word to be spoken"""
|
||||
result = await core.wakeup_and_wait()
|
||||
if isinstance(result, WakeWordDetected):
|
||||
print(result.name)
|
||||
@@ -1191,6 +657,7 @@ async def sleep(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def download(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Download necessary profile files"""
|
||||
await core.download_profile(delete=args.delete)
|
||||
print("OK")
|
||||
|
||||
@@ -1201,6 +668,7 @@ async def download(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
|
||||
|
||||
async def check(core: RhasspyCore, profile: Profile, args: Any) -> None:
|
||||
"""Verify that profile files are downloaded"""
|
||||
missing_files = core.check_profile()
|
||||
json.dump(missing_files, sys.stdout, indent=4)
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""Run Rhasspy command-line"""
|
||||
import asyncio
|
||||
|
||||
from rhasspy import main
|
||||
|
||||
@@ -116,7 +116,7 @@ class APlayAudioPlayer(RhasspyActor):
|
||||
aplay_cmd.append(path)
|
||||
|
||||
self._logger.debug(aplay_cmd)
|
||||
subprocess.run(aplay_cmd)
|
||||
subprocess.run(aplay_cmd, check=True)
|
||||
|
||||
def play_data(self, wav_data: bytes) -> None:
|
||||
"""Play a WAV buffer using aplay."""
|
||||
@@ -128,7 +128,7 @@ class APlayAudioPlayer(RhasspyActor):
|
||||
self._logger.debug(aplay_cmd)
|
||||
|
||||
# Play data
|
||||
subprocess.run(aplay_cmd, input=wav_data)
|
||||
subprocess.run(aplay_cmd, input=wav_data, check=True)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@@ -160,7 +160,7 @@ class APlayAudioPlayer(RhasspyActor):
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# MQTT audio player for Snips.AI Hermes Protocol
|
||||
# https://docs.snips.ai/ressources/hermes-protocol
|
||||
# https://docs.snips.ai/reference/hermes
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -170,7 +170,7 @@ class HermesAudioPlayer(RhasspyActor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.mqtt: Optional[RhasspyActor] = None
|
||||
self.site_ids:List[str] = []
|
||||
self.site_ids: List[str] = []
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
|
||||
@@ -149,6 +149,7 @@ class PyAudioRecorder(RhasspyActor):
|
||||
self.buffers: Dict[str, bytes] = defaultdict(bytes)
|
||||
self.device_index = None
|
||||
self.frames_per_buffer = 480
|
||||
self.keep_device_open = True
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -170,6 +171,10 @@ class PyAudioRecorder(RhasspyActor):
|
||||
self.profile.get("microphone.pyaudio.frames_per_buffer", 480)
|
||||
)
|
||||
|
||||
self.keep_device_open = self.profile.get(
|
||||
"microphone.pyaudio.keep_device_open", True
|
||||
)
|
||||
|
||||
def in_started(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in started state."""
|
||||
if isinstance(message, StartStreaming):
|
||||
@@ -192,21 +197,25 @@ class PyAudioRecorder(RhasspyActor):
|
||||
|
||||
return (data, pyaudio.paContinue)
|
||||
|
||||
self.audio = pyaudio.PyAudio()
|
||||
assert self.audio is not None
|
||||
data_format = self.audio.get_format_from_width(2) # 16-bit
|
||||
self.mic = self.audio.open(
|
||||
format=data_format,
|
||||
channels=1,
|
||||
rate=16000,
|
||||
input_device_index=self.device_index,
|
||||
input=True,
|
||||
stream_callback=stream_callback,
|
||||
frames_per_buffer=self.frames_per_buffer,
|
||||
)
|
||||
if self.audio is None:
|
||||
self.audio = pyaudio.PyAudio()
|
||||
assert self.audio is not None
|
||||
data_format = self.audio.get_format_from_width(2) # 16-bit
|
||||
|
||||
if self.mic is None:
|
||||
self.mic = self.audio.open(
|
||||
format=data_format,
|
||||
channels=1,
|
||||
rate=16000,
|
||||
input_device_index=self.device_index,
|
||||
input=True,
|
||||
stream_callback=stream_callback,
|
||||
frames_per_buffer=self.frames_per_buffer,
|
||||
)
|
||||
|
||||
assert self.mic is not None
|
||||
self.mic.start_stream()
|
||||
|
||||
assert self.mic is not None
|
||||
self.mic.start_stream()
|
||||
self._logger.debug(
|
||||
"Recording from microphone (PyAudio, device=%s)", self.device_index
|
||||
)
|
||||
@@ -246,7 +255,11 @@ class PyAudioRecorder(RhasspyActor):
|
||||
self.send(message.receiver or sender, AudioData(buffer))
|
||||
|
||||
# Check to see if anyone is still listening
|
||||
if (len(self.receivers) == 0) and (len(self.buffers) == 0):
|
||||
if (
|
||||
(not self.keep_device_open)
|
||||
and (len(self.receivers) == 0)
|
||||
and (len(self.buffers) == 0)
|
||||
):
|
||||
# Terminate audio recording
|
||||
if self.mic is not None:
|
||||
self.mic.stop_stream()
|
||||
@@ -367,6 +380,7 @@ class ARecordAudioRecorder(RhasspyActor):
|
||||
self.is_recording: bool = True
|
||||
self.device_name: Optional[str] = None
|
||||
self.chunk_size: int = 960
|
||||
self.keep_device_open = True
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -383,6 +397,10 @@ class ARecordAudioRecorder(RhasspyActor):
|
||||
self.profile.get("microphone.arecord.chunk_size", self.chunk_size)
|
||||
)
|
||||
|
||||
self.keep_device_open = self.profile.get(
|
||||
"microphone.arecord.keep_device_open", True
|
||||
)
|
||||
|
||||
def in_started(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in started state."""
|
||||
if isinstance(message, StartStreaming):
|
||||
@@ -430,9 +448,13 @@ class ARecordAudioRecorder(RhasspyActor):
|
||||
# Start recording
|
||||
try:
|
||||
self.is_recording = True
|
||||
self.recording_thread = threading.Thread(target=process_data, daemon=True)
|
||||
assert self.recording_thread is not None
|
||||
self.recording_thread.start()
|
||||
|
||||
if self.recording_thread is None:
|
||||
self.recording_thread = threading.Thread(
|
||||
target=process_data, daemon=True
|
||||
)
|
||||
assert self.recording_thread is not None
|
||||
self.recording_thread.start()
|
||||
|
||||
self._logger.debug("Recording from microphone (arecord)")
|
||||
except Exception:
|
||||
@@ -471,11 +493,16 @@ class ARecordAudioRecorder(RhasspyActor):
|
||||
self.send(message.receiver or sender, AudioData(buffer))
|
||||
|
||||
# Check to see if anyone is still listening
|
||||
if (len(self.receivers) == 0) and (len(self.buffers) == 0):
|
||||
if (
|
||||
(not self.keep_device_open)
|
||||
and (len(self.receivers) == 0)
|
||||
and (len(self.buffers) == 0)
|
||||
):
|
||||
# Terminate audio recording
|
||||
self.is_recording = False
|
||||
self.record_proc.terminate()
|
||||
self.record_proc = None
|
||||
self.recording_thread = None
|
||||
self.transition("started")
|
||||
self._logger.debug("Stopped recording from microphone (arecord)")
|
||||
|
||||
@@ -485,6 +512,7 @@ class ARecordAudioRecorder(RhasspyActor):
|
||||
self.is_recording = False
|
||||
if self.record_proc is not None:
|
||||
self.record_proc.terminate()
|
||||
self.recording_thread = None
|
||||
self._logger.debug("Stopped recording from microphone (arecord)")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@@ -565,7 +593,7 @@ class ARecordAudioRecorder(RhasspyActor):
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# MQTT based audio "recorder" for Snips.AI Hermes Protocol
|
||||
# https://docs.snips.ai/ressources/hermes-protocol
|
||||
# https://docs.snips.ai/reference/hermes
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import subprocess
|
||||
import threading
|
||||
import uuid
|
||||
from datetime import timedelta
|
||||
from typing import Any, Dict, List, Optional, Tuple, Type
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
|
||||
import webrtcvad
|
||||
|
||||
@@ -118,6 +118,7 @@ class WebrtcvadCommandListener(RhasspyActor):
|
||||
self.timeout_sec: float = 30
|
||||
self.vad_mode: int = 0
|
||||
self.vad: Optional[webrtcvad.Vad] = None
|
||||
self.timeout_id: str = ""
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -167,7 +168,8 @@ class WebrtcvadCommandListener(RhasspyActor):
|
||||
|
||||
def to_listening(self, from_state: str) -> None:
|
||||
"""Transition to listening state."""
|
||||
self.wakeupAfter(timedelta(seconds=self.timeout_sec))
|
||||
self.timeout_id = str(uuid.uuid4())
|
||||
self.wakeupAfter(timedelta(seconds=self.timeout_sec), payload=self.timeout_id)
|
||||
|
||||
# Reset state
|
||||
self.chunk = bytes()
|
||||
@@ -184,16 +186,19 @@ class WebrtcvadCommandListener(RhasspyActor):
|
||||
def in_listening(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in listening state."""
|
||||
if isinstance(message, WakeupMessage):
|
||||
# Timeout
|
||||
self._logger.warning("Timeout")
|
||||
self.send(self.recorder, StopStreaming(self.myAddress))
|
||||
self.send(
|
||||
self.receiver,
|
||||
VoiceCommand(self.buffer or bytes(), timeout=True, handle=self.handle),
|
||||
)
|
||||
if message.payload == self.timeout_id:
|
||||
# Timeout
|
||||
self._logger.warning("Timeout")
|
||||
self.send(self.recorder, StopStreaming(self.myAddress))
|
||||
self.send(
|
||||
self.receiver,
|
||||
VoiceCommand(
|
||||
self.buffer or bytes(), timeout=True, handle=self.handle
|
||||
),
|
||||
)
|
||||
|
||||
self.buffer = bytes()
|
||||
self.transition("loaded")
|
||||
self.buffer = bytes()
|
||||
self.transition("loaded")
|
||||
elif isinstance(message, AudioData):
|
||||
self.chunk += message.data
|
||||
if len(self.chunk) >= self.chunk_size:
|
||||
@@ -396,7 +401,7 @@ class OneShotCommandListener(RhasspyActor):
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# MQTT-Based Command Listener (Hermes Protocol)
|
||||
# https://docs.snips.ai/ressources/hermes-protocol
|
||||
# https://docs.snips.ai/reference/hermes
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import shutil
|
||||
import tempfile
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Union
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
import aiohttp
|
||||
|
||||
@@ -209,7 +209,7 @@ class RhasspyCore:
|
||||
assert isinstance(result, WavTranscription), result
|
||||
return result
|
||||
|
||||
async def recognize_intent(self, text: str) -> IntentRecognized:
|
||||
async def recognize_intent(self, text: str, wakeId: str = "") -> IntentRecognized:
|
||||
"""Recognize an intent from text."""
|
||||
assert self.actor_system is not None
|
||||
with self.actor_system.private() as sys:
|
||||
@@ -223,7 +223,7 @@ class RhasspyCore:
|
||||
# Replace numbers
|
||||
if self.profile.get("intent.replace_numbers", True):
|
||||
language = self.profile.get("language", "")
|
||||
if len(language) == 0:
|
||||
if not language:
|
||||
language = None
|
||||
|
||||
# 75 -> seventy five
|
||||
@@ -241,6 +241,10 @@ class RhasspyCore:
|
||||
|
||||
result.intent["slots"] = intent_slots
|
||||
|
||||
# Add wake/site ID
|
||||
result.intent["wakeId"] = wakeId
|
||||
result.intent["siteId"] = self.profile.get("mqtt.site_id", "default")
|
||||
|
||||
return result
|
||||
|
||||
async def handle_intent(self, intent: Dict[str, Any]) -> IntentHandled:
|
||||
@@ -312,11 +316,20 @@ class RhasspyCore:
|
||||
assert isinstance(result, WordSpoken), result
|
||||
return result
|
||||
|
||||
async def speak_sentence(self, sentence: str) -> SentenceSpoken:
|
||||
async def speak_sentence(
|
||||
self,
|
||||
sentence: str,
|
||||
play: bool = True,
|
||||
language: Optional[str] = None,
|
||||
voice: Optional[str] = None,
|
||||
) -> SentenceSpoken:
|
||||
"""Speak an entire sentence using text to speech system."""
|
||||
assert self.actor_system is not None
|
||||
with self.actor_system.private() as sys:
|
||||
result = await sys.async_ask(self.dialogue_manager, SpeakSentence(sentence))
|
||||
result = await sys.async_ask(
|
||||
self.dialogue_manager,
|
||||
SpeakSentence(sentence, play=play, language=language, voice=voice),
|
||||
)
|
||||
assert isinstance(result, SentenceSpoken), result
|
||||
return result
|
||||
|
||||
@@ -479,7 +492,7 @@ class RhasspyCore:
|
||||
conditions = self.profile.get("download.conditions", {})
|
||||
all_files = self.profile.get("download.files", {})
|
||||
files_to_copy = {}
|
||||
files_to_extract: Dict[str, List[Any]] = defaultdict(list)
|
||||
files_to_extract: Dict[str, List[Tuple[str, str]]] = defaultdict(list)
|
||||
files_to_download: Set[str] = set()
|
||||
|
||||
async def download_file(url, filename):
|
||||
@@ -654,8 +667,9 @@ class RhasspyCore:
|
||||
# Copy specific file/directory
|
||||
self._logger.debug("Copying %s to %s", extract_path, dest_path)
|
||||
if os.path.isdir(extract_path):
|
||||
if len(src_exclude) > 0:
|
||||
if src_exclude:
|
||||
# Ignore some files
|
||||
# pylint: disable=W0640
|
||||
shutil.copytree(
|
||||
extract_path,
|
||||
dest_path,
|
||||
|
||||
@@ -4,36 +4,27 @@ import os
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Type
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
|
||||
import pywrapfst as fst
|
||||
|
||||
from rhasspy.actor import (
|
||||
ActorExitRequest,
|
||||
ChildActorExited,
|
||||
Configured,
|
||||
ConfigureEvent,
|
||||
RhasspyActor,
|
||||
StateTransition,
|
||||
WakeupMessage,
|
||||
)
|
||||
from rhasspy.audio_player import PlayWavData, PlayWavFile, WavPlayed, get_sound_class
|
||||
from rhasspy.audio_recorder import (
|
||||
AudioData,
|
||||
HTTPAudioRecorder,
|
||||
StartRecordingToBuffer,
|
||||
StopRecordingToBuffer,
|
||||
get_microphone_class,
|
||||
)
|
||||
from rhasspy.command_listener import ListenForCommand, VoiceCommand, get_command_class
|
||||
from rhasspy.intent import IntentRecognized, RecognizeIntent, get_recognizer_class
|
||||
from rhasspy.intent_handler import HandleIntent, IntentHandled, get_intent_handler_class
|
||||
from rhasspy.intent_train import (
|
||||
IntentTrainingComplete,
|
||||
IntentTrainingFailed,
|
||||
TrainIntent,
|
||||
get_intent_trainer_class,
|
||||
)
|
||||
from rhasspy.actor import (ActorExitRequest, ChildActorExited, Configured,
|
||||
ConfigureEvent, RhasspyActor, StateTransition,
|
||||
WakeupMessage)
|
||||
from rhasspy.audio_player import (PlayWavData, PlayWavFile, WavPlayed,
|
||||
get_sound_class)
|
||||
from rhasspy.audio_recorder import (AudioData, HTTPAudioRecorder,
|
||||
StartRecordingToBuffer,
|
||||
StopRecordingToBuffer,
|
||||
get_microphone_class)
|
||||
from rhasspy.command_listener import (ListenForCommand, VoiceCommand,
|
||||
get_command_class)
|
||||
from rhasspy.intent import (IntentRecognized, RecognizeIntent,
|
||||
get_recognizer_class)
|
||||
from rhasspy.intent_handler import (HandleIntent, IntentHandled,
|
||||
get_intent_handler_class)
|
||||
from rhasspy.intent_train import (IntentTrainingComplete, IntentTrainingFailed,
|
||||
TrainIntent, get_intent_trainer_class)
|
||||
from rhasspy.mqtt import MqttPublish
|
||||
from rhasspy.pronounce import GetWordPhonemes, GetWordPronunciations, SpeakWord
|
||||
from rhasspy.stt import TranscribeWav, WavTranscription, get_decoder_class
|
||||
@@ -41,13 +32,9 @@ from rhasspy.stt_train import get_speech_trainer_class
|
||||
from rhasspy.train import train_profile
|
||||
from rhasspy.tts import SpeakSentence, get_speech_class
|
||||
from rhasspy.utils import buffer_to_wav
|
||||
from rhasspy.wake import (
|
||||
ListenForWakeWord,
|
||||
StopListeningForWakeWord,
|
||||
WakeWordDetected,
|
||||
WakeWordNotDetected,
|
||||
get_wake_class,
|
||||
)
|
||||
from rhasspy.wake import (ListenForWakeWord, StopListeningForWakeWord,
|
||||
WakeWordDetected, WakeWordNotDetected,
|
||||
get_wake_class)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
@@ -231,6 +218,9 @@ class DialogueManager(RhasspyActor):
|
||||
self._wake: Optional[RhasspyActor] = None
|
||||
self.wake_receiver: Optional[RhasspyActor] = None
|
||||
|
||||
# Name of most recently detected wake word
|
||||
self.wake_detected_name: Optional[str] = None
|
||||
|
||||
# Word pronunciations
|
||||
self.word_pronouncer_class: Optional[Type] = None
|
||||
self._word_pronouncer: Optional[RhasspyActor] = None
|
||||
@@ -371,7 +361,7 @@ class DialogueManager(RhasspyActor):
|
||||
self._logger.debug("%s started", sender_name)
|
||||
|
||||
if len(self.wait_actors) == 0:
|
||||
self._logger.info("Actors loaded")
|
||||
self._logger.debug("Actors loaded")
|
||||
self.transition("ready")
|
||||
|
||||
# Inform all actors that we're ready
|
||||
@@ -429,6 +419,7 @@ class DialogueManager(RhasspyActor):
|
||||
"""Handle messages in asleep state."""
|
||||
if isinstance(message, WakeWordDetected):
|
||||
self._logger.debug("Awake!")
|
||||
self.wake_detected_name = message.name
|
||||
self.transition("awake")
|
||||
if self.wake_receiver is not None:
|
||||
self.send(self.wake_receiver, message)
|
||||
@@ -493,6 +484,7 @@ class DialogueManager(RhasspyActor):
|
||||
"text": message.text,
|
||||
"likelihood": 1,
|
||||
"seconds": 0,
|
||||
"wakeId": self.wake_detected_name or ""
|
||||
}
|
||||
).encode()
|
||||
|
||||
@@ -523,6 +515,9 @@ class DialogueManager(RhasspyActor):
|
||||
# Forward to audio recorder
|
||||
self.send(self.recorder, message)
|
||||
|
||||
message.intent["wakeId"] = self.wake_detected_name or ""
|
||||
message.intent["siteId"] = self.site_id
|
||||
|
||||
# Augment with extra entities
|
||||
entities = message.intent.get("entities", [])
|
||||
entities.extend(self.listen_entities)
|
||||
@@ -723,7 +718,16 @@ class DialogueManager(RhasspyActor):
|
||||
)
|
||||
elif isinstance(message, SpeakSentence):
|
||||
# text -> speech
|
||||
self.send(self.speech, SpeakSentence(message.sentence, receiver=sender))
|
||||
self.send(
|
||||
self.speech,
|
||||
SpeakSentence(
|
||||
message.sentence,
|
||||
receiver=sender,
|
||||
play=message.play,
|
||||
voice=message.voice,
|
||||
language=message.language,
|
||||
),
|
||||
)
|
||||
elif isinstance(message, TrainProfile):
|
||||
# Training
|
||||
self.reload_actors_after_training = message.reload_actors
|
||||
|
||||
@@ -10,9 +10,9 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Type
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import networkx as nx
|
||||
import pywrapfst as fst
|
||||
import pydash
|
||||
import requests
|
||||
from rhasspynlu import json_to_graph, recognize
|
||||
|
||||
from rhasspy.actor import RhasspyActor
|
||||
from rhasspy.tts import SpeakSentence
|
||||
@@ -150,7 +150,6 @@ class RemoteRecognizer(RhasspyActor):
|
||||
|
||||
def recognize(self, text: str) -> Dict[str, Any]:
|
||||
"""POST to remote server and return response."""
|
||||
import requests
|
||||
|
||||
params = {"profile": self.profile.name, "nohass": True}
|
||||
response = requests.post(self.remote_url, params=params, data=text.encode())
|
||||
@@ -170,8 +169,7 @@ class FsticuffsRecognizer(RhasspyActor):
|
||||
|
||||
def __init__(self) -> None:
|
||||
RhasspyActor.__init__(self)
|
||||
self.fst: Optional[Any] = None
|
||||
self.graph: Optional[Any] = None
|
||||
self.graph: Optional[nx.DiGraph] = None
|
||||
self.words: Set[str] = set()
|
||||
self.stop_words: Set[str] = set()
|
||||
self.fuzzy: bool = True
|
||||
@@ -182,7 +180,7 @@ class FsticuffsRecognizer(RhasspyActor):
|
||||
self.preload = self.config.get("preload", False)
|
||||
if self.preload:
|
||||
try:
|
||||
self.load_fst()
|
||||
self.load_graph()
|
||||
except Exception as e:
|
||||
self._logger.warning("preload: %s", e)
|
||||
|
||||
@@ -194,14 +192,26 @@ class FsticuffsRecognizer(RhasspyActor):
|
||||
"""Handle messages in loaded state."""
|
||||
if isinstance(message, RecognizeIntent):
|
||||
try:
|
||||
self.load_fst()
|
||||
self.load_graph()
|
||||
|
||||
if self.fuzzy:
|
||||
# Fuzzy search
|
||||
intent = self.recognize_fuzzy(message.text)
|
||||
else:
|
||||
# Strict search
|
||||
intent = self.recognize(message.text)
|
||||
# Assume lower case, white-space separated tokens
|
||||
text = message.text
|
||||
tokens = re.split(r"\s+", text)
|
||||
|
||||
if self.profile.get("intent.fsticuffs.ignore_unknown_words", True):
|
||||
# Filter tokens
|
||||
tokens = [w for w in tokens if w in self.words]
|
||||
|
||||
recognitions = recognize(
|
||||
tokens, self.graph, fuzzy=self.fuzzy, stop_words=self.stop_words
|
||||
)
|
||||
assert recognitions, "No intent recognized"
|
||||
|
||||
# Use first intent
|
||||
recognition = recognitions[0]
|
||||
|
||||
# Convert to JSON
|
||||
intent = recognition.asdict()
|
||||
except Exception:
|
||||
self._logger.exception("in_loaded")
|
||||
intent = empty_intent()
|
||||
@@ -214,242 +224,28 @@ class FsticuffsRecognizer(RhasspyActor):
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def recognize(self, text: str) -> Dict[str, Any]:
|
||||
"""Use FST as acceptor."""
|
||||
from rhasspy.train.jsgf2fst import fstaccept
|
||||
|
||||
# Assume lower case, white-space separated tokens
|
||||
tokens = re.split(r"\s+", text.lower())
|
||||
|
||||
if self.profile.get("intent.fsticuffs.ignore_unknown_words", True):
|
||||
tokens = [w for w in tokens if w in self.words]
|
||||
|
||||
intents = fstaccept(self.fst, tokens)
|
||||
self._logger.debug("Got %s intent(s)", len(intents))
|
||||
|
||||
if len(intents) > 0:
|
||||
self._logger.debug(intents)
|
||||
|
||||
return intents[0]
|
||||
|
||||
def recognize_fuzzy(self, text: str, eps: str = "<eps>") -> Dict[str, Any]:
|
||||
"""Do fuzzy breadth-first search on FST as graph."""
|
||||
from rhasspy.train.jsgf2fst import symbols2intent
|
||||
|
||||
# Assume lower case, white-space separated tokens
|
||||
tokens = re.split(r"\s+", text)
|
||||
|
||||
if self.profile.get("intent.fsticuffs.ignore_unknown_words", True):
|
||||
# Filter tokens
|
||||
tokens = [w for w in tokens if w in self.words]
|
||||
|
||||
# Only run search if there are any tokens
|
||||
intents = []
|
||||
if len(tokens) > 0:
|
||||
intent_symbols_and_costs = FsticuffsRecognizer._get_symbols_and_costs(
|
||||
self.graph, tokens, stop_words=self.stop_words, eps=eps
|
||||
)
|
||||
for symbols, cost in intent_symbols_and_costs.values():
|
||||
intent = symbols2intent(symbols, eps=eps)
|
||||
intent["intent"]["confidence"] = (len(tokens) - cost) / len(tokens)
|
||||
intents.append(intent)
|
||||
|
||||
intents = sorted(
|
||||
intents, key=lambda i: i["intent"]["confidence"], reverse=True
|
||||
def load_graph(self):
|
||||
"""Load intent graph from JSON file."""
|
||||
if self.graph is None:
|
||||
graph_path = self.profile.read_path(
|
||||
self.profile.get("intent.fsticuffs.intent_graph", "intent.json")
|
||||
)
|
||||
|
||||
self._logger.debug("Recognized %s intent(s)", len(intents))
|
||||
with open(graph_path, "r") as graph_file:
|
||||
json_graph = json.load(graph_file)
|
||||
|
||||
# Use first intent
|
||||
if len(intents) > 0:
|
||||
intent = intents[0]
|
||||
|
||||
# Add slots
|
||||
intent["slots"] = {}
|
||||
for ev in intent["entities"]:
|
||||
intent["slots"][ev["entity"]] = ev["value"]
|
||||
|
||||
# Add alternative intents
|
||||
intent["intents"] = []
|
||||
for other_intent in intents[1:]:
|
||||
intent["intents"].append(other_intent)
|
||||
|
||||
self._logger.debug(intents)
|
||||
else:
|
||||
intent = empty_intent()
|
||||
intent["text"] = text
|
||||
|
||||
return intent
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def _get_symbols_and_costs(
|
||||
cls,
|
||||
intent_graph: nx.MultiDiGraph,
|
||||
tokens: List[str],
|
||||
stop_words: Set[str] = None,
|
||||
eps: str = "<eps>",
|
||||
) -> Dict[str, Tuple[List[str], float]]:
|
||||
"""Get FST paths and costs via BFS."""
|
||||
stop_words = stop_words or set()
|
||||
|
||||
# node -> attrs
|
||||
n_data = intent_graph.nodes(data=True)
|
||||
|
||||
# start state
|
||||
start_node = [n for n, data in n_data if data["start"]][0]
|
||||
|
||||
# intent -> (symbols, cost)
|
||||
intent_symbols_and_costs: Dict[str, Tuple[List[str], float]] = {}
|
||||
|
||||
# Lowest cost so far
|
||||
best_cost: float = len(n_data)
|
||||
|
||||
# (node, in_tokens, out_tokens, cost, intent_name)
|
||||
q: List[Tuple[int, List[str], List[str], float, str]] = [
|
||||
(start_node, tokens, [], 0, "")
|
||||
]
|
||||
|
||||
# BFS it up
|
||||
while len(q) > 0:
|
||||
q_node, q_in_tokens, q_out_tokens, q_cost, q_intent = q.pop()
|
||||
|
||||
# Update best intent cost on final state.
|
||||
# Don't bother reporting intents that failed to consume any tokens.
|
||||
if (n_data[q_node]["final"]) and (q_cost < len(tokens)):
|
||||
best_intent_cost = intent_symbols_and_costs.get(q_intent, (None, None))[
|
||||
1
|
||||
]
|
||||
final_cost: float = q_cost + len(
|
||||
q_in_tokens
|
||||
) # remaning tokens count against
|
||||
|
||||
if (best_intent_cost is None) or (final_cost < best_intent_cost):
|
||||
intent_symbols_and_costs[q_intent] = (q_out_tokens, final_cost)
|
||||
|
||||
if final_cost < best_cost:
|
||||
best_cost = final_cost
|
||||
|
||||
if q_cost > best_cost:
|
||||
continue
|
||||
|
||||
# Process child edges
|
||||
for next_node, edges in intent_graph[q_node].items():
|
||||
for edge_data in edges.values():
|
||||
in_label = edge_data["in_label"]
|
||||
out_label = edge_data["out_label"]
|
||||
next_in_tokens = q_in_tokens[:]
|
||||
next_out_tokens = q_out_tokens[:]
|
||||
next_cost = q_cost
|
||||
next_intent = q_intent
|
||||
|
||||
if out_label.startswith("__label__"):
|
||||
next_intent = out_label[9:]
|
||||
|
||||
if in_label != eps:
|
||||
if (len(next_in_tokens) > 0) and (
|
||||
in_label == next_in_tokens[0]
|
||||
):
|
||||
# Consume matching token immediately
|
||||
next_in_tokens.pop(0)
|
||||
|
||||
if out_label != eps:
|
||||
next_out_tokens.append(out_label)
|
||||
else:
|
||||
# Consume non-matching tokens and increase cost unless stop word
|
||||
while (len(next_in_tokens) > 0) and (
|
||||
in_label != next_in_tokens[0]
|
||||
):
|
||||
bad_token = next_in_tokens.pop(0)
|
||||
if bad_token not in stop_words:
|
||||
next_cost += 1
|
||||
else:
|
||||
# Need a non-zero cost for stop words to
|
||||
# avoid case where two FST paths are
|
||||
# identical, save for stop words.
|
||||
next_cost += 0.1
|
||||
|
||||
if len(next_in_tokens) > 0:
|
||||
# Consume matching token
|
||||
next_in_tokens.pop(0)
|
||||
|
||||
if out_label != eps:
|
||||
next_out_tokens.append(out_label)
|
||||
else:
|
||||
# No matching token
|
||||
continue
|
||||
else:
|
||||
# Consume epsilon
|
||||
if out_label != eps:
|
||||
next_out_tokens.append(out_label)
|
||||
|
||||
q.append(
|
||||
(
|
||||
next_node,
|
||||
next_in_tokens,
|
||||
next_out_tokens,
|
||||
next_cost,
|
||||
next_intent,
|
||||
)
|
||||
)
|
||||
|
||||
return intent_symbols_and_costs
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@classmethod
def _fst_to_graph(cls, the_fst: fst.Fst) -> nx.MultiDiGraph:
    """Mirror the states and arcs of a finite state transducer as a graph.

    Every FST state becomes a node carrying ``final`` and ``start`` flags,
    and every arc becomes an edge carrying the decoded ``in_label`` and
    ``out_label`` symbol strings.

    Args:
        the_fst: transducer whose states and arcs are copied.

    Returns:
        A ``networkx.MultiDiGraph`` with one node per state and one edge
        per arc.
    """
    # A state counts as final when its final weight differs from the
    # Zero weight of the FST's weight type.
    no_weight = fst.Weight.Zero(the_fst.weight_type())
    input_table = the_fst.input_symbols()
    output_table = the_fst.output_symbols()

    graph = nx.MultiDiGraph()

    for state_id in the_fst.states():
        reaches_end = the_fst.final(state_id) != no_weight
        graph.add_node(state_id, final=reaches_end, start=False)

        for arc in the_fst.arcs(state_id):
            # Symbol tables hand back bytes; store text labels on the edge
            graph.add_edge(
                state_id,
                arc.nextstate,
                in_label=input_table.find(arc.ilabel).decode(),
                out_label=output_table.find(arc.olabel).decode(),
            )

    # Re-adding an existing node only updates its attributes
    graph.add_node(the_fst.start(), start=True)

    return graph
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def load_fst(self):
|
||||
"""Load intent FST."""
|
||||
if self.fst is None:
|
||||
fst_path = self.profile.read_path(
|
||||
self.profile.get("intent.fsticuffs.intent_fst", "intent.fst")
|
||||
)
|
||||
|
||||
self.fst = fst.Fst.read(fst_path)
|
||||
self.graph = json_to_graph(json_graph)
|
||||
|
||||
# Add words from FST
|
||||
in_symbols = self.fst.input_symbols()
|
||||
self.words = set()
|
||||
for i in range(in_symbols.num_symbols()):
|
||||
key = in_symbols.get_nth_key(i)
|
||||
word = in_symbols.find(key).decode()
|
||||
self.words.add(word)
|
||||
|
||||
# Convert to graph
|
||||
self.graph = FsticuffsRecognizer._fst_to_graph(self.fst)
|
||||
for _, data in self.graph.nodes(data=True):
|
||||
if "word" in data:
|
||||
self.words.add(data["word"])
|
||||
|
||||
# Load stop words
|
||||
stop_words_path = self.profile.read_path("stop_words.txt")
|
||||
if os.path.exists(stop_words_path):
|
||||
self._logger.debug(f"Using stop words at {stop_words_path}")
|
||||
self._logger.debug("Using stop words at %s", stop_words_path)
|
||||
with open(stop_words_path, "r") as stop_words_file:
|
||||
self.stop_words = {
|
||||
line.strip()
|
||||
@@ -647,7 +443,6 @@ class RasaIntentRecognizer(RhasspyActor):
|
||||
|
||||
def recognize(self, text: str) -> Dict[str, Any]:
|
||||
"""POST to RasaNLU server and return response."""
|
||||
import requests
|
||||
|
||||
response = requests.post(
|
||||
self.parse_url, json={"text": text, "project": self.project_name}
|
||||
@@ -791,6 +586,7 @@ class FlairRecognizer(RhasspyActor):
|
||||
RhasspyActor.__init__(self)
|
||||
|
||||
try:
|
||||
# pylint: disable=E0401
|
||||
from flair.models import TextClassifier, SequenceTagger
|
||||
except Exception:
|
||||
pass
|
||||
@@ -829,6 +625,7 @@ class FlairRecognizer(RhasspyActor):
|
||||
|
||||
def recognize(self, text: str) -> Dict[str, Any]:
|
||||
"""Run intent classifier and then named-entity recognizer."""
|
||||
# pylint: disable=E0401
|
||||
from flair.data import Sentence
|
||||
|
||||
intent = empty_intent()
|
||||
@@ -871,6 +668,7 @@ class FlairRecognizer(RhasspyActor):
|
||||
|
||||
def load_models(self) -> None:
|
||||
"""Load intent classifier and named entity recognizers."""
|
||||
# pylint: disable=E0401
|
||||
from flair.models import TextClassifier, SequenceTagger
|
||||
|
||||
# Load mapping from intent id to user intent name
|
||||
@@ -930,7 +728,7 @@ class HomeAssistantConversationRecognizer(RhasspyActor):
|
||||
def __init__(self) -> None:
|
||||
RhasspyActor.__init__(self)
|
||||
self.hass_config: Dict[str, Any] = {}
|
||||
self.pem_file: bool = ""
|
||||
self.pem_file: Optional[str] = ""
|
||||
self.handle_speech: bool = True
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, Tuple, Type
|
||||
from urllib.parse import urljoin
|
||||
|
||||
@@ -9,6 +10,7 @@ import pydash
|
||||
import requests
|
||||
|
||||
from rhasspy.actor import RhasspyActor
|
||||
from rhasspy.tts import SpeakSentence
|
||||
from rhasspy.utils import hass_request_kwargs
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -53,7 +55,7 @@ class IntentForwarded:
|
||||
|
||||
def get_intent_handler_class(system: str) -> Type[RhasspyActor]:
|
||||
"""Get type for profile intent handlers."""
|
||||
assert system in ["dummy", "hass", "command"], (
|
||||
assert system in ["dummy", "hass", "remote", "command"], (
|
||||
"Invalid intent handler system: %s" % system
|
||||
)
|
||||
|
||||
@@ -62,9 +64,13 @@ def get_intent_handler_class(system: str) -> Type[RhasspyActor]:
|
||||
return HomeAssistantIntentHandler
|
||||
|
||||
if system == "command":
|
||||
# Use command-line speech trainer
|
||||
# Use command-line intent handler
|
||||
return CommandIntentHandler
|
||||
|
||||
if system == "remote":
|
||||
# Use remote HTTP intent handler
|
||||
return RemoteIntentHandler
|
||||
|
||||
# Use dummy handlers as a fallback
|
||||
return DummyIntentHandler
|
||||
|
||||
@@ -88,6 +94,11 @@ class DummyIntentHandler(RhasspyActor):
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HomeAssistantHandleType(str, Enum):
    """Method used to hand an intent over to Home Assistant."""

    # Send the intent as a Home Assistant event
    EVENT = "event"

    # Use Home Assistant's intent integration
    INTENT = "intent"
|
||||
|
||||
|
||||
class HomeAssistantIntentHandler(RhasspyActor):
|
||||
"""Forward intents to Home Assistant as events."""
|
||||
|
||||
@@ -96,6 +107,7 @@ class HomeAssistantIntentHandler(RhasspyActor):
|
||||
self.hass_config: Dict[str, Any] = {}
|
||||
self.event_type_format = ""
|
||||
self.pem_file = ""
|
||||
self.handle_type: HomeAssistantHandleType = HomeAssistantHandleType.EVENT
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -106,6 +118,13 @@ class HomeAssistantIntentHandler(RhasspyActor):
|
||||
"event_type_format", "rhasspy_{0}"
|
||||
)
|
||||
|
||||
# Method for handling intent:
|
||||
# - send rhasspy_* events (event)
|
||||
# - use intent integration (intent)
|
||||
self.handle_type = self.hass_config.get(
|
||||
"handle_type", HomeAssistantHandleType.EVENT
|
||||
)
|
||||
|
||||
# PEM file for self-signed HA certificates
|
||||
self.pem_file = self.hass_config.get("pem_file", "")
|
||||
if (self.pem_file is not None) and (len(self.pem_file) > 0):
|
||||
@@ -128,6 +147,7 @@ class HomeAssistantIntentHandler(RhasspyActor):
|
||||
elif isinstance(message, ForwardIntent):
|
||||
intent = message.intent
|
||||
try:
|
||||
intent_name = pydash.get(intent, "intent.name", "")
|
||||
event_type: str = ""
|
||||
event_data: Dict[str, Any] = {}
|
||||
|
||||
@@ -141,7 +161,7 @@ class HomeAssistantIntentHandler(RhasspyActor):
|
||||
event_type = intent["hass_event"]["event_type"]
|
||||
event_data = intent["hass_event"]["event_data"]
|
||||
|
||||
self.forward_intent(event_type, event_data)
|
||||
self.forward_intent(intent_name, event_type, event_data)
|
||||
except Exception as e:
|
||||
self._logger.exception("forward_intent")
|
||||
intent["error"] = str(e)
|
||||
@@ -152,7 +172,9 @@ class HomeAssistantIntentHandler(RhasspyActor):
|
||||
|
||||
def handle_intent(self, intent: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Create event for Home Assistant and send it."""
|
||||
if len(pydash.get(intent, "intent.name", "")) == 0:
|
||||
intent_name = pydash.get(intent, "intent.name", "")
|
||||
|
||||
if not intent_name:
|
||||
self._logger.warning("Empty intent. Not sending to Home Assistant")
|
||||
return intent
|
||||
|
||||
@@ -161,23 +183,38 @@ class HomeAssistantIntentHandler(RhasspyActor):
|
||||
# Add a copy of the event to the intent for easier debugging
|
||||
intent["hass_event"] = {"event_type": event_type, "event_data": slots}
|
||||
|
||||
self.forward_intent(event_type, slots)
|
||||
self.forward_intent(intent_name, event_type, slots)
|
||||
return intent
|
||||
|
||||
def forward_intent(self, event_type: str, slots: Dict[str, Any]):
|
||||
def forward_intent(self, intent_name: str, event_type: str, slots: Dict[str, Any]):
|
||||
"""Forward existing event to Home Assistant."""
|
||||
# Base URL of Home Assistant server
|
||||
post_url = urljoin(self.hass_config["url"], "api/events/" + event_type)
|
||||
|
||||
# Send to Home Assistant
|
||||
kwargs = hass_request_kwargs(self.hass_config, self.pem_file)
|
||||
kwargs["json"] = slots
|
||||
if self.handle_type == HomeAssistantHandleType.INTENT:
|
||||
# Call /api/intent/handle
|
||||
post_url = urljoin(self.hass_config["url"], "api/intent/handle")
|
||||
|
||||
if self.pem_file is not None:
|
||||
kwargs["verify"] = self.pem_file
|
||||
# Send to Home Assistant
|
||||
kwargs = hass_request_kwargs(self.hass_config, self.pem_file)
|
||||
kwargs["json"] = {"name": intent_name, "data": slots}
|
||||
|
||||
if self.pem_file is not None:
|
||||
kwargs["verify"] = self.pem_file
|
||||
|
||||
response = requests.post(post_url, **kwargs)
|
||||
else:
|
||||
# Send event
|
||||
post_url = urljoin(self.hass_config["url"], "api/events/" + event_type)
|
||||
|
||||
# Send to Home Assistant
|
||||
kwargs = hass_request_kwargs(self.hass_config, self.pem_file)
|
||||
kwargs["json"] = slots
|
||||
|
||||
if self.pem_file is not None:
|
||||
kwargs["verify"] = self.pem_file
|
||||
|
||||
response = requests.post(post_url, **kwargs)
|
||||
self._logger.debug("POSTed intent to %s", post_url)
|
||||
|
||||
response = requests.post(post_url, **kwargs)
|
||||
self._logger.debug("POSTed intent to %s", post_url)
|
||||
response.raise_for_status()
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@@ -189,6 +226,10 @@ class HomeAssistantIntentHandler(RhasspyActor):
|
||||
for entity in intent["entities"]:
|
||||
slots[entity["entity"]] = entity["value"]
|
||||
|
||||
# Add meta slots
|
||||
slots["_text"] = intent.get("text", "")
|
||||
slots["_raw_text"] = intent.get("raw_text", "")
|
||||
|
||||
return event_type, slots
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@@ -210,7 +251,70 @@ class HomeAssistantIntentHandler(RhasspyActor):
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Command Intent Recognizer
|
||||
# Remote Intent Handler
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class RemoteIntentHandler(RhasspyActor):
    """POST intent JSON to a remote server and relay the intent it returns.

    In the ``ready`` state a ``HandleIntent`` message is POSTed to the
    configured URL. The JSON reply replaces the intent, may trigger speech,
    and is either sent back as ``IntentHandled`` or first forwarded to the
    Home Assistant handler (``forwarding`` state).
    """

    def __init__(self):
        RhasspyActor.__init__(self)
        self.remote_url = ""
        self.hass_handler: Optional[RhasspyActor] = None
        self.receiver: Optional[RhasspyActor] = None
        self.speech_actor: Optional[RhasspyActor] = None
        self.forward_to_hass = False

    def to_started(self, from_state: str) -> None:
        """Read settings from the profile/config and move to ``ready``."""
        self.speech_actor = self.config.get("speech")
        self.remote_url = self.profile.get("handle.remote.url")

        self.forward_to_hass = self.profile.get("handle.forward_to_hass", False)
        self.hass_handler = self.config.get("hass_handler")

        self.transition("ready")

    def in_ready(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in ready state.

        On any failure the exception text is stored under ``intent["error"]``
        and the intent is still delivered (or forwarded), so the requester
        always gets an answer.
        """
        if isinstance(message, HandleIntent):
            self.receiver = message.receiver or sender
            intent = message.intent
            try:
                # JSON -> Remote -> JSON
                response = requests.post(self.remote_url, json=message.intent)
                response.raise_for_status()

                remote_intent = response.json()
                self._logger.debug(remote_intent)

                # Only adopt the reply once it is known to be a JSON object;
                # otherwise the error handler below could not record the
                # failure on it and would raise a second exception.
                if not isinstance(remote_intent, dict):
                    raise ValueError(
                        "Expected JSON object from remote intent handler, got %s"
                        % type(remote_intent).__name__
                    )

                intent = remote_intent

                # Check for speech (tolerate "speech": null in the reply)
                speech = intent.get("speech") or {}
                speech_text = speech.get("text", "")
                if speech_text and self.speech_actor:
                    self.send(self.speech_actor, SpeakSentence(speech_text))
            except Exception as e:
                self._logger.exception("in_ready")
                intent["error"] = str(e)

            if self.forward_to_hass and self.hass_handler:
                self.transition("forwarding")
                self.send(self.hass_handler, ForwardIntent(intent))
            else:
                # No forwarding
                self.send(self.receiver, IntentHandled(intent))

    def in_forwarding(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in forwarding state."""
        if isinstance(message, IntentForwarded):
            # Return back to sender
            self.transition("ready")
            self.send(self.receiver, IntentHandled(message.intent))
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Command Intent Handler
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -220,12 +324,14 @@ class CommandIntentHandler(RhasspyActor):
|
||||
def __init__(self):
|
||||
RhasspyActor.__init__(self)
|
||||
self.command: List[str] = []
|
||||
self.speech_actor: Optional[RhasspyActor] = None
|
||||
self.hass_handler: Optional[RhasspyActor] = None
|
||||
self.receiver: Optional[RhasspyActor] = None
|
||||
self.forward_to_hass = False
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
self.speech_actor = self.config.get("speech")
|
||||
program = os.path.expandvars(self.profile.get("handle.command.program"))
|
||||
arguments = [
|
||||
os.path.expandvars(str(a))
|
||||
@@ -254,6 +360,13 @@ class CommandIntentHandler(RhasspyActor):
|
||||
).stdout.decode()
|
||||
|
||||
intent = json.loads(output)
|
||||
self._logger.debug(intent)
|
||||
|
||||
# Check for speech
|
||||
speech = intent.get("speech", {})
|
||||
speech_text = speech.get("text", "")
|
||||
if speech_text and self.speech_actor:
|
||||
self.send(self.speech_actor, SpeakSentence(speech_text))
|
||||
except Exception as e:
|
||||
self._logger.exception("in_started")
|
||||
intent["error"] = str(e)
|
||||
|
||||
@@ -451,14 +451,19 @@ class FlairIntentTrainer(RhasspyActor):
|
||||
|
||||
def train(self, intent_fst) -> None:
|
||||
"""Train intent classifier and named entity recognizers."""
|
||||
# pylint: disable=E0401
|
||||
from flair.data import Sentence, Token
|
||||
# pylint: disable=E0401
|
||||
from flair.models import SequenceTagger, TextClassifier
|
||||
# pylint: disable=E0401
|
||||
from flair.embeddings import (
|
||||
FlairEmbeddings,
|
||||
StackedEmbeddings,
|
||||
DocumentRNNEmbeddings,
|
||||
)
|
||||
# pylint: disable=E0401
|
||||
from flair.data import TaggedCorpus
|
||||
# pylint: disable=E0401
|
||||
from flair.trainers import ModelTrainer
|
||||
|
||||
# Directory to look for downloaded embeddings
|
||||
|
||||
@@ -58,7 +58,7 @@ class MessageReady:
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Interoperability with Snips.AI Hermes protocol
|
||||
# https://docs.snips.ai/ressources/hermes-protocol
|
||||
# https://docs.snips.ai/reference/hermes
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
"""Settings for Rhasspy."""
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import json5
|
||||
import pydash
|
||||
|
||||
from rhasspy.utils import recursive_update
|
||||
@@ -39,7 +39,7 @@ class Profile:
|
||||
defaults_path = os.path.join(system_profiles_dir, "defaults.json")
|
||||
with open(defaults_path, "r") as defaults_file:
|
||||
logging.debug("Loading default profile settings from %s", defaults_path)
|
||||
return json.load(defaults_file)
|
||||
return json5.load(defaults_file)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@@ -62,9 +62,9 @@ class Profile:
|
||||
if self.layers in ["all", "defaults"]:
|
||||
defaults_path = os.path.join(self.system_profiles_dir, "defaults.json")
|
||||
with open(defaults_path, "r") as defaults_file:
|
||||
self.json = json.load(defaults_file)
|
||||
self.json = json5.load(defaults_file)
|
||||
defaults_file.seek(0)
|
||||
self.system_json = json.load(defaults_file)
|
||||
self.system_json = json5.load(defaults_file)
|
||||
|
||||
# Load just the system profile.json (on top of defaults)
|
||||
system_profile_path = os.path.join(
|
||||
@@ -72,7 +72,7 @@ class Profile:
|
||||
)
|
||||
|
||||
with open(system_profile_path, "r") as system_profile_file:
|
||||
recursive_update(self.system_json, json.load(system_profile_file))
|
||||
recursive_update(self.system_json, json5.load(system_profile_file))
|
||||
|
||||
# Overlay with profile
|
||||
self.json_path = self.read_path("profile.json")
|
||||
@@ -82,7 +82,7 @@ class Profile:
|
||||
json_path = os.path.join(profiles_dir, self.name, "profile.json")
|
||||
if os.path.exists(json_path):
|
||||
with open(json_path, "r") as profile_file:
|
||||
recursive_update(self.json, json.load(profile_file))
|
||||
recursive_update(self.json, json5.load(profile_file))
|
||||
|
||||
def read_path(self, *path_parts: str) -> str:
|
||||
"""Get first readable path in user then system directories."""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Support for guessing word pronunciations"""
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
@@ -15,12 +15,16 @@ from rhasspy.utils import load_phoneme_map, read_dict
|
||||
|
||||
|
||||
class SpeakWord:
    """Request to speak a word's pronunciation.

    Attributes:
        word: the word to speak.
        receiver: optional actor stored with the request (presumably the
            one that should get the reply; confirm against the handler).
    """

    def __init__(self, word: str, receiver: Optional[RhasspyActor] = None) -> None:
        self.word, self.receiver = word, receiver
|
||||
|
||||
|
||||
class WordSpoken:
|
||||
"""Response to SpeakWord"""
|
||||
|
||||
def __init__(self, word: str, wav_data: bytes, phonemes: str) -> None:
|
||||
self.word = word
|
||||
self.wav_data = wav_data
|
||||
@@ -28,18 +32,24 @@ class WordSpoken:
|
||||
|
||||
|
||||
class GetWordPhonemes:
    """Request for the eSpeak phonemes of a word.

    Attributes:
        word: the word to look up.
        receiver: optional actor stored with the request (presumably the
            one that should get the reply; confirm against the handler).
    """

    def __init__(self, word: str, receiver: Optional[RhasspyActor] = None) -> None:
        self.word, self.receiver = word, receiver
|
||||
|
||||
|
||||
class WordPhonemes:
    """Reply to GetWordPhonemes, pairing a word with its phoneme string."""

    def __init__(self, word: str, phonemes: str) -> None:
        self.word, self.phonemes = word, phonemes
|
||||
|
||||
|
||||
class GetWordPronunciations:
|
||||
"""Look up or guess word pronunciation(s)"""
|
||||
|
||||
def __init__(
|
||||
self, words: List[str], n: int = 5, receiver: Optional[RhasspyActor] = None
|
||||
) -> None:
|
||||
@@ -49,11 +59,15 @@ class GetWordPronunciations:
|
||||
|
||||
|
||||
class WordPronunciations:
    """Response to GetWordPronunciations.

    Holds the ``pronunciations`` mapping exactly as it was passed in
    (keys presumably are the requested words; verify against pronounce()).
    """

    def __init__(self, pronunciations: Dict[str, Dict[str, Any]]) -> None:
        self.pronunciations = pronunciations
|
||||
|
||||
|
||||
class PronunciationFailed:
    """Response when g2p fails.

    ``reason`` carries a text description of the failure.
    """

    def __init__(self, reason: str) -> None:
        self.reason = reason
|
||||
|
||||
@@ -80,11 +94,14 @@ class PhonetisaurusPronounce(RhasspyActor):
|
||||
RhasspyActor.__init__(self)
|
||||
self.speed = 80 # wpm for speaking
|
||||
self.base_dict: Optional[Dict[str, List[str]]] = None
|
||||
self.speech_system: str = ""
|
||||
|
||||
def to_started(self, from_state: str) -> None:
    """Enter the started state and remember the profile's speech-to-text system.

    The value is read from ``speech_to_text.system`` and falls back to
    ``"pocketsphinx"`` when the profile does not set it.
    """
    configured_system = self.profile.get("speech_to_text.system", "pocketsphinx")
    self.speech_system = configured_system
|
||||
|
||||
def in_started(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in started state"""
|
||||
if isinstance(message, SpeakWord):
|
||||
espeak_phonemes, wav_data = self.speak(message.word)
|
||||
self.send(
|
||||
@@ -107,6 +124,7 @@ class PhonetisaurusPronounce(RhasspyActor):
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def speak(self, espeak_str: str, voice: Optional[str] = None) -> Tuple[str, bytes]:
|
||||
"""Speak word pronunciation"""
|
||||
|
||||
# Use eSpeak to pronounce word
|
||||
espeak_command = ["espeak", "-s", str(self.speed), "-x"]
|
||||
@@ -133,6 +151,7 @@ class PhonetisaurusPronounce(RhasspyActor):
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def translate_phonemes(self, phonemes: str) -> str:
|
||||
"""Get eSpeak phonemes for a pronunciation"""
|
||||
# Load map from Sphinx to eSpeak phonemes
|
||||
map_path = self.profile.read_path(
|
||||
self.profile.get(f"speech_to_text.{self.speech_system}.phoneme_map")
|
||||
@@ -150,10 +169,11 @@ class PhonetisaurusPronounce(RhasspyActor):
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def pronounce(self, words: List[str], n: int = 5) -> Dict[str, Dict[str, Any]]:
|
||||
"""Look up or guess word pronunciation(s)"""
|
||||
assert n > 0, "No pronunciations requested"
|
||||
assert len(words) > 0, "No words to look up"
|
||||
|
||||
self._logger.debug("Getting pronunciations for %s" % words)
|
||||
self._logger.debug("Getting pronunciations for %s", words)
|
||||
|
||||
# Load base and custom dictionaries
|
||||
base_dictionary_path = self.profile.read_path(
|
||||
|
||||
@@ -6,10 +6,13 @@ import tempfile
|
||||
import time
|
||||
import wave
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Type
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
|
||||
from rhasspy.actor import RhasspyActor
|
||||
from rhasspy.utils import convert_wav
|
||||
from rhasspy.utils import convert_wav, hass_request_kwargs, maybe_convert_wav
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
@@ -42,9 +45,14 @@ class WavTranscription:
|
||||
|
||||
def get_decoder_class(system: str) -> Type[RhasspyActor]:
|
||||
"""Get type for profile speech to text decoder."""
|
||||
assert system in ["dummy", "pocketsphinx", "kaldi", "remote", "command"], (
|
||||
"Invalid speech to text system: %s" % system
|
||||
)
|
||||
assert system in [
|
||||
"dummy",
|
||||
"pocketsphinx",
|
||||
"kaldi",
|
||||
"remote",
|
||||
"hass_stt",
|
||||
"command",
|
||||
], ("Invalid speech to text system: %s" % system)
|
||||
|
||||
if system == "pocketsphinx":
|
||||
# Use pocketsphinx locally
|
||||
@@ -55,6 +63,9 @@ def get_decoder_class(system: str) -> Type[RhasspyActor]:
|
||||
if system == "remote":
|
||||
# Use remote Rhasspy server
|
||||
return RemoteDecoder
|
||||
if system == "hass_stt":
|
||||
# Use Home Assistant STT platform
|
||||
return HomeAssistantSTTIntegration
|
||||
if system == "command":
|
||||
# Use external program
|
||||
return CommandDecoder
|
||||
@@ -90,6 +101,7 @@ class PocketsphinxDecoder(RhasspyActor):
|
||||
self.min_confidence: float = 0
|
||||
self.preload: bool = False
|
||||
self.decoder = None
|
||||
self.open_transcription = False
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -115,6 +127,7 @@ class PocketsphinxDecoder(RhasspyActor):
|
||||
try:
|
||||
self.load_decoder()
|
||||
text, confidence = self.transcribe_wav(message.wav_data)
|
||||
self._logger.debug(text)
|
||||
self.send(
|
||||
message.receiver or sender,
|
||||
WavTranscription(
|
||||
@@ -224,7 +237,6 @@ class PocketsphinxDecoder(RhasspyActor):
|
||||
self._logger.debug("Transcription confidence: %s", confidence)
|
||||
if confidence >= self.min_confidence:
|
||||
# Return best transcription
|
||||
self._logger.debug(hyp.hypstr)
|
||||
return hyp.hypstr, confidence
|
||||
|
||||
self._logger.warning(
|
||||
@@ -243,7 +255,8 @@ class PocketsphinxDecoder(RhasspyActor):
|
||||
problems: Dict[str, Any] = {}
|
||||
|
||||
try:
|
||||
import pocketsphinx
|
||||
# pylint: disable=W0201,W1201,W0611
|
||||
import pocketsphinx # noqa: F401
|
||||
except Exception:
|
||||
problems[
|
||||
"Missing pocketsphinx"
|
||||
@@ -313,8 +326,6 @@ class RemoteDecoder(RhasspyActor):
|
||||
|
||||
def transcribe_wav(self, wav_data: bytes) -> str:
|
||||
"""POST to remote server and return response."""
|
||||
import requests
|
||||
|
||||
headers = {"Content-Type": "audio/wav"}
|
||||
self._logger.debug(
|
||||
"POSTing %d byte(s) of WAV data to %s", len(wav_data), self.remote_url
|
||||
@@ -350,6 +361,7 @@ class KaldiDecoder(RhasspyActor):
|
||||
self.graph_dir: Optional[Path] = None
|
||||
self.decode_path: Optional[Path] = None
|
||||
self.decode_command: List[str] = []
|
||||
self.open_transcription = False
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -393,6 +405,7 @@ class KaldiDecoder(RhasspyActor):
|
||||
"""Handle messages in started state."""
|
||||
if isinstance(message, TranscribeWav):
|
||||
text = self.transcribe_wav(message.wav_data)
|
||||
self._logger.debug(text)
|
||||
self.send(message.receiver or sender, WavTranscription(text))
|
||||
|
||||
def transcribe_wav(self, wav_data: bytes) -> str:
|
||||
@@ -459,11 +472,154 @@ class KaldiDecoder(RhasspyActor):
|
||||
"Missing HCLG.fst"
|
||||
] = f"Graph not found at {hclg_path}. Did you train your profile?"
|
||||
|
||||
conf_path = self.model_dir / "online" / "conf" / "online.conf"
|
||||
if not conf_path.is_file():
|
||||
# assert self.model_dir is not None
|
||||
# conf_path = self.model_dir / "online" / "conf" / "online.conf"
|
||||
# if not conf_path.is_file():
|
||||
# problems[
|
||||
# "Missing online.conf"
|
||||
# ] = f"Configuration file not found at {conf_path}. Did you train your profile?"
|
||||
|
||||
return problems
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Home Assistant STT Integration
|
||||
# https://www.home-assistant.io/integrations/stt
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HomeAssistantSTTIntegration(RhasspyActor):
    """Use STT integration to Home Assistant.

    WAV audio from ``TranscribeWav`` messages is converted to the configured
    sample rate / bit rate / channel count, streamed in chunks to
    ``api/stt/<platform>`` on the Home Assistant server, and the returned
    text is sent back as a ``WavTranscription``.
    """

    def __init__(self) -> None:
        RhasspyActor.__init__(self)
        self.hass_config: Dict[str, Any] = {}
        self.pem_file: Optional[str] = ""
        self.platform: Optional[str] = None
        self.chunk_size: int = 2048
        self.sample_rate: int = 16000
        self.bit_rate: int = 16
        self.channels: int = 1
        self.language: str = "en-US"

    def to_started(self, from_state: str) -> None:
        """Transition to started state, loading settings from the profile."""
        self.hass_config = self.profile.get("home_assistant", {})

        # PEM file for self-signed HA certificates
        self.pem_file = self.hass_config.get("pem_file", "")
        if self.pem_file:
            self.pem_file = os.path.expandvars(self.pem_file)
            self._logger.debug("Using PEM file at %s", self.pem_file)
        else:
            self.pem_file = None  # disabled

        self.platform = self.profile.get("speech_to_text.hass_stt.platform")
        self.chunk_size = int(
            self.profile.get("speech_to_text.hass_stt.chunk_size", 2048)
        )

        self.sample_rate = int(
            self.profile.get("speech_to_text.hass_stt.sample_rate", 16000)
        )
        self.bit_rate = int(self.profile.get("speech_to_text.hass_stt.bit_rate", 16))
        self.channels = int(self.profile.get("speech_to_text.hass_stt.channels", 1))
        self.language = str(
            self.profile.get("speech_to_text.hass_stt.language", "en-US")
        )

    def in_started(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in started state."""
        if isinstance(message, TranscribeWav):
            text = self.transcribe_wav(message.wav_data)
            self.send(message.receiver or sender, WavTranscription(text))

    def transcribe_wav(self, wav_data: bytes) -> str:
        """Get text from the Home Assistant STT platform.

        Args:
            wav_data: complete WAV file contents to transcribe.

        Returns:
            The transcription text, or an empty string when anything fails
            (the exception is logged, never propagated).
        """
        try:
            # Explicit raise instead of assert so the check survives -O
            if not self.platform:
                raise ValueError("Missing platform name")

            # Convert WAV to desired format
            wav_data = maybe_convert_wav(
                wav_data,
                rate=self.sample_rate,
                width=self.bit_rate,
                channels=self.channels,
            )

            stt_url = urljoin(self.hass_config["url"], f"api/stt/{self.platform}")

            # Send to Home Assistant
            kwargs = hass_request_kwargs(self.hass_config, self.pem_file)

            if self.pem_file is not None:
                kwargs["verify"] = self.pem_file

            # setdefault stores a new dict back into kwargs; a plain
            # .get("headers", {}) would drop the header when none exist yet.
            headers = kwargs.setdefault("headers", {})
            headers["X-Speech-Content"] = "; ".join(
                [
                    "format=wav",
                    "codec=pcm",
                    f"sample_rate={self.sample_rate}",
                    f"bit_rate={self.bit_rate}",
                    f"channel={self.channels}",
                    f"language={self.language}",
                ]
            )

            # A non-positive chunk size would never make progress
            step = max(self.chunk_size, 1)

            def generate_chunks() -> Iterable[bytes]:
                with io.BytesIO(wav_data) as wav_buffer:
                    with wave.open(wav_buffer, "rb") as wav_file:
                        # Send empty WAV as initial chunk (header only)
                        with io.BytesIO() as empty_wav_buffer:
                            empty_wav_file: wave.Wave_write = wave.open(
                                empty_wav_buffer, "wb"
                            )
                            with empty_wav_file:
                                empty_wav_file.setframerate(wav_file.getframerate())
                                empty_wav_file.setsampwidth(wav_file.getsampwidth())
                                empty_wav_file.setnchannels(wav_file.getnchannels())

                            yield empty_wav_buffer.getvalue()

                        # Stream chunks by offset rather than re-slicing the
                        # remaining buffer on every iteration
                        audio_data = wav_file.readframes(wav_file.getnframes())
                        for offset in range(0, len(audio_data), step):
                            yield audio_data[offset : offset + step]

            # POST WAV data to STT
            response = requests.post(stt_url, data=generate_chunks(), **kwargs)
            response.raise_for_status()

            response_json = response.json()
            self._logger.debug(response_json)

            if response_json["result"] != "success":
                raise RuntimeError(
                    f"Home Assistant STT did not succeed: {response_json}"
                )

            return response_json["text"]

        except Exception:
            self._logger.exception("transcribe_wav")
            return ""

    def get_problems(self) -> Dict[str, Any]:
        """Get problems at startup.

        Returns:
            Mapping of short problem titles to explanations; empty when the
            platform name is set and the STT endpoint could be requested.
        """
        problems: Dict[str, Any] = {}

        if not self.platform:
            problems[
                "Missing platform name"
            ] = "Expected Home Assistant STT platform name in speech_to_text.hass_stt.platform"

        # .get keeps a missing "url" setting from raising here; the request
        # below then fails and is reported as a problem instead.
        stt_url = urljoin(self.hass_config.get("url", ""), f"api/stt/{self.platform}")
        try:
            kwargs = hass_request_kwargs(self.hass_config, self.pem_file)
            requests.get(stt_url, **kwargs)
        except Exception:
            problems[
                "Can't contact server"
            ] = f"Unable to reach your Home Assistant STT platform at {stt_url}. Is the platform configured?"

        return problems
|
||||
|
||||
|
||||
@@ -1,38 +1,42 @@
|
||||
#!/usr/bin/env python3
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import logging
|
||||
import tempfile
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Dict, Set, Iterable, Any, List, Tuple
|
||||
from collections import deque
|
||||
from typing import Iterable, List, Tuple
|
||||
|
||||
from num2words import num2words
|
||||
import pywrapfst as fst
|
||||
import networkx as nx
|
||||
import doit
|
||||
|
||||
from doit import create_after
|
||||
from doit.cmd_base import ModuleTaskLoader
|
||||
from doit.doit_cmd import DoitMain
|
||||
from doit.reporter import ConsoleReporter
|
||||
|
||||
from rhasspy.train.jsgf2fst import (
|
||||
get_grammar_dependencies,
|
||||
grammar_to_fsts,
|
||||
slots_to_fsts,
|
||||
make_intent_fst,
|
||||
from rhasspynlu import (
|
||||
parse_ini,
|
||||
intents_to_graph,
|
||||
graph_to_fst,
|
||||
graph_to_json,
|
||||
json_to_graph,
|
||||
jsgf,
|
||||
ini_jsgf,
|
||||
)
|
||||
|
||||
from rhasspy.train.ini_jsgf import make_grammars
|
||||
from rhasspy.train.vocab_dict import make_dict, FORMAT_CMU, FORMAT_JULIUS
|
||||
from rhasspy.profiles import Profile
|
||||
from rhasspy.utils import ppath as utils_ppath, read_dict
|
||||
from rhasspy.utils import (
|
||||
ppath as utils_ppath,
|
||||
read_dict,
|
||||
get_ini_paths,
|
||||
get_all_intents,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("train")
|
||||
_LOGGER = logging.getLogger("train")
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
@@ -43,12 +47,15 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
def ppath(query, default=None, write=False):
|
||||
return utils_ppath(profile, profile_dir, query, default, write=write)
|
||||
|
||||
language = profile.get("language", "")
|
||||
|
||||
# Inputs
|
||||
stt_system = profile.get("speech_to_text.system")
|
||||
stt_prefix = f"speech_to_text.{stt_system}"
|
||||
|
||||
# intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist")
|
||||
sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini")
|
||||
sentences_dir = ppath("speech_to_text.sentences_dir", "sentences.dir")
|
||||
base_dictionary = ppath(f"{stt_prefix}.base_dictionary", "base_dictionary.txt")
|
||||
base_language_model = ppath(
|
||||
f"{stt_prefix}.base_language_model", "base_language_model.txt"
|
||||
@@ -57,16 +64,20 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst")
|
||||
acoustic_model_type = stt_system
|
||||
|
||||
if acoustic_model_type == "pocketsphinx":
|
||||
acoustic_model = ppath(f"{stt_prefix}.acoustic_model", "acoustic_model")
|
||||
kaldi_dir = None
|
||||
elif acoustic_model_type == "kaldi":
|
||||
kaldi_dir = Path(
|
||||
os.path.expandvars(profile.get(f"{stt_prefix}.kaldi_dir", "/opt/kaldi"))
|
||||
)
|
||||
# Pocketsphinx
|
||||
acoustic_model = ppath(f"{stt_prefix}.acoustic_model", "acoustic_model")
|
||||
|
||||
# Kaldi
|
||||
kaldi_dir = Path(
|
||||
os.path.expandvars(profile.get(f"{stt_prefix}.kaldi_dir", "/opt/kaldi"))
|
||||
)
|
||||
kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph", "graph")
|
||||
|
||||
if acoustic_model_type == "kaldi":
|
||||
# Kaldi acoustic models are inside model directory
|
||||
acoustic_model = ppath(f"{stt_prefix}.model_dir", "model")
|
||||
else:
|
||||
assert False, f"Unknown acoustic model type: {acoustic_model_type}"
|
||||
_LOGGER.warning("Unsupported acoustic model type: %s", acoustic_model_type)
|
||||
|
||||
# ignore/upper/lower
|
||||
word_casing = profile.get("speech_to_text.dictionary_casing", "ignore").lower()
|
||||
@@ -77,9 +88,6 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
# all/first
|
||||
dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule", "all").lower()
|
||||
|
||||
# Kaldi
|
||||
kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph", "graph")
|
||||
|
||||
# Outputs
|
||||
dictionary = ppath(f"{stt_prefix}.dictionary", "dictionary.txt", write=True)
|
||||
custom_words = ppath(f"{stt_prefix}.custom_words", "custom_words.txt", write=True)
|
||||
@@ -89,6 +97,7 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
base_language_model_fst = ppath(
|
||||
f"{stt_prefix}.base_language_model_fst", "base_language_model.fst", write=True
|
||||
)
|
||||
intent_graph = ppath("intent.fsticiffs.intent_graph", "intent.json", write=True)
|
||||
intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True)
|
||||
vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True)
|
||||
unknown_words = ppath(
|
||||
@@ -106,171 +115,152 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Set of used intents
|
||||
intents: Set[str] = set()
|
||||
whitelist = None
|
||||
ini_paths: List[Path] = get_ini_paths(sentences_ini, sentences_dir)
|
||||
|
||||
# Default to using all intents
|
||||
intents.update(_get_intents(sentences_ini))
|
||||
# Join ini files into a single combined file and parse
|
||||
_LOGGER.debug("Parsing ini file(s): %s", [str(p) for p in ini_paths])
|
||||
|
||||
try:
|
||||
intents = get_all_intents(ini_paths)
|
||||
except Exception:
|
||||
_LOGGER.exception("Failed to parse %s", ini_paths)
|
||||
return (1, ["Failed to parse sentences"])
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
def task_grammars():
|
||||
"""Transforms sentences.ini into JSGF grammars, one per intent."""
|
||||
maybe_deps = []
|
||||
def get_slot_names(item):
    """Yield the slot name of every slot reference found inside ``item``.

    A ``jsgf.SlotReference`` yields its own ``slot_name``.  A
    ``jsgf.Sequence`` is searched through each entry of ``items`` and a
    ``jsgf.Rule`` through its ``rule_body``.  Any other kind of item
    yields nothing.
    """
    if isinstance(item, jsgf.SlotReference):
        yield item.slot_name
    elif isinstance(item, jsgf.Sequence):
        # Descend into each child expression of the sequence
        for child in item.items:
            yield from get_slot_names(child)
    elif isinstance(item, jsgf.Rule):
        # A rule wraps a single body expression
        yield from get_slot_names(item.rule_body)
|
||||
|
||||
def ini_to_grammars(targets):
|
||||
with open(sentences_ini, "r") as sentences_file:
|
||||
make_grammars(sentences_file, grammar_dir, whitelist=whitelist)
|
||||
def number_transform(word):
|
||||
"""Automatically transform numbers"""
|
||||
if not isinstance(word, jsgf.Word):
|
||||
# Skip anything besides words
|
||||
return
|
||||
|
||||
return {
|
||||
"file_dep": [sentences_ini] + maybe_deps,
|
||||
"targets": [grammar_dir / f"{intent}.gram" for intent in intents],
|
||||
"actions": [ini_to_grammars],
|
||||
}
|
||||
try:
|
||||
n = int(word.text)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 75 -> (seventy five):75
|
||||
number_text = num2words(n, lang=language).replace("-", " ").strip()
|
||||
assert number_text, f"Empty num2words result for {n}"
|
||||
number_words = number_text.split()
|
||||
|
||||
def do_slots_to_fst(slot_names, targets):
|
||||
# Extra arguments for word casing
|
||||
kwargs = {}
|
||||
if word_casing == "upper":
|
||||
kwargs["upper"] = True
|
||||
elif word_casing == "lower":
|
||||
kwargs["lower"] = True
|
||||
if len(number_words) == 1:
|
||||
# Easy case, single word
|
||||
word.text = number_text
|
||||
word.substitution = str(n)
|
||||
else:
|
||||
# Hard case, split into multiple Words
|
||||
return jsgf.Sequence(
|
||||
text=number_text,
|
||||
type=jsgf.SequenceType.GROUP,
|
||||
substitution=str(n),
|
||||
items=[jsgf.Word(w) for w in number_words],
|
||||
)
|
||||
except ValueError:
|
||||
# Not a number
|
||||
pass
|
||||
|
||||
slot_fsts = slots_to_fsts(slots_dir, slot_names=slot_names, **kwargs)
|
||||
for slot_name, slot_fst in slot_fsts.items():
|
||||
# Slot name will already have "$"
|
||||
slot_fst.write(str(fsts_dir / f"{slot_name}.fst"))
|
||||
def do_intents_to_graph(intents, slot_names, targets):
|
||||
sentences, replacements = ini_jsgf.split_rules(intents)
|
||||
|
||||
def do_grammar_to_fsts(
|
||||
grammar_path: Path, replace_fst_paths: Dict[str, Path], targets
|
||||
):
|
||||
# Load dependent FSTs
|
||||
replace_fsts = {
|
||||
replace_name: fst.Fst.read(str(replace_path))
|
||||
for replace_name, replace_path in replace_fst_paths.items()
|
||||
}
|
||||
# Load slot values
|
||||
for slot_name in slot_names:
|
||||
slot_path = slots_dir / slot_name
|
||||
assert slot_path.is_file(), f"Missing slot file at {slot_path}"
|
||||
|
||||
# Extra arguments for word casing
|
||||
kwargs = {}
|
||||
if word_casing == "upper":
|
||||
kwargs["upper"] = True
|
||||
elif word_casing == "lower":
|
||||
kwargs["lower"] = True
|
||||
# Parse each non-empty line as a JSGF sentence
|
||||
slot_values = []
|
||||
with open(slot_path, "r") as slot_file:
|
||||
for line in slot_file:
|
||||
line = line.strip()
|
||||
if line:
|
||||
sentence = jsgf.Sentence.parse(line)
|
||||
slot_values.append(sentence)
|
||||
|
||||
grammar = grammar_path.read_text()
|
||||
listener = grammar_to_fsts(grammar, replace_fsts=replace_fsts, **kwargs)
|
||||
grammar_name = listener.grammar_name
|
||||
# Replace $slot with sentences
|
||||
replacements[f"${slot_name}"] = slot_values
|
||||
|
||||
# Write FST for each JSGF rule
|
||||
for rule_name, rule_fst in listener.fsts.items():
|
||||
fst_path = fsts_dir / f"{rule_name}.fst"
|
||||
rule_fst.write(str(fst_path))
|
||||
if profile.get("intent.replace_numbers", True):
|
||||
# Replace numbers in parsed sentences
|
||||
for intent_sentences in sentences.values():
|
||||
for sentence in intent_sentences:
|
||||
jsgf.walk_expression(sentence, number_transform, replacements)
|
||||
|
||||
# Write FST for main grammar rule
|
||||
grammar_fst_path = fsts_dir / f"{grammar_name}.fst"
|
||||
assert listener.grammar_fst is not None
|
||||
listener.grammar_fst.write(str(grammar_fst_path))
|
||||
# Convert to directed graph
|
||||
graph = intents_to_graph(intents, replacements)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
def do_grammar_dependencies(grammar_path: Path, targets):
|
||||
grammar = grammar_path.read_text()
|
||||
grammar_deps = get_grammar_dependencies(grammar).graph
|
||||
graph_json = nx.readwrite.json_graph.node_link_data(grammar_deps)
|
||||
# Write graph to JSON file
|
||||
json_graph = graph_to_json(graph)
|
||||
with open(targets[0], "w") as graph_file:
|
||||
json.dump(graph_json, graph_file)
|
||||
json.dump(json_graph, graph_file)
|
||||
|
||||
@create_after(executed="grammars")
|
||||
def task_grammar_dependencies():
|
||||
"""Creates grammar dependency graphs from JSGF grammars and relevant slots."""
|
||||
def task_ini_graph():
|
||||
"""sentences.ini -> intent.json"""
|
||||
slot_names = set()
|
||||
for intent_name in intents:
|
||||
for item in intents[intent_name]:
|
||||
for slot_name in get_slot_names(item):
|
||||
slot_names.add(slot_name)
|
||||
|
||||
for intent in intents:
|
||||
grammar_path = grammar_dir / f"{intent}.gram"
|
||||
yield {
|
||||
"name": intent + "_dependencies",
|
||||
"file_dep": [grammar_path],
|
||||
"targets": [str(grammar_path) + ".json"],
|
||||
"actions": [(do_grammar_dependencies, [grammar_path])],
|
||||
}
|
||||
# Add slot files as dependencies
|
||||
deps = [(slots_dir / slot_name) for slot_name in slot_names]
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Add profile itself as a dependency
|
||||
profile_json_path = profile_dir / "profile.json"
|
||||
if profile_json_path.is_file():
|
||||
deps.append(profile_json_path)
|
||||
|
||||
@create_after(executed="grammar_dependencies")
def task_grammar_fsts():
    """Creates grammar FSTs from JSGF grammars and relevant slots.

    For every intent, the dependency graph stored next to its grammar
    (``<intent>.gram.json``) is loaded and its nodes are sorted by their
    ``type``: "slot" and "remote rule" nodes become FST paths that the
    grammar depends on, "local rule" nodes become FST paths that will be
    produced.  One task dictionary is yielded per intent; a final
    "slot_fsts" task is yielded when at least one slot was referenced.
    """
    slots_seen: Set[str] = set()

    for intent in intents:
        grammar_path = grammar_dir / f"{intent}.gram"
        grammar_dep_path = str(grammar_path) + ".json"

        # Load dependency graph
        with open(grammar_dep_path, "r") as graph_file:
            graph_data = json.load(graph_file)
            grammar_deps = nx.readwrite.json_graph.node_link_graph(graph_data)

        local_rules: Set[str] = set()
        replace_fst_paths: Dict[str, Path] = {}

        # Process dependencies
        for node, data in grammar_deps.nodes(data=True):
            kind = data["type"]

            if kind == "slot":
                # Node name carries a leading "$"; the slot file name does not
                slots_seen.add(node[1:])

            if kind in ("slot", "remote rule"):
                # FST that must already exist before this grammar is compiled
                replace_fst_paths[node] = fsts_dir / f"{node}.fst"
            elif kind == "local rule":
                local_rules.add(node)

        # All rule/grammar FSTs that will be generated
        produced_fsts = [fsts_dir / f"{rule_name}.fst" for rule_name in local_rules]
        produced_fsts.append(fsts_dir / f"{intent}.fst")

        dependencies = [grammar_path, grammar_dep_path]
        dependencies += list(replace_fst_paths.values())

        yield {
            "name": intent + "_fst",
            "file_dep": dependencies,
            "targets": produced_fsts,
            "actions": [(do_grammar_to_fsts, [grammar_path, replace_fst_paths])],
        }

    # slots -> FST
    # NOTE(review): placed after the loop because slots are accumulated across
    # all intents; the listing this was taken from had no indentation - confirm.
    if slots_seen:
        slot_files = [slots_dir / slot_name for slot_name in slots_seen]
        slot_fst_files = [fsts_dir / f"${slot_name}.fst" for slot_name in slots_seen]

        yield {
            "name": "slot_fsts",
            "file_dep": slot_files,
            "targets": slot_fst_files,
            "actions": [(do_slots_to_fst, [slots_seen])],
        }
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
def do_intent_fst(intents: Iterable[str], targets):
    """Merge the per-intent FSTs into one FST and write it to ``targets[0]``.

    Reads ``<intent>.fst`` from ``fsts_dir`` for each name in ``intents``,
    passes the resulting name -> FST mapping to ``make_intent_fst`` and
    writes what it returns to the first target path.
    """
    fsts_by_intent = {}
    for intent_name in intents:
        fst_path = fsts_dir / f"{intent_name}.fst"
        fsts_by_intent[intent_name] = fst.Fst.read(str(fst_path))

    merged_fst = make_intent_fst(fsts_by_intent)
    merged_fst.write(targets[0])
|
||||
|
||||
@create_after(executed="grammar_fsts")
|
||||
def task_intent_fst():
|
||||
"""Merges grammar FSTs into single intent.fst."""
|
||||
return {
|
||||
"file_dep": [fsts_dir / f"{intent}.fst" for intent in intents],
|
||||
"file_dep": ini_paths + deps,
|
||||
"targets": [intent_graph],
|
||||
"actions": [(do_intents_to_graph, [intents, slot_names])],
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
def do_graph_to_fst(intent_graph, targets):
    """Compile the JSON intent graph at ``intent_graph`` into an FST file.

    The JSON file is loaded and converted with ``json_to_graph`` and
    ``graph_to_fst``.  Symbol tables are built from the result's
    ``input_symbols`` / ``output_symbols`` mappings, its ``intent_fst``
    text is fed to an ``fst.Compiler``, and the compiled FST is written to
    ``targets[0]``.
    """

    def build_symbol_table(symbol_numbers):
        """Return an fst.SymbolTable holding every (symbol, number) pair."""
        table = fst.SymbolTable()
        for symbol, number in symbol_numbers.items():
            table.add_symbol(symbol, number)
        return table

    with open(intent_graph, "r") as graph_file:
        json_graph = json.load(graph_file)

    graph_fst = graph_to_fst(json_to_graph(json_graph))

    # Create symbol tables (input first, then output)
    isymbols = build_symbol_table(graph_fst.input_symbols)
    osymbols = build_symbol_table(graph_fst.output_symbols)

    # Compile FST
    compiler = fst.Compiler(
        isymbols=isymbols,
        osymbols=osymbols,
        keep_isymbols=True,
        keep_osymbols=True,
    )
    compiler.write(graph_fst.intent_fst)

    # Write to file
    compiler.compile().write(str(targets[0]))
|
||||
|
||||
def task_intent_fst():
|
||||
"""intent.json -> intent.fst"""
|
||||
return {
|
||||
"file_dep": [intent_graph],
|
||||
"targets": [intent_fst],
|
||||
"actions": [(do_intent_fst, [intents])],
|
||||
"actions": [(do_graph_to_fst, [intent_graph])],
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -302,7 +292,7 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
"name": "intent_model",
|
||||
"file_dep": [intent_counts],
|
||||
"targets": [intent_model],
|
||||
"actions": ["ngrammake %(dependencies)s %(targets)s"],
|
||||
"actions": ["ngrammake --method=witten_bell %(dependencies)s %(targets)s"],
|
||||
}
|
||||
|
||||
if base_language_model_weight > 0:
|
||||
@@ -334,8 +324,11 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
with open(targets[0], "w") as vocab_file:
|
||||
input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
|
||||
for i in range(input_symbols.num_symbols()):
|
||||
symbol = input_symbols.find(i).decode().strip()
|
||||
if not (symbol.startswith("__") or symbol.startswith("<")):
|
||||
# Critical that we use get_nth_key here when input symbols
|
||||
# numbering is discontiguous.
|
||||
key = input_symbols.get_nth_key(i)
|
||||
symbol = input_symbols.find(key).decode().strip()
|
||||
if symbol and not (symbol.startswith("__") or symbol.startswith("<")):
|
||||
print(symbol, file=vocab_file)
|
||||
|
||||
if base_language_model_weight > 0:
|
||||
@@ -360,13 +353,6 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
if acoustic_model_type == "julius":
|
||||
dictionary_format = FORMAT_JULIUS
|
||||
|
||||
# Extra arguments for word casing
|
||||
kwargs = {}
|
||||
if word_casing == "upper":
|
||||
kwargs["upper"] = True
|
||||
elif word_casing == "lower":
|
||||
kwargs["lower"] = True
|
||||
|
||||
make_dict(
|
||||
vocab,
|
||||
dictionary_paths,
|
||||
@@ -374,12 +360,13 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
unknown_path=unknown_words,
|
||||
dictionary_format=dictionary_format,
|
||||
merge_rule=dict_merge_rule,
|
||||
**kwargs,
|
||||
upper=(word_casing == "upper"),
|
||||
lower=(word_casing == "lower"),
|
||||
)
|
||||
|
||||
if unknown_words.exists() and g2p_model.exists():
|
||||
# Generate single pronunciation guesses
|
||||
logger.debug("Guessing pronunciations for unknown word(s)")
|
||||
_LOGGER.debug("Guessing pronunciations for unknown word(s)")
|
||||
|
||||
g2p_output = subprocess.check_output(
|
||||
[
|
||||
@@ -477,19 +464,3 @@ def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
|
||||
# Run doit main
|
||||
result = DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:])
|
||||
return (result, errors)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Matches an ini header, e.g. [LightState]
|
||||
intent_pattern = re.compile(r"^\[([^\]]+)\]")
|
||||
|
||||
|
||||
def _get_intents(ini_path):
    """Yield the name of each intent section found in a sentences.ini file.

    Every line of the file at ``ini_path`` is stripped of surrounding
    whitespace and matched against ``intent_pattern``; the first capture
    group of each match is yielded.
    """
    with open(ini_path, "r") as ini_file:
        for raw_line in ini_file:
            header = intent_pattern.match(raw_line.strip())
            if not header:
                # Not a section header; skip this line
                continue

            yield header.group(1)
|
||||
|
||||
@@ -198,9 +198,12 @@ def fstprintall(
|
||||
out_file: Optional[TextIO] = None,
|
||||
exclude_meta: bool = True,
|
||||
eps: str = "<eps>",
|
||||
substitute: bool = False,
|
||||
) -> List[List[str]]:
|
||||
sentences = []
|
||||
input_symbols = in_fst.input_symbols()
|
||||
output_symbols = in_fst.output_symbols()
|
||||
in_eps = input_symbols.find(eps)
|
||||
out_eps = output_symbols.find(eps)
|
||||
zero_weight = fst.Weight.Zero(in_fst.weight_type())
|
||||
|
||||
@@ -218,12 +221,25 @@ def fstprintall(
|
||||
|
||||
for arc in in_fst.arcs(state):
|
||||
arc_sentence = list(sentence)
|
||||
if arc.olabel != out_eps:
|
||||
out_symbol = output_symbols.find(arc.olabel).decode()
|
||||
if exclude_meta and out_symbol.startswith("__"):
|
||||
pass # skip __label__, etc.
|
||||
else:
|
||||
arc_sentence.append(out_symbol)
|
||||
if substitute:
|
||||
# Use output label
|
||||
if arc.olabel != out_eps:
|
||||
out_symbol = output_symbols.find(arc.olabel).decode()
|
||||
if exclude_meta and out_symbol.startswith("__"):
|
||||
pass # skip __label__, etc.
|
||||
else:
|
||||
arc_sentence.append(out_symbol)
|
||||
else:
|
||||
# Use input label
|
||||
if arc.ilabel != in_eps:
|
||||
in_symbol = input_symbols.find(arc.ilabel).decode()
|
||||
arc_sentence.append(in_symbol)
|
||||
|
||||
# Use meta output labels
|
||||
if not exclude_meta and (arc.olabel != out_eps):
|
||||
out_symbol = output_symbols.find(arc.olabel).decode()
|
||||
if out_symbol.startswith("__"):
|
||||
arc_sentence.append(out_symbol)
|
||||
|
||||
state_queue.append((arc.nextstate, arc_sentence))
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ import requests
|
||||
|
||||
from rhasspy.actor import Configured, ConfigureEvent, RhasspyActor
|
||||
from rhasspy.audio_player import PlayWavData, WavPlayed
|
||||
from rhasspy.utils import hass_request_kwargs
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
@@ -19,16 +20,26 @@ from rhasspy.audio_player import PlayWavData, WavPlayed
|
||||
class SpeakSentence:
|
||||
"""Request to speak a sentence."""
|
||||
|
||||
def __init__(self, sentence: str, receiver: Optional[RhasspyActor] = None) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
sentence: str,
|
||||
receiver: Optional[RhasspyActor] = None,
|
||||
play: bool = True,
|
||||
voice: Optional[str] = None,
|
||||
language: Optional[str] = None,
|
||||
) -> None:
|
||||
self.sentence = sentence
|
||||
self.receiver = receiver
|
||||
self.play = play
|
||||
self.voice = voice
|
||||
self.language = language
|
||||
|
||||
|
||||
class SentenceSpoken:
|
||||
"""Response when sentence is spoken."""
|
||||
|
||||
def __init__(self, wav_data: bytes):
|
||||
self.wav_data = wav_data
|
||||
def __init__(self, wav_data: Optional[bytes] = None):
|
||||
self.wav_data: bytes = wav_data or bytes()
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -44,6 +55,7 @@ def get_speech_class(system: str) -> Type[RhasspyActor]:
|
||||
"picotts",
|
||||
"command",
|
||||
"wavenet",
|
||||
"hass_tts",
|
||||
], ("Invalid text to speech system: %s" % system)
|
||||
|
||||
if system == "espeak":
|
||||
@@ -64,6 +76,9 @@ def get_speech_class(system: str) -> Type[RhasspyActor]:
|
||||
if system == "wavenet":
|
||||
# Use WaveNet text-to-speech system
|
||||
return GoogleWaveNetSentenceSpeaker
|
||||
if system == "hass_tts":
|
||||
# Use Home Assistant TTS platform
|
||||
return HomeAssistantSentenceSpeaker
|
||||
|
||||
# Use dummy as a fallback
|
||||
return DummySentenceSpeaker
|
||||
@@ -78,7 +93,7 @@ class DummySentenceSpeaker(RhasspyActor):
|
||||
def in_started(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in started state."""
|
||||
if isinstance(message, SpeakSentence):
|
||||
self.send(message.receiver or sender, SentenceSpoken(bytes()))
|
||||
self.send(message.receiver or sender, SentenceSpoken())
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -109,9 +124,15 @@ class EspeakSentenceSpeaker(RhasspyActor):
|
||||
"""Handle messages in ready state."""
|
||||
if isinstance(message, SpeakSentence):
|
||||
self.receiver = message.receiver or sender
|
||||
self.wav_data = self.speak(message.sentence)
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
voice = message.voice or message.language or self.voice
|
||||
self.wav_data = self.speak(message.sentence, voice=voice)
|
||||
|
||||
if message.play:
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
else:
|
||||
self.transition("ready")
|
||||
self.send(self.receiver, SentenceSpoken(self.wav_data))
|
||||
|
||||
def in_speaking(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in speaking state."""
|
||||
@@ -121,12 +142,12 @@ class EspeakSentenceSpeaker(RhasspyActor):
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def speak(self, sentence: str) -> bytes:
|
||||
def speak(self, sentence: str, voice: Optional[str] = None) -> bytes:
|
||||
"""Get WAV buffer for sentence."""
|
||||
try:
|
||||
espeak_cmd = ["espeak"]
|
||||
if self.voice is not None:
|
||||
espeak_cmd.extend(["-v", str(self.voice)])
|
||||
if voice:
|
||||
espeak_cmd.extend(["-v", str(voice)])
|
||||
|
||||
espeak_cmd.append("--stdout")
|
||||
espeak_cmd.append(sentence)
|
||||
@@ -176,9 +197,15 @@ class FliteSentenceSpeaker(RhasspyActor):
|
||||
"""Handle messages in ready state."""
|
||||
if isinstance(message, SpeakSentence):
|
||||
self.receiver = message.receiver or sender
|
||||
self.wav_data = self.speak(message.sentence)
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
voice = message.voice or message.language or self.voice
|
||||
self.wav_data = self.speak(message.sentence, voice=voice)
|
||||
|
||||
if message.play:
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
else:
|
||||
self.transition("ready")
|
||||
self.send(self.receiver, SentenceSpoken(self.wav_data))
|
||||
|
||||
def in_speaking(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in speaking state."""
|
||||
@@ -188,12 +215,12 @@ class FliteSentenceSpeaker(RhasspyActor):
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def speak(self, sentence: str) -> bytes:
|
||||
def speak(self, sentence: str, voice: Optional[str] = None) -> bytes:
|
||||
"""Get WAV buffer for sentence."""
|
||||
try:
|
||||
flite_cmd = ["flite", "-t", sentence, "-o", "/dev/stdout"]
|
||||
if len(self.voice) > 0:
|
||||
flite_cmd.extend(["-voice", str(self.voice)])
|
||||
if voice:
|
||||
flite_cmd.extend(["-voice", str(voice)])
|
||||
|
||||
self._logger.debug(flite_cmd)
|
||||
|
||||
@@ -248,9 +275,15 @@ class PicoTTSSentenceSpeaker(RhasspyActor):
|
||||
"""Handle messages in ready state."""
|
||||
if isinstance(message, SpeakSentence):
|
||||
self.receiver = message.receiver or sender
|
||||
self.wav_data = self.speak(message.sentence)
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
language = message.language or message.voice or self.language
|
||||
self.wav_data = self.speak(message.sentence, language=language)
|
||||
|
||||
if message.play:
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
else:
|
||||
self.transition("ready")
|
||||
self.send(self.receiver, SentenceSpoken(self.wav_data))
|
||||
|
||||
def in_speaking(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in speaking state."""
|
||||
@@ -266,12 +299,12 @@ class PicoTTSSentenceSpeaker(RhasspyActor):
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def speak(self, sentence: str) -> bytes:
|
||||
def speak(self, sentence: str, language: Optional[str] = None) -> bytes:
|
||||
"""Get WAV buffer for sentence."""
|
||||
try:
|
||||
pico_cmd = ["pico2wave", "-w", self.wav_path]
|
||||
if len(self.language) > 0:
|
||||
pico_cmd.extend(["-l", str(self.language)])
|
||||
if language:
|
||||
pico_cmd.extend(["-l", str(language)])
|
||||
|
||||
pico_cmd.append(sentence)
|
||||
self._logger.debug(pico_cmd)
|
||||
@@ -306,11 +339,12 @@ class MaryTTSSentenceSpeaker(RhasspyActor):
|
||||
def __init__(self) -> None:
|
||||
RhasspyActor.__init__(self)
|
||||
self.url = ""
|
||||
self.voice = None
|
||||
self.locale = ""
|
||||
self.voice: Optional[str] = None
|
||||
self.locale: str = ""
|
||||
self.player: Optional[RhasspyActor] = None
|
||||
self.receiver: Optional[RhasspyActor] = None
|
||||
self.wav_data = bytes()
|
||||
self.effects: Dict[str, Any] = {}
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -323,6 +357,7 @@ class MaryTTSSentenceSpeaker(RhasspyActor):
|
||||
|
||||
self.voice = self.profile.get("text_to_speech.marytts.voice", None)
|
||||
self.locale = self.profile.get("text_to_speech.marytts.locale", "en-US")
|
||||
self.effects = self.profile.get("text_to_speech.marytts.effects", {})
|
||||
|
||||
self.player = self.config["player"]
|
||||
self.transition("ready")
|
||||
@@ -331,9 +366,16 @@ class MaryTTSSentenceSpeaker(RhasspyActor):
|
||||
"""Handle messages in ready state."""
|
||||
if isinstance(message, SpeakSentence):
|
||||
self.receiver = message.receiver or sender
|
||||
self.wav_data = self.speak(message.sentence)
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
voice = message.voice or self.voice
|
||||
locale = message.language or self.locale or "en-US"
|
||||
self.wav_data = self.speak(message.sentence, locale, voice=voice)
|
||||
|
||||
if message.play:
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
else:
|
||||
self.transition("ready")
|
||||
self.send(self.receiver, SentenceSpoken(self.wav_data))
|
||||
|
||||
def in_speaking(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in speaking state."""
|
||||
@@ -343,7 +385,7 @@ class MaryTTSSentenceSpeaker(RhasspyActor):
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def speak(self, sentence: str) -> bytes:
|
||||
def speak(self, sentence: str, locale: str, voice: Optional[str] = None) -> bytes:
|
||||
"""Get WAV buffer for sentence."""
|
||||
try:
|
||||
params = {
|
||||
@@ -351,11 +393,12 @@ class MaryTTSSentenceSpeaker(RhasspyActor):
|
||||
"INPUT_TYPE": "TEXT",
|
||||
"AUDIO": "WAVE",
|
||||
"OUTPUT_TYPE": "AUDIO",
|
||||
"LOCALE": self.locale,
|
||||
"LOCALE": locale,
|
||||
}
|
||||
params.update(self.effects)
|
||||
|
||||
if self.voice is not None:
|
||||
params["VOICE"] = self.voice
|
||||
if voice is not None:
|
||||
params["VOICE"] = voice
|
||||
|
||||
self._logger.debug(params)
|
||||
|
||||
@@ -417,8 +460,13 @@ class CommandSentenceSpeaker(RhasspyActor):
|
||||
if isinstance(message, SpeakSentence):
|
||||
self.receiver = message.receiver or sender
|
||||
self.wav_data = self.speak(message.sentence)
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
|
||||
if message.play:
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
else:
|
||||
self.transition("ready")
|
||||
self.send(self.receiver, SentenceSpoken(self.wav_data))
|
||||
|
||||
def in_speaking(self, message: Any, sender: RhasspyActor) -> None:
|
||||
"""Handle messages in speaking state."""
|
||||
@@ -516,9 +564,16 @@ class GoogleWaveNetSentenceSpeaker(RhasspyActor):
|
||||
self.wav_data = bytes()
|
||||
self.receiver = message.receiver or sender
|
||||
try:
|
||||
self.wav_data = self.speak(message.sentence)
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
voice = message.voice or self.voice
|
||||
language_code = message.language or self.language_code
|
||||
self.wav_data = self.speak(message.sentence, voice, language_code)
|
||||
|
||||
if message.play:
|
||||
self.transition("speaking")
|
||||
self.send(self.player, PlayWavData(self.wav_data))
|
||||
else:
|
||||
self.transition("ready")
|
||||
self.send(self.receiver, SentenceSpoken(self.wav_data))
|
||||
except Exception:
|
||||
self._logger.exception("speak")
|
||||
|
||||
@@ -530,7 +585,15 @@ class GoogleWaveNetSentenceSpeaker(RhasspyActor):
|
||||
|
||||
self._logger.debug("Falling back to %s", self.fallback_actor)
|
||||
self.transition("speaking")
|
||||
self.send(self.fallback_actor, SpeakSentence(message.sentence))
|
||||
self.send(
|
||||
self.fallback_actor,
|
||||
SpeakSentence(
|
||||
message.sentence,
|
||||
play=message.play,
|
||||
voice=message.voice,
|
||||
language=message.language,
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
# Give up
|
||||
self.transition("ready")
|
||||
@@ -551,10 +614,10 @@ class GoogleWaveNetSentenceSpeaker(RhasspyActor):
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def speak(self, sentence: str) -> bytes:
|
||||
def speak(self, sentence: str, voice: str, language_code: str) -> bytes:
|
||||
"""Get WAV buffer for sentence."""
|
||||
# Try to pull WAV from cache first
|
||||
sentence_hash = self._get_sentence_hash(sentence)
|
||||
sentence_hash = self._get_sentence_hash(sentence, voice, language_code)
|
||||
cached_wav_path = os.path.join(
|
||||
self.cache_dir, "{}.wav".format(sentence_hash.hexdigest())
|
||||
)
|
||||
@@ -582,8 +645,8 @@ class GoogleWaveNetSentenceSpeaker(RhasspyActor):
|
||||
|
||||
self._logger.debug(
|
||||
"Calling Wavenet (lang=%s, voice=%s, gender=%s, rate=%s)",
|
||||
self.language_code,
|
||||
self.voice,
|
||||
language_code,
|
||||
voice,
|
||||
self.gender,
|
||||
self.sample_rate,
|
||||
)
|
||||
@@ -591,17 +654,23 @@ class GoogleWaveNetSentenceSpeaker(RhasspyActor):
|
||||
from google.cloud import texttospeech
|
||||
|
||||
client = texttospeech.TextToSpeechClient()
|
||||
|
||||
# pylint: disable=E1101
|
||||
synthesis_input = texttospeech.types.SynthesisInput(text=sentence)
|
||||
voice = texttospeech.types.VoiceSelectionParams(
|
||||
language_code=self.language_code,
|
||||
name=self.language_code + "-" + self.voice,
|
||||
|
||||
# pylint: disable=E1101
|
||||
voice_params = texttospeech.types.VoiceSelectionParams(
|
||||
language_code=language_code,
|
||||
name=language_code + "-" + voice,
|
||||
ssml_gender=self.gender,
|
||||
)
|
||||
|
||||
# pylint: disable=E1101
|
||||
audio_config = texttospeech.types.AudioConfig(
|
||||
audio_encoding="LINEAR16", sample_rate_hertz=self.sample_rate
|
||||
)
|
||||
|
||||
response = client.synthesize_speech(synthesis_input, voice, audio_config)
|
||||
response = client.synthesize_speech(synthesis_input, voice_params, audio_config)
|
||||
|
||||
# Save to cache
|
||||
with open(cached_wav_path, "wb") as cached_wav_file:
|
||||
@@ -611,19 +680,151 @@ class GoogleWaveNetSentenceSpeaker(RhasspyActor):
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_sentence_hash(self, sentence: str, voice: str, language_code: str):
    """Build the MD5 hash object used to name a cached WAV file.

    The key is the sentence, the "<language_code>-<voice>" name, the
    configured gender, the configured sample rate and the language code,
    joined with underscores and encoded as UTF-8.

    Returns the hashlib hash object (callers take ``hexdigest()`` of it).
    """
    key_parts = [
        sentence,
        "-".join((language_code, voice)),
        self.gender,
        str(self.sample_rate),
        language_code,
    ]

    return hashlib.md5("_".join(key_parts).encode("utf-8"))
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# HomeAssistant TTS
|
||||
# https://www.home-assistant.io/integrations/tts
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HomeAssistantSentenceSpeaker(RhasspyActor):
    """Use Home Assistant TTS platform to generate speech.

    Sentences are sent to Home Assistant's ``api/tts_get_url`` endpoint, the
    audio at the returned URL is downloaded (and decoded with ``lame`` when
    the URL ends in ``.mp3``), and the result is either played through the
    configured player actor or returned directly to the requester.
    """

    def __init__(self) -> None:
        RhasspyActor.__init__(self)
        self.command: List[str] = []
        self.hass_config: Dict[str, Any] = {}
        self.pem_file: Optional[str] = ""
        self.platform: Optional[str] = None

        self.player: Optional[RhasspyActor] = None
        self.receiver: Optional[RhasspyActor] = None
        self.wav_data = bytes()

    def to_started(self, from_state: str) -> None:
        """Transition to started state."""
        self.hass_config = self.profile.get("home_assistant", {})

        # PEM file for self-signed HA certificates
        self.pem_file = self.hass_config.get("pem_file", "")
        if self.pem_file:
            self.pem_file = os.path.expandvars(self.pem_file)
            self._logger.debug("Using PEM file at %s", self.pem_file)
        else:
            self.pem_file = None  # disabled

        self.platform = self.profile.get("text_to_speech.hass_tts.platform")

        self.player = self.config["player"]
        self.transition("ready")

    def in_ready(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in ready state."""
        if isinstance(message, SpeakSentence):
            self.receiver = message.receiver or sender
            self.wav_data = self.speak(message.sentence)

            if message.play:
                # Reply is sent once the player reports WavPlayed
                self.transition("speaking")
                self.send(self.player, PlayWavData(self.wav_data))
            else:
                # No playback requested: hand the audio straight back
                self.transition("ready")
                self.send(self.receiver, SentenceSpoken(self.wav_data))

    def in_speaking(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in speaking state."""
        if isinstance(message, WavPlayed):
            self.transition("ready")
            self.send(self.receiver, SentenceSpoken(self.wav_data))

    # -------------------------------------------------------------------------

    def speak(self, sentence: str) -> bytes:
        """Get WAV buffer for sentence.

        Returns the audio bytes on success. Any failure (missing URL in the
        configuration, HTTP error, lame failure, ...) is logged and an empty
        bytes object is returned instead of raising.
        """
        try:
            tts_url = urljoin(self.hass_config["url"], "api/tts_get_url")

            # Send to Home Assistant.
            # hass_request_kwargs already adds "verify" when a PEM file is set.
            kwargs = hass_request_kwargs(self.hass_config, self.pem_file)
            kwargs["json"] = {"platform": self.platform, "message": sentence}

            # POST to /api/tts_get_url
            response = requests.post(tts_url, **kwargs)
            response.raise_for_status()

            response_json = response.json()
            self._logger.debug(response_json)

            # Download MP3 (fresh kwargs: the JSON body must not be re-sent)
            audio_url = response_json["url"]
            kwargs = hass_request_kwargs(self.hass_config, self.pem_file)

            # GET audio data
            response = requests.get(audio_url, **kwargs)
            response.raise_for_status()

            audio_bytes = response.content
            self._logger.debug("Received %s byte(s) of audio data", len(audio_bytes))

            # Convert to WAV
            if audio_url.endswith(".mp3"):
                lame_command = ["lame", "--decode", "-", "-"]
                self._logger.debug(lame_command)

                return subprocess.run(
                    lame_command, input=audio_bytes, check=True, stdout=subprocess.PIPE
                ).stdout

            # Assume WAV
            return audio_bytes
        except Exception:
            self._logger.exception("speak")
            return bytes()

    def get_problems(self) -> Dict[str, Any]:
        """Get problems at startup.

        Returns a mapping of short problem title to a human-readable
        explanation; an empty mapping means nothing was detected.
        """
        problems: Dict[str, Any] = {}

        if not shutil.which("lame"):
            problems[
                "Missing LAME MP3 encoding"
            ] = "LAME MP3 encoder is not installed. Try apt-get install lame"

        if not self.platform:
            problems[
                "Missing platform name"
            ] = "Expected Home Assistant TTS platform name in text_to_speech.hass_tts.platform"

        # A profile without a Home Assistant URL must be reported as a
        # problem rather than crash problem detection with a KeyError.
        hass_url = self.hass_config.get("url")
        if not hass_url:
            problems[
                "Missing Home Assistant URL"
            ] = "Expected Home Assistant URL in home_assistant.url"
            return problems

        api_url = urljoin(hass_url, "api/")
        try:
            kwargs = hass_request_kwargs(self.hass_config, self.pem_file)
            requests.get(api_url, **kwargs)
        except Exception:
            problems[
                "Can't contact server"
            ] = f"Unable to reach your Home Assistant at {api_url}. Is it running?"

        return problems
|
||||
|
||||
@@ -4,7 +4,8 @@ import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from typing import Any, Dict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from rhasspy.profiles import Profile
|
||||
|
||||
@@ -23,7 +24,9 @@ class SpeechTuner:
|
||||
"""Cache import stuff upfront."""
|
||||
pass
|
||||
|
||||
def tune(self, wav_intents: Dict[str, Dict[str, Any]]) -> None:
|
||||
def tune(
|
||||
self, wav_intents: Dict[str, Dict[str, Any]], mllr_path: Optional[Path] = None
|
||||
) -> None:
|
||||
"""Tunes a speech system with WAV file paths mapped to intents."""
|
||||
pass
|
||||
|
||||
@@ -38,6 +41,7 @@ class SphinxTrainSpeechTuner(SpeechTuner):
|
||||
"""Uses sphinxtrain tools to generate an MLLR matrix for an acoustic model."""
|
||||
|
||||
def tune(self, wav_intents: Dict[str, Dict[str, Any]], mllr_path=None) -> None:
|
||||
"""Generate MLLR matrix for Pocketsphinx model"""
|
||||
ps_config = self.profile.get("speech_to_text.pocketsphinx")
|
||||
|
||||
# Load decoder settings
|
||||
@@ -54,15 +58,15 @@ class SphinxTrainSpeechTuner(SpeechTuner):
|
||||
mdef_path,
|
||||
]
|
||||
|
||||
logger.debug("Creating mdef.txt: %s" % mdef_command)
|
||||
logger.debug("Creating mdef.txt: %s", mdef_command)
|
||||
subprocess.check_call(mdef_command)
|
||||
|
||||
# Copy WAV files into temporary directory with unique names
|
||||
fileid_intents = {}
|
||||
logger.debug("Copying %s WAV file(s) to %s" % (len(wav_intents), temp_dir))
|
||||
logger.debug("Copying %s WAV file(s) to %s", len(wav_intents), temp_dir)
|
||||
for wav_path in list(wav_intents.keys()):
|
||||
if not os.path.exists(wav_path):
|
||||
logger.warn("Skipping %s (does not exist)" % wav_path)
|
||||
logger.warning("Skipping %s (does not exist)", wav_path)
|
||||
continue
|
||||
|
||||
# Copy WAV file
|
||||
@@ -78,10 +82,10 @@ class SphinxTrainSpeechTuner(SpeechTuner):
|
||||
# Write fileids (just the file name, no extension)
|
||||
fileids_path = os.path.join(temp_dir, "fileids")
|
||||
with open(fileids_path, "w") as fileids_file:
|
||||
for file_id in fileid_intents.keys():
|
||||
for file_id in fileid_intents:
|
||||
print(file_id, file=fileids_file)
|
||||
|
||||
logger.debug("Wrote %s fileids" % len(fileid_intents))
|
||||
logger.debug("Wrote %s fileids", len(fileid_intents))
|
||||
|
||||
# Write transcription.txt
|
||||
transcription_path = os.path.join(temp_dir, "transcription.txt")
|
||||
@@ -90,7 +94,7 @@ class SphinxTrainSpeechTuner(SpeechTuner):
|
||||
text = fileid_intents[file_id]["text"].strip()
|
||||
print("%s (%s.wav)" % (text, file_id), file=transcription_file)
|
||||
|
||||
logger.debug("Wrote %s" % transcription_path)
|
||||
logger.debug("Wrote %s", transcription_path)
|
||||
|
||||
# Extract features
|
||||
feat_params_path = os.path.join(hmm_path, "feat.params")
|
||||
@@ -181,7 +185,7 @@ class SphinxTrainSpeechTuner(SpeechTuner):
|
||||
logger.debug(solve_command)
|
||||
subprocess.check_call(solve_command)
|
||||
|
||||
logger.debug("Generated MLLR matrix: %s" % mllr_path)
|
||||
logger.debug("Generated MLLR matrix: %s", mllr_path)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Set, Tuple
|
||||
|
||||
import pywrapfst as fst
|
||||
import rhasspynlu
|
||||
from num2words import num2words
|
||||
|
||||
WHITESPACE_PATTERN = re.compile(r"\s+")
|
||||
@@ -140,16 +141,17 @@ def recursive_remove(base_dict: Dict[Any, Any], new_dict: Dict[Any, Any]) -> Non
|
||||
def buffer_to_wav(buffer: bytes) -> bytes:
    """Wrap raw audio in a WAV container and return the WAV bytes.

    The header is written as mono, 2 bytes per sample, 16000 Hz; the buffer
    itself is stored unchanged as the frame data.
    """
    with io.BytesIO() as wav_io:
        writer: wave.Wave_write = wave.open(wav_io, mode="wb")
        with writer:
            writer.setnchannels(1)
            writer.setsampwidth(2)
            writer.setframerate(16000)
            writer.writeframes(buffer)

        # Read the bytes after the writer is closed (header finalized) but
        # before the in-memory buffer itself is closed.
        return wav_io.getvalue()
|
||||
|
||||
|
||||
def convert_wav(wav_data: bytes) -> bytes:
|
||||
def convert_wav(wav_data: bytes, rate=16000, width=16, channels=1) -> bytes:
|
||||
"""Converts WAV data to 16-bit, 16Khz mono with sox."""
|
||||
return subprocess.run(
|
||||
[
|
||||
@@ -158,13 +160,13 @@ def convert_wav(wav_data: bytes) -> bytes:
|
||||
"wav",
|
||||
"-",
|
||||
"-r",
|
||||
"16000",
|
||||
str(rate),
|
||||
"-e",
|
||||
"signed-integer",
|
||||
"-b",
|
||||
"16",
|
||||
str(width),
|
||||
"-c",
|
||||
"1",
|
||||
str(channels),
|
||||
"-t",
|
||||
"wav",
|
||||
"-",
|
||||
@@ -175,17 +177,17 @@ def convert_wav(wav_data: bytes) -> bytes:
|
||||
).stdout
|
||||
|
||||
|
||||
def maybe_convert_wav(wav_data: bytes, rate=16000, width=16, channels=1) -> bytes:
    """Convert WAV data to the requested format only if it differs.

    Args:
        wav_data: bytes of a WAV file.
        rate: desired sample rate in Hz.
        width: desired sample width in bits (forwarded unchanged to
            convert_wav).
        channels: desired number of channels.

    Returns:
        The audio frames read from ``wav_data`` when it already has the
        requested format, otherwise whatever ``convert_wav`` returns.
    """
    with io.BytesIO(wav_data) as wav_io:
        wav_file: wave.Wave_read = wave.open(wav_io, "rb")
        with wav_file:
            # getsampwidth() reports BYTES per sample while ``width`` is in
            # bits; compare in the same unit so matching audio is not
            # needlessly sent through the converter.
            already_matches = (
                wav_file.getframerate() == rate
                and wav_file.getsampwidth() * 8 == width
                and wav_file.getnchannels() == channels
            )

            if not already_matches:
                return convert_wav(wav_data, rate=rate, width=width, channels=channels)

            return wav_file.readframes(wav_file.getnframes())
|
||||
|
||||
@@ -347,6 +349,7 @@ def make_sentences_by_intent(intent_fst: fst.Fst) -> Dict[str, Any]:
|
||||
def sample_sentences_by_intent(
|
||||
intent_fst_paths: Dict[str, str], num_samples: int
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate random intents"""
|
||||
from rhasspy.train.jsgf2fst import fstprintall, symbols2intent
|
||||
|
||||
def sample_sentences(intent_name: str, intent_fst_path: str):
|
||||
@@ -403,6 +406,10 @@ def numbers_to_words(
|
||||
sentence: str, language: Optional[str] = None, add_substitution: bool = False
|
||||
) -> str:
|
||||
"""Replaces numbers with words in a sentence. Optionally substitues number back in."""
|
||||
if not language:
|
||||
# Default language
|
||||
language = None
|
||||
|
||||
words = split_whitespace(sentence)
|
||||
changed = False
|
||||
for i, word in enumerate(words):
|
||||
@@ -440,8 +447,10 @@ def split_whitespace(s: str, **kwargs):
|
||||
|
||||
|
||||
def get_wav_duration(wav_bytes: bytes) -> float:
    """Return the playback length of a WAV file in seconds.

    Computed as the number of frames divided by the frame rate stored in
    the WAV header.
    """
    with io.BytesIO(wav_bytes) as wav_io:
        reader: wave.Wave_read = wave.open(wav_io, "rb")
        with reader:
            frame_count = reader.getnframes()
            frame_rate = float(reader.getframerate())

    return frame_count / frame_rate
|
||||
@@ -465,11 +474,45 @@ def hass_request_kwargs(
|
||||
headers["X-HA-Access"] = hass_config["api_password"]
|
||||
elif "HASSIO_TOKEN" in os.environ:
|
||||
# Use token from hass.io
|
||||
headers["Authorization"] = "Bearer %s" % os.environ["HASSIO_TOKEN"]
|
||||
headers["X-HASSIO-KEY"] = os.environ["HASSIO_TOKEN"]
|
||||
|
||||
kwargs = {"headers": headers}
|
||||
kwargs: Dict[str, Any] = {"headers": headers}
|
||||
|
||||
if pem_file is not None:
|
||||
kwargs["verify"] = pem_file
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_ini_paths(
    sentences_ini: Path, sentences_dir: Optional[Path] = None
) -> List[Path]:
    """Collect the .ini files to load for a profile.

    The result starts with ``sentences_ini`` when that file exists, followed
    by every ``*.ini`` file found recursively under ``sentences_dir`` (when
    given and an existing directory).
    """
    found: List[Path] = [sentences_ini] if sentences_ini.is_file() else []

    # Add .ini files from intents directory
    if sentences_dir and sentences_dir.is_dir():
        found.extend(sentences_dir.rglob("*.ini"))

    return found
|
||||
|
||||
|
||||
def get_all_intents(ini_paths: List[Path]) -> Dict[str, Any]:
    """Parse the concatenation of all given .ini files into intents.

    Each file's text is followed by a newline so that consecutive files do
    not run together. Any error while reading or parsing is logged and an
    empty dict is returned.
    """
    try:
        # Same text the file-by-file write produced: contents + "\n" each
        combined_text = "".join(
            ini_path.read_text() + "\n" for ini_path in ini_paths
        )

        return rhasspynlu.parse_ini(combined_text)
    except Exception:
        _LOGGER.exception("Failed to parse %s", ini_paths)

    return {}
|
||||
|
||||
@@ -7,7 +7,8 @@ import struct
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Type
|
||||
|
||||
from rhasspy.actor import RhasspyActor
|
||||
from rhasspy.audio_recorder import AudioData, StartStreaming, StopStreaming
|
||||
@@ -288,13 +289,14 @@ class SnowboyWakeListener(RhasspyActor):
|
||||
def __init__(self) -> None:
|
||||
RhasspyActor.__init__(self)
|
||||
self.receivers: List[RhasspyActor] = []
|
||||
self.detector = None
|
||||
self.detectors: List[Any] = []
|
||||
self.preload = False
|
||||
self.not_detected = False
|
||||
self.chunk_size = 960
|
||||
self.recorder: Optional[RhasspyActor] = None
|
||||
self.apply_frontend = False
|
||||
self.model_name = ""
|
||||
self.models: Dict[str, Any] = {}
|
||||
self.model_names: List[str] = []
|
||||
|
||||
def to_started(self, from_state: str) -> None:
|
||||
"""Transition to started state."""
|
||||
@@ -302,10 +304,9 @@ class SnowboyWakeListener(RhasspyActor):
|
||||
self.preload = self.config.get("preload", False)
|
||||
self.not_detected = self.config.get("not_detected", False)
|
||||
self.chunk_size = self.profile.get("wake.snowboy.chunk_size", 960)
|
||||
self.apply_frontend = self.profile.get("wake.snowboy.apply_frontend", False)
|
||||
if self.preload:
|
||||
try:
|
||||
self.load_detector()
|
||||
self.load_detectors()
|
||||
except Exception as e:
|
||||
self._logger.warning("preload: %s", e)
|
||||
|
||||
@@ -315,7 +316,7 @@ class SnowboyWakeListener(RhasspyActor):
|
||||
"""Handle messages in loaded state."""
|
||||
if isinstance(message, ListenForWakeWord):
|
||||
try:
|
||||
self.load_detector()
|
||||
self.load_detectors()
|
||||
self.receivers.append(message.receiver or sender)
|
||||
self.transition("listening")
|
||||
if message.record:
|
||||
@@ -328,31 +329,41 @@ class SnowboyWakeListener(RhasspyActor):
|
||||
if isinstance(message, AudioData):
|
||||
audio_data = message.data
|
||||
chunk = audio_data[: self.chunk_size]
|
||||
detected = False
|
||||
detected = []
|
||||
while len(chunk) > 0:
|
||||
index = self.process_data(chunk)
|
||||
if index > 0:
|
||||
detected = True
|
||||
for detector_index, result_index in enumerate(self.process_data(chunk)):
|
||||
if result_index > 0:
|
||||
detected.append(detector_index)
|
||||
|
||||
if detected:
|
||||
# Don't process the rest of the audio data if hotword has
|
||||
# already been detected.
|
||||
break
|
||||
|
||||
audio_data = audio_data[self.chunk_size :]
|
||||
chunk = audio_data[: self.chunk_size]
|
||||
|
||||
# Handle results
|
||||
if detected:
|
||||
# Detected
|
||||
self._logger.debug("Hotword detected (%s)", self.model_name)
|
||||
detected_event = WakeWordDetected(
|
||||
self.model_name, audio_data_info=message.info
|
||||
)
|
||||
for receiver in self.receivers:
|
||||
self.send(receiver, detected_event)
|
||||
detected_names = [self.model_names[i] for i in detected]
|
||||
self._logger.debug("Hotword(s) detected: %s", detected_names)
|
||||
|
||||
# Send events
|
||||
for model_name in detected_names:
|
||||
detected_event = WakeWordDetected(
|
||||
model_name, audio_data_info=message.info
|
||||
)
|
||||
for receiver in self.receivers:
|
||||
self.send(receiver, detected_event)
|
||||
elif self.not_detected:
|
||||
# Not detected
|
||||
not_detected_event = WakeWordNotDetected(
|
||||
self.model_name, audio_data_info=message.info
|
||||
)
|
||||
for receiver in self.receivers:
|
||||
self.send(receiver, not_detected_event)
|
||||
for model_name in self.model_names:
|
||||
not_detected_event = WakeWordNotDetected(
|
||||
model_name, audio_data_info=message.info
|
||||
)
|
||||
for receiver in self.receivers:
|
||||
self.send(receiver, not_detected_event)
|
||||
elif isinstance(message, StopListeningForWakeWord):
|
||||
self.receivers.remove(message.receiver or sender)
|
||||
if len(self.receivers) == 0:
|
||||
@@ -362,56 +373,90 @@ class SnowboyWakeListener(RhasspyActor):
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def process_data(self, data: bytes) -> Iterable[int]:
    """Process single chunk of audio data with every loaded detector.

    Returns one result per detector, in ``self.detectors`` order. Each
    result follows the detector's RunDetection convention:
        -2  silence
        -1  error
         0  voice
         n  index n-1

    If any detector raises, the error is logged and every detector is
    reported as silence (-2).
    """
    try:
        # Collect eagerly: a generator would silently drop the fallback
        # value returned from the except branch.
        return [detector.RunDetection(data) for detector in self.detectors]
    except Exception:
        self._logger.exception("process_data")

    # All silences
    return [-2] * len(self.detectors)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def load_detectors(self) -> None:
    """Create one snowboy detector per configured model (only once).

    Does nothing when detectors are already loaded. Otherwise the model
    settings are parsed, and for each model name (in sorted order) a
    detector is created, configured with that model's sensitivity, audio
    gain and frontend flag, and appended to ``self.detectors``.
    """
    if self.detectors:
        # Already loaded
        return

    from snowboy import snowboydetect, snowboydecoder

    # Load model names and settings
    self.models = self._parse_models()
    self.model_names = sorted(list(self.models.keys()))

    # Create snowboy detectors
    for model_name in self.model_names:
        model_settings = self.models[model_name]
        model_path = Path(self.profile.read_path(model_name))
        assert model_path.is_file(), f"Missing {model_path}"
        self._logger.debug("Loading snowboy model from %s", model_path)

        resource_bytes = snowboydecoder.RESOURCE_FILE.encode()
        model_bytes = str(model_path).encode()
        detector = snowboydetect.SnowboyDetect(resource_bytes, model_bytes)

        # Sensitivity is passed as an encoded string, gain as a float
        detector.SetSensitivity(str(model_settings["sensitivity"]).encode())
        detector.SetAudioGain(float(model_settings["audio_gain"]))
        detector.ApplyFrontend(bool(model_settings["apply_frontend"]))

        self.detectors.append(detector)
        self._logger.debug(
            "Loaded snowboy model %s (%s)", model_name, model_settings
        )

# -------------------------------------------------------------------------

|
||||
def _parse_models(self) -> Dict[str, Dict[str, Any]]:
    """Build per-model snowboy settings from the profile.

    ``wake.snowboy.model`` is a comma-separated list of model names.
    ``wake.snowboy.model_settings`` may hold per-model overrides; any of
    "sensitivity", "audio_gain" and "apply_frontend" missing for a model is
    filled in from the profile-wide ``wake.snowboy.*`` defaults.

    Returns a dict mapping model name to its complete settings dict.
    """
    # Default sensitivity
    sensitivity: str = str(self.profile.get("wake.snowboy.sensitivity", "0.5"))

    # Default audio gain
    audio_gain: float = float(self.profile.get("wake.snowboy.audio_gain", "1.0"))

    # Default frontend
    apply_frontend: bool = self.profile.get("wake.snowboy.apply_frontend", False)

    # Tolerate whitespace around commas ("a.umdl, b.umdl") and empty entries
    raw_names: str = self.profile.get("wake.snowboy.model", "snowboy/snowboy.umdl")
    model_names: List[str] = [
        name.strip() for name in raw_names.split(",") if name.strip()
    ]

    model_settings: Dict[str, Dict[str, Any]] = self.profile.get(
        "wake.snowboy.model_settings", {}
    )

    models_dict: Dict[str, Dict[str, Any]] = {}

    for model_name in model_names:
        # Copy so that filling in defaults does not modify the profile data
        settings = dict(model_settings.get(model_name, {}))

        # Add default settings
        settings.setdefault("sensitivity", sensitivity)
        settings.setdefault("audio_gain", audio_gain)
        settings.setdefault("apply_frontend", apply_frontend)

        models_dict[model_name] = settings

    return models_dict
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@@ -419,19 +464,24 @@ class SnowboyWakeListener(RhasspyActor):
|
||||
"""Get problems at startup."""
|
||||
problems: Dict[str, Any] = {}
|
||||
try:
|
||||
from snowboy import snowboydetect, snowboydecoder
|
||||
# pylint: disable=W0611
|
||||
from snowboy import snowboydetect, snowboydecoder # noqa: F401
|
||||
except Exception:
|
||||
problems[
|
||||
"snowboy not installed"
|
||||
] = "The snowboy Python library is not installed. Try pip3 install snowboy"
|
||||
|
||||
model_path = self.profile.read_path(
|
||||
self.profile.get("wake.snowboy.model", "snowboy.umdl")
|
||||
)
|
||||
if not os.path.exists(model_path):
|
||||
problems[
|
||||
"Missing model"
|
||||
] = f"Your snowboy model could not be loaded from {model_path}"
|
||||
# Verify that all snowboy models exist
|
||||
models = self._parse_models()
|
||||
model_paths = [
|
||||
Path(self.profile.read_path(model_name)) for model_name in models
|
||||
]
|
||||
|
||||
for model_path in model_paths:
|
||||
if not model_path.is_file():
|
||||
problems[
|
||||
"Missing model"
|
||||
] = f"Snowboy model could not be loaded from {model_path}"
|
||||
|
||||
return problems
|
||||
|
||||
@@ -446,6 +496,7 @@ class PreciseWakeListener(RhasspyActor):
|
||||
"""Listens for a wake word using Mycroft Precise."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
# pylint: disable=E0401
|
||||
from precise_runner import ReadWriteStream
|
||||
|
||||
RhasspyActor.__init__(self)
|
||||
@@ -513,7 +564,7 @@ class PreciseWakeListener(RhasspyActor):
|
||||
|
||||
if self.send_not_detected:
|
||||
# Wait for all chunks to finish processing
|
||||
for i in range(num_chunks):
|
||||
for _ in range(num_chunks):
|
||||
self.prediction_sem.acquire(timeout=0.1)
|
||||
|
||||
# Wait a little bit for the precise engine to finish processing
|
||||
@@ -554,6 +605,7 @@ class PreciseWakeListener(RhasspyActor):
|
||||
def load_runner(self) -> None:
|
||||
"""Load precise runner."""
|
||||
if self.engine is None:
|
||||
# pylint: disable=E0401
|
||||
from precise_runner import PreciseEngine
|
||||
|
||||
self.model_name = self.profile.get("wake.precise.model", "hey-mycroft-2.pb")
|
||||
@@ -568,6 +620,7 @@ class PreciseWakeListener(RhasspyActor):
|
||||
)
|
||||
|
||||
if self.runner is None:
|
||||
# pylint: disable=E0401
|
||||
from precise_runner import PreciseRunner, ReadWriteStream
|
||||
|
||||
self.stream = ReadWriteStream()
|
||||
@@ -607,7 +660,8 @@ class PreciseWakeListener(RhasspyActor):
|
||||
"""Get problems at startup."""
|
||||
problems: Dict[str, Any] = {}
|
||||
try:
|
||||
from precise_runner import PreciseRunner, ReadWriteStream
|
||||
# pylint: disable=E0401,W0611
|
||||
from precise_runner import PreciseRunner, ReadWriteStream # noqa: F401
|
||||
except Exception:
|
||||
problems[
|
||||
"precise_runner not installed"
|
||||
@@ -634,7 +688,7 @@ class PreciseWakeListener(RhasspyActor):
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# MQTT-based wake listener (Hermes protocol)
|
||||
# https://docs.snips.ai/ressources/hermes-protocol
|
||||
# https://docs.snips.ai/reference/hermes
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -706,7 +760,7 @@ class PorcupineWakeListener(RhasspyActor):
|
||||
self.chunk_format = ""
|
||||
self.chunk_size = 1024
|
||||
self.handle = None
|
||||
self.keyword_paths = []
|
||||
self.keyword_paths: List[Path] = []
|
||||
self.library_path = ""
|
||||
self.model_path = ""
|
||||
self.preload: bool = False
|
||||
@@ -729,14 +783,16 @@ class PorcupineWakeListener(RhasspyActor):
|
||||
)
|
||||
)
|
||||
self.keyword_paths = [
|
||||
self.profile.read_path(
|
||||
self.profile.get(
|
||||
"wake.porcupine.keyword_path", "porcupine/porcupine.ppn"
|
||||
)
|
||||
)
|
||||
Path(self.profile.read_path(p))
|
||||
for p in self.profile.get(
|
||||
"wake.porcupine.keyword_path", "porcupine/porcupine.ppn"
|
||||
).split(",")
|
||||
]
|
||||
self.sensitivities = [
|
||||
float(self.profile.get("wake.porcupine.sensitivity", 0.5))
|
||||
float(s)
|
||||
for s in str(self.profile.get("wake.porcupine.sensitivity", "0.5")).split(
|
||||
","
|
||||
)
|
||||
]
|
||||
|
||||
self.preload = self.config.get("preload", False)
|
||||
@@ -805,12 +861,15 @@ class PorcupineWakeListener(RhasspyActor):
|
||||
def load_handle(self):
|
||||
"""Load porcupine library."""
|
||||
if self.handle is None:
|
||||
for kw_path in self.keyword_paths:
|
||||
assert kw_path.is_file(), f"Missing {kw_path}"
|
||||
|
||||
from porcupine import Porcupine
|
||||
|
||||
self.handle = Porcupine(
|
||||
self.library_path,
|
||||
self.model_path,
|
||||
keyword_file_paths=self.keyword_paths,
|
||||
keyword_file_paths=[str(p) for p in self.keyword_paths],
|
||||
sensitivities=self.sensitivities,
|
||||
)
|
||||
|
||||
@@ -819,7 +878,7 @@ class PorcupineWakeListener(RhasspyActor):
|
||||
self.chunk_format = "h" * self.handle.frame_length
|
||||
self._logger.debug(
|
||||
"Loaded porcupine (keyword=%s). Expecting sample rate=%s, frame length=%s",
|
||||
self.keyword_paths[0],
|
||||
self.keyword_paths,
|
||||
self.handle.sample_rate,
|
||||
self.handle.frame_length,
|
||||
)
|
||||
|
||||
@@ -6,9 +6,11 @@ max-line-length = 88
|
||||
# E203: Whitespace before ':'
|
||||
# D202 No blank lines allowed after function docstring
|
||||
# W504 line break after binary operator
|
||||
# E731 do not assign a lambda expression, use a def
|
||||
ignore =
|
||||
E501,
|
||||
W503,
|
||||
E203,
|
||||
D202,
|
||||
W504
|
||||
W504,
|
||||
E731
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
<div class="collapse navbar-collapse" id="navbarSupportedContent">
|
||||
<div class="navbar-container">
|
||||
<a href="/" class="text-white font-weight-bold">Rhasspy</a>
|
||||
<span class="badge badge-info ml-2">2.4</span>
|
||||
<a href="/api/" class="badge badge-info ml-2">{{ this.version }}</a>
|
||||
<span class="badge badge-pill badge-danger ml-2" v-if="this.numProblems > 0" title="Problems were detected"><i class="fas fa-exclamation"></i></span>
|
||||
</div>
|
||||
<div class="navbar-container ml-auto">
|
||||
@@ -52,6 +52,9 @@
|
||||
<li class="nav-item" v-if="this.numProblems > 0">
|
||||
<a class="nav-link" id="problems-tab" data-toggle="tab" href="#problems" role="tab" aria-controls="problems" aria-selected="true">Problems <span class="badge badge-pill badge-danger">{{ this.numProblems }}</span></a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" id="advanced-tab" data-toggle="tab" href="#advanced" role="tab" aria-controls="advanced" aria-selected="true">Advanced</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" id="log-tab" data-toggle="tab" href="#log" role="tab" aria-controls="log" aria-selected="true">Log</a>
|
||||
</li>
|
||||
@@ -83,6 +86,9 @@
|
||||
<div class="tab-pane fade" id="log" role="tabpanel" aria-labelledby="log-tab">
|
||||
<RhasspyLog :rhasspyLog="rhasspyLog" />
|
||||
</div>
|
||||
<div class="tab-pane fade" id="advanced" role="tabpanel" aria-labelledby="advanced-tab">
|
||||
<AdvancedSettings />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div> <!-- main container -->
|
||||
@@ -138,6 +144,7 @@
|
||||
import Problems from './components/Problems.vue'
|
||||
import RhasspyLog from './components/RhasspyLog.vue'
|
||||
import Slots from './components/Slots.vue'
|
||||
import AdvancedSettings from './components/AdvancedSettings.vue'
|
||||
|
||||
import ProfileDefaults from '@/assets/ProfileDefaults'
|
||||
|
||||
@@ -150,7 +157,8 @@
|
||||
ProfileSettings,
|
||||
Problems,
|
||||
RhasspyLog,
|
||||
Slots
|
||||
Slots,
|
||||
AdvancedSettings
|
||||
},
|
||||
|
||||
data: function() {
|
||||
@@ -176,7 +184,9 @@
|
||||
problems: {},
|
||||
numProblems: 0,
|
||||
|
||||
missingFiles: {}
|
||||
missingFiles: {},
|
||||
|
||||
version: ''
|
||||
}
|
||||
},
|
||||
|
||||
@@ -262,6 +272,7 @@
|
||||
this.training = false
|
||||
this.endAsync()
|
||||
this.getCustomWords()
|
||||
this.getProblems()
|
||||
})
|
||||
},
|
||||
|
||||
@@ -298,12 +309,20 @@
|
||||
|
||||
getProblems: function() {
|
||||
RhasspyService.getProblems()
|
||||
.then(request => {
|
||||
this.problems = request.data
|
||||
this.numProblems = 0
|
||||
for (var actor in this.problems) {
|
||||
this.numProblems += Object.keys(this.problems[actor]).length
|
||||
}
|
||||
})
|
||||
.catch(err => this.error(err))
|
||||
},
|
||||
|
||||
getVersion: function() {
|
||||
RhasspyService.getVersion()
|
||||
.then(request => {
|
||||
this.problems = request.data
|
||||
this.numProblems = 0
|
||||
for (var actor in this.problems) {
|
||||
this.numProblems += Object.keys(this.problems[actor]).length
|
||||
}
|
||||
this.version = request.data
|
||||
})
|
||||
.catch(err => this.error(err))
|
||||
},
|
||||
@@ -329,6 +348,7 @@
|
||||
},
|
||||
|
||||
mounted: function() {
|
||||
this.getVersion()
|
||||
this.getProfile()
|
||||
this.getProfiles()
|
||||
this.getDefaults()
|
||||
|
||||
@@ -27,7 +27,8 @@ const profileDefaults = {
|
||||
"api_password": "",
|
||||
"event_type_format": "rhasspy_{0}",
|
||||
"url": "http://hassio/homeassistant/",
|
||||
"pem_file": ""
|
||||
"pem_file": "",
|
||||
"handle_type": "event"
|
||||
},
|
||||
"handle": {
|
||||
"system": "hass",
|
||||
@@ -35,6 +36,9 @@ const profileDefaults = {
|
||||
"program": "$RHASSPY_BASE_DIR/bin/mock-commands/handle.sh",
|
||||
"arguments": []
|
||||
},
|
||||
"remote": {
|
||||
"url": "http://my-server:port/endpoint"
|
||||
},
|
||||
"forward_to_hass": true
|
||||
},
|
||||
"intent": {
|
||||
@@ -157,6 +161,13 @@ const profileDefaults = {
|
||||
"program": "$RHASSPY_BASE_DIR/bin/mock-commands/speech2text.sh",
|
||||
"arguments": []
|
||||
},
|
||||
"hass_stt": {
|
||||
"platform": "",
|
||||
"sample_rate": 16000,
|
||||
"bit_size": 16,
|
||||
"channels": 1,
|
||||
"language": "en-US"
|
||||
},
|
||||
"sentences_ini": "sentences.ini",
|
||||
"sentences_text": "sentences.txt",
|
||||
"dictionary_casing": "",
|
||||
@@ -260,12 +271,6 @@ const profileDefaults = {
|
||||
"program": "$RHASSPY_BASE_DIR/bin/mock-commands/sleep.sh",
|
||||
"arguments": []
|
||||
},
|
||||
"porcupine": {
|
||||
"library_path": "porcupine/libpv_porcupine.so",
|
||||
"model_path": "porcupine/porcupine_params.pv",
|
||||
"keyword_path": "porcupine/porcupine.ppn",
|
||||
"sensitivity": 0.5
|
||||
},
|
||||
"system": "pocketsphinx"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,16 +2,12 @@
|
||||
<div class="container">
|
||||
<div class="text-muted pl-1">
|
||||
<p>
|
||||
You can edit <a href="https://rhasspy.readthedocs.io/en/latest/profiles/">your Rhasspy profile</a> directly here as JSON. These settings will override the defaults below.
|
||||
You can edit <a href="https://rhasspy.readthedocs.io/en/latest/profiles/">your Rhasspy profile</a> directly here as JSON.
|
||||
<br>
|
||||
Only settings that <strong>differ from the defaults</strong> are shown.
|
||||
</p>
|
||||
</div>
|
||||
<div class="pl-1">
|
||||
<p><strong>Restart required if changes are made</strong></p>
|
||||
</div>
|
||||
|
||||
<form class="form" v-on:submit.prevent="saveProfile">
|
||||
<h2>{{ this.profile.name }}</h2>
|
||||
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<button type="submit" class="btn btn-primary"
|
||||
@@ -32,20 +28,6 @@
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<hr />
|
||||
|
||||
<form class="form" v-on:submit.prevent="saveDefaults">
|
||||
<h2>Defaults</h2>
|
||||
|
||||
<div class="text-muted pl-1">
|
||||
<p>
|
||||
These are the default settings for all <a href="https://rhasspy.readthedocs.io/en/latest/profiles/">your Rhasspy profiles</a>. If a setting is missing in any profile, the value here will be used.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<textarea id="default-settings" class="form-control" type="text" rows="15" v-model="defaultSettings"></textarea>
|
||||
</form>
|
||||
</div> <!-- container -->
|
||||
</template>
|
||||
|
||||
@@ -54,20 +36,22 @@
|
||||
|
||||
export default {
|
||||
name: 'AdvancedSettings',
|
||||
props: {
|
||||
profile : Object,
|
||||
defaults: Object
|
||||
},
|
||||
data: function () {
|
||||
return {
|
||||
profileSettings: '',
|
||||
profileSettingsDirty: false,
|
||||
|
||||
defaultSettings: ''
|
||||
profileSettingsDirty: false
|
||||
}
|
||||
},
|
||||
|
||||
methods: {
|
||||
getProfile: function() {
|
||||
ProfileService.getProfileSettings('profile')
|
||||
.then(request => {
|
||||
this.profileSettings = JSON.stringify(request.data, null, 4)
|
||||
})
|
||||
.catch(err => this.error(err))
|
||||
},
|
||||
|
||||
saveProfile: function() {
|
||||
this.$parent.beginAsync()
|
||||
ProfileService.updateProfileSettings(this.profileSettings)
|
||||
@@ -76,14 +60,15 @@
|
||||
.then(() => {
|
||||
this.$parent.endAsync()
|
||||
this.profileSettingsDirty = false
|
||||
if (confirm("Profile saved. Restart Rhasspy?")) {
|
||||
this.$parent.restart()
|
||||
}
|
||||
})
|
||||
}
|
||||
},
|
||||
|
||||
watch: {
|
||||
profile: function() {
|
||||
this.profileSettings = JSON.stringify(this.profile, null, 4)
|
||||
}
|
||||
mounted: function() {
|
||||
this.getProfile()
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -4,12 +4,9 @@
|
||||
<div class="form-group">
|
||||
<div class="form-row text-muted pl-1">
|
||||
<p>
|
||||
This is a simplified interface to edit your <a href="https://rhasspy.readthedocs.io/en/latest/profiles/">your Rhasspy profile</a>. If you want to access the JSON directly, see <tt>profile.json</tt>.
|
||||
This is a simplified interface to edit <a href="https://rhasspy.readthedocs.io/en/latest/profiles/">your Rhasspy profile</a>. If you want to access the JSON directly, see the Advanced tab or <tt>profile.json</tt>.
|
||||
</p>
|
||||
</div>
|
||||
<div class="form-row pl-1">
|
||||
<p><strong>Restart required if changes are made</strong></p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h2>{{ this.profile.name || this.profile.language }}</h2>
|
||||
@@ -26,7 +23,7 @@
|
||||
v-on:restart="$emit('restart')"
|
||||
/>
|
||||
|
||||
<HomeAssistant id="profile-handle" :profile="profile" />
|
||||
<IntentHandling id="profile-handle" :profile="profile" />
|
||||
<button class="btn btn-primary mt-3">Save Settings</button>
|
||||
|
||||
<WakeWord id="profile-wake" :profile="profile" />
|
||||
@@ -64,7 +61,7 @@
|
||||
|
||||
import Overview from '@/components/profile/Overview'
|
||||
import Rhasspy from '@/components/profile/Rhasspy'
|
||||
import HomeAssistant from '@/components/profile/HomeAssistant'
|
||||
import IntentHandling from '@/components/profile/IntentHandling'
|
||||
import WakeWord from '@/components/profile/WakeWord'
|
||||
import VoiceDetection from '@/components/profile/VoiceDetection'
|
||||
import SpeechRecognition from '@/components/profile/SpeechRecognition'
|
||||
@@ -77,7 +74,7 @@
|
||||
name: 'ProfileSettings',
|
||||
components: {
|
||||
Overview,
|
||||
HomeAssistant,
|
||||
IntentHandling,
|
||||
Rhasspy,
|
||||
WakeWord,
|
||||
VoiceDetection,
|
||||
|
||||
@@ -1,26 +1,50 @@
|
||||
<template>
|
||||
<div class="container">
|
||||
<form class="form" v-on:submit.prevent="saveSentences">
|
||||
<form class="form" v-on:submit.prevent="addKey">
|
||||
<div class="form-group">
|
||||
<div class="form-row text-muted pl-1">
|
||||
<p>Example sentences, formatted <a href="https://docs.python.org/3/library/configparser.html">ini style</a>, with each section (intent) containing a <a href="https://rhasspy.readthedocs.io/en/latest/training/#sentencesini">simplified JSGF Grammar</a>.</p>
|
||||
</div>
|
||||
<div class="form-row text-muted pl-1">
|
||||
<p>Sentences shouldn't contain non-words characters like commas and periods. Optional words are <tt>[bracketed]</tt>. Alternatives are <tt>(separated | by | pipes)</tt>. Rules have an <tt>=</tt> after their name, optionally contain <tt>{tags}</tt>, and are referenced <tt><by_name></tt>.</p>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<button type="submit" class="btn btn-primary"
|
||||
v-bind:class="{ 'btn-danger': sentencesDirty }">Save Sentences</button>
|
||||
<div class="form-row">
|
||||
<div class="col">
|
||||
<input type="text" name="iniPath" class="form-control" v-model="newKey" />
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<button type="submit" class="btn btn-success"
|
||||
v-if="sentences"
|
||||
:disabled="sentences[newKey] || newKey.length == 0">Add File</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<select v-model="sentencesKey" class="form-control">
|
||||
<option v-for="(text, key) in sentences" :value="key" v-bind:key="key">
|
||||
{{ key }}
|
||||
</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
<form class="form" v-on:submit.prevent="saveSentences">
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<button type="submit" class="btn btn-primary"
|
||||
v-bind:class="{ 'btn-danger': sentencesDirty }">Save Sentences</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<textarea id="sentences" class="form-control" style="border-width: 3px" type="text" rows="25"
|
||||
v-model="sentences" v-bind:class="{ 'border-danger': sentencesDirty }"
|
||||
v-model="currentSentences" v-bind:class="{ 'border-danger': sentencesDirty }"
|
||||
@input="sentencesDirty=true"></textarea>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<div class="form-row text-muted pl-1">
|
||||
<p>Sentences shouldn't contain non-word characters like commas and periods. Optional words are <tt>[bracketed]</tt>. Alternatives are <tt>(separated | by | pipes)</tt>. Rules have an <tt>=</tt> after their name, optionally contain <tt>{tags}</tt>, and are referenced <tt>&lt;by_name&gt;</tt>.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<button type="submit" class="btn btn-primary"
|
||||
@@ -39,20 +63,28 @@
|
||||
name: 'TrainLangaugeModel',
|
||||
data: function () {
|
||||
return {
|
||||
sentences: '',
|
||||
sentencesDirty: false
|
||||
sentences: {},
|
||||
sentencesDirty: false,
|
||||
|
||||
sentencesKey: 'sentences.ini',
|
||||
newKey: ''
|
||||
}
|
||||
},
|
||||
|
||||
methods: {
|
||||
saveSentences: function() {
|
||||
this.$parent.beginAsync()
|
||||
LanguageModelService.update_sentences(this.sentences)
|
||||
LanguageModelService.update_sentences(JSON.stringify(this.sentences))
|
||||
.then(request => this.$parent.alert(request.data, 'success'))
|
||||
.then(() => {
|
||||
this.$parent.endAsync()
|
||||
if (confirm("Sentences saved. Train Rhasspy?")) {
|
||||
this.$parent.train()
|
||||
this.getSentences()
|
||||
|
||||
if (!this.sentences[this.sentencesKey]) {
|
||||
this.sentencesKey = 'sentences.ini'
|
||||
}
|
||||
}
|
||||
this.sentencesDirty = false
|
||||
})
|
||||
@@ -65,6 +97,29 @@
|
||||
this.sentences = request.data
|
||||
})
|
||||
.catch(err => this.$parent.error(err))
|
||||
},
|
||||
|
||||
addKey: function() {
|
||||
this.sentences[this.newKey] = ''
|
||||
this.sentencesKey = this.newKey
|
||||
this.newKey = ''
|
||||
$('#sentences').focus()
|
||||
}
|
||||
},
|
||||
|
||||
computed: {
|
||||
currentSentences: {
|
||||
get: function() {
|
||||
if (this.sentences) {
|
||||
return this.sentences[this.sentencesKey]
|
||||
}
|
||||
|
||||
return ''
|
||||
},
|
||||
|
||||
set: function(text) {
|
||||
this.sentences[this.sentencesKey] = text
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<form class="form" v-on:submit.prevent="getIntent">
|
||||
<div class="form-group">
|
||||
<div class="form-row text-muted">
|
||||
<p>Press and hold the "Hold to Record" button, speaking a command, then release the button to test.</p>
|
||||
<p>Press "Hold to Record", speak a command, then release</p>
|
||||
</div>
|
||||
<div class="form-row">
|
||||
<div class="col-auto">
|
||||
@@ -28,7 +28,7 @@
|
||||
<input type="radio" name="microphone" id="rhasspy-microphone" value="rhasspy" v-model="microphone">
|
||||
<label class="ml-2" for="rhasspy-microphone">
|
||||
<i class="fas fa-crow"></i>
|
||||
Use Rhasspy's microphone
|
||||
Use Rhasspy microphone
|
||||
</label>
|
||||
</div>
|
||||
<div class="col">
|
||||
@@ -36,7 +36,7 @@
|
||||
@click="getMicrophonePermission">
|
||||
<label class="ml-2" for="browser-microphone">
|
||||
<i class="fas fa-laptop"></i>
|
||||
Use this web browser's microphone
|
||||
Use browser microphone
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="radio" name="sounds-system" id="sounds-system-mqtt" value="hermes" v-model="profile.sounds.system">
|
||||
<label class="form-check-label" for="sounds-system-mqtt">
|
||||
Play sound remotely with MQTT (<a href="https://docs.snips.ai/ressources/hermes-protocol">Hermes protocol</a>)
|
||||
Play sound remotely with MQTT (<a href="https://docs.snips.ai/reference/hermes">Hermes protocol</a>)
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -64,7 +64,7 @@
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="radio" name="profile.microphone.system" id="audio-system-mqtt" value="hermes" v-model="profile.microphone.system">
|
||||
<label class="form-check-label" for="audio-system-mqtt">
|
||||
Get microphone input remotely with MQTT (<a href="https://docs.snips.ai/ressources/hermes-protocol">Hermes protocol</a>)
|
||||
Get microphone input remotely with MQTT (<a href="https://docs.snips.ai/reference/hermes">Hermes protocol</a>)
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
<template>
|
||||
<div class="card mt-3">
|
||||
<div class="card-header"><i class="fas fa-home"></i>Home Assistant</div>
|
||||
<div class="card-header"><i class="fas fa-home"></i>Intent Handling</div>
|
||||
<div class="card-body">
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="radio" id="handle-system-dummy" value="dummy" v-model="profile.handle.system">
|
||||
<label class="form-check-label" v-bind:class="{ 'text-danger': profile.handle.system == 'dummy' }" for="handle-system-dummy">
|
||||
Do not use Home Assistant
|
||||
Do not handle intents on this device
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
@@ -59,6 +59,38 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="radio" id="hass-handle-type-event" value="event" v-model="profile.home_assistant.handle_type" :disabled="profile.handle.system != 'hass'">
|
||||
<label class="form-check-label" for="hass-handle-type-event">
|
||||
Send <strong>events</strong> to Home Assistant (<tt>/api/events</tt>)
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-row">
|
||||
<div class="col">
|
||||
<p class="text-muted">
|
||||
Events will be named <tt>{{ profile.home_assistant.event_type_format.replace('{0}', 'INTENT_NAME') }}</tt>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-row mt-2">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="radio" id="hass-handle-type-intent" value="intent" v-model="profile.home_assistant.handle_type" :disabled="profile.handle.system != 'hass'">
|
||||
<label class="form-check-label" for="hass-handle-type-intent">
|
||||
Send <strong>intents</strong> to Home Assistant (<tt>/api/intents</tt>)
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-row">
|
||||
<div class="col">
|
||||
<p class="text-muted">
|
||||
Requires the <a href="https://www.home-assistant.io/integrations/intent/">intent component</a> in your <tt>configuration.yaml</tt>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<hr>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
@@ -90,13 +122,32 @@
|
||||
<p class="muted">Use <tt>$RHASSPY_PROFILE_DIR</tt> environment variable for the directory of this profile.</p>
|
||||
</div>
|
||||
</div>
|
||||
<hr>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="radio" id="handle-system-remote" value="remote" v-model="profile.handle.system">
|
||||
<label class="form-check-label" for="handle-system-remote">
|
||||
Use a remote HTTP server to handle intents
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<label for="remote-handle-url" class="col-form-label">Remote URL</label>
|
||||
<div class="col">
|
||||
<!-- Remote URL is editable only while the "remote" intent-handling radio (profile.handle.system) is selected -->
<input id="remote-handle-url" type="text" class="form-control" v-model="profile.handle.remote.url" :disabled="profile.handle.system != 'remote'">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
export default {
|
||||
name: 'HomeAssistant',
|
||||
name: 'IntentHandling',
|
||||
props: {
|
||||
profile : Object
|
||||
}
|
||||
@@ -24,7 +24,7 @@
|
||||
<div class="form-row">
|
||||
<input type="checkbox" id="mqtt-enabled" v-model="profile.mqtt.enabled">
|
||||
<label for="mqtt-enabled" class="col-form-label">Enable MQTT</label>
|
||||
<span class="col-form-label text-muted">(<a href="https://docs.snips.ai/ressources/hermes-protocol">Snips.ai compatibility</a>)</span>
|
||||
<span class="col-form-label text-muted">(<a href="https://docs.snips.ai/reference/hermes">Snips.ai compatibility</a>)</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
|
||||
@@ -94,6 +94,64 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<hr>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="radio" name="stt-system" id="stt-system-hass_stt" value="hass_stt" v-model="profile.speech_to_text.system">
|
||||
<label class="form-check-label" for="stt-system-hass_stt">
|
||||
Use a Home Assistant <a href="https://www.home-assistant.io/integrations/stt">STT Platform</a>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<label for="stt-hass-platform" class="col-form-label">STT Platform Name</label>
|
||||
<div class="col">
|
||||
<input id="stt-hass-platform" type="text" class="form-control" v-model="profile.speech_to_text.hass_stt.platform" :disabled="profile.speech_to_text.system != 'hass_stt'">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<div class="col text-muted">
|
||||
Rhasspy will stream audio to: {{ profile.home_assistant.url }}api/stt/{{ profile.speech_to_text.hass_stt.platform }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<label for="stt-hass-rate" class="col-form-label">Sample Rate</label>
|
||||
<div class="col">
|
||||
<input id="stt-hass-rate" type="text" class="form-control" v-model="profile.speech_to_text.hass_stt.sample_rate" :disabled="profile.speech_to_text.system != 'hass_stt'">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<label for="stt-hass-bitsize" class="col-form-label">Bit Size</label>
|
||||
<div class="col">
|
||||
<input id="stt-hass-bitsize" type="text" class="form-control" v-model="profile.speech_to_text.hass_stt.bit_size" :disabled="profile.speech_to_text.system != 'hass_stt'">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<label for="stt-hass-channels" class="col-form-label">Channels</label>
|
||||
<div class="col">
|
||||
<input id="stt-hass-channels" type="text" class="form-control" v-model="profile.speech_to_text.hass_stt.channels" :disabled="profile.speech_to_text.system != 'hass_stt'">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-row">
|
||||
<label for="stt-hass-language" class="col-form-label">Language</label>
|
||||
<div class="col">
|
||||
<input id="stt-hass-language" type="text" class="form-control" v-model="profile.speech_to_text.hass_stt.language" :disabled="profile.speech_to_text.system != 'hass_stt'">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||