Files
rhasspy/app.py
T
2020-04-10 11:05:27 -04:00

1351 lines
40 KiB
Python
Executable File

"""Rhasspy web application server."""
import argparse
import asyncio
import atexit
import concurrent.futures
import json
import logging
import os
import re
import shutil
import time
from functools import wraps
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from uuid import uuid4
import attr
import json5
from quart import (
Quart,
Response,
jsonify,
request,
safe_join,
send_file,
send_from_directory,
websocket,
)
from quart_cors import cors
from swagger_ui import quart_api_doc
from rhasspy.actor import ActorSystem, ConfigureEvent, RhasspyActor
from rhasspy.core import RhasspyCore
from rhasspy.events import (
IntentRecognized,
ProfileTrainingFailed,
VoiceCommand,
WakeWordDetected,
WavTranscription,
)
from rhasspy.utils import (
FunctionLoggingHandler,
buffer_to_wav,
get_all_intents,
get_ini_paths,
get_wav_duration,
load_phoneme_examples,
read_dict,
recursive_remove,
)
# -----------------------------------------------------------------------------
# Quart Web App Setup
# -----------------------------------------------------------------------------
# Module-level logger for the web application
logger = logging.getLogger(__name__)
# Event loop shared by the web server and by code that schedules coroutines
# from other threads (see uses of run_coroutine_threadsafe in this file)
loop = asyncio.get_event_loop()
app = Quart("rhasspy")
# A fresh random secret key is generated on every start
app.secret_key = str(uuid4())
# Wrap the application with quart_cors
app = cors(app)
# WAV data from last voice command
last_voice_wav: Optional[bytes] = None
# -----------------------------------------------------------------------------
# Parse Arguments
# -----------------------------------------------------------------------------
parser = argparse.ArgumentParser("Rhasspy")
parser.add_argument(
    "--profile", "-p", required=True, type=str, help="Name of profile to load"
)
parser.add_argument("--host", type=str, help="Host for web server", default="0.0.0.0")
parser.add_argument("--port", type=int, help="Port for web server", default=12101)
parser.add_argument(
    "--system-profiles",
    type=str,
    help="Directory with base profile files (read only)",
    default=os.path.join(os.getcwd(), "profiles"),
)
parser.add_argument(
    "--user-profiles",
    type=str,
    help="Directory with user profile files (read/write)",
    default=os.path.expanduser("~/.config/rhasspy/profiles"),
)
# Each --set takes a KEY and a VALUE; may be given multiple times
parser.add_argument(
    "--set",
    "-s",
    nargs=2,
    action="append",
    help="Set a profile setting value",
    default=[],
)
parser.add_argument(
    "--ssl", nargs=2, help="Use SSL with <CERT_FILE> <KEY_FILE>", default=None
)
parser.add_argument("--log-level", default="DEBUG", help="Set logging level")
args = parser.parse_args()

# Set log level (the RHASSPY_LOG_LEVEL environment variable takes precedence
# over the --log-level argument)
if "RHASSPY_LOG_LEVEL" in os.environ:
    log_level = os.environ["RHASSPY_LOG_LEVEL"]
else:
    log_level = args.log_level

logging.basicConfig(level=getattr(logging, log_level.upper()))
logger.debug(args)

# Profiles are searched in the user directory first, then the system directory
system_profiles_dir = os.path.abspath(args.system_profiles)
user_profiles_dir = os.path.abspath(args.user_profiles)
profiles_dirs = [user_profiles_dir, system_profiles_dir]
# -----------------------------------------------------------------------------
# Dialogue Manager Setup
# -----------------------------------------------------------------------------
core = None


# We really, *really* want shutdown to be called
@atexit.register
def shutdown(*_args: Any, **kwargs: Any) -> None:
    """Stop the Rhasspy core at interpreter exit if it is still running.

    Accepts and ignores any arguments. After a successful shutdown the global
    ``core`` is reset to None so that a second call does nothing.
    """
    global core

    if core is None:
        # Nothing to stop
        return

    stop_task = loop.create_task(core.shutdown())
    loop.run_until_complete(stop_task)
    core = None
async def start_rhasspy() -> None:
    """Create actor system and Rhasspy core.

    Builds a new RhasspyCore for the profile named on the command line,
    exports RHASSPY_* environment variables, applies any ``--set`` overrides
    to the profile, and starts the core with a WebSocketObserver attached.
    The new core is stored in the global ``core``.
    """
    global core

    # Load core
    system = ActorSystem()
    core = RhasspyCore(
        args.profile, system_profiles_dir, user_profiles_dir, actor_system=system
    )

    # Set environment variables
    os.environ["RHASSPY_BASE_DIR"] = os.getcwd()
    os.environ["RHASSPY_PROFILE"] = core.profile.name
    os.environ["RHASSPY_PROFILE_DIR"] = core.profile.write_dir()

    # Add profile settings from the command line
    for key, value in args.set:
        try:
            value = json.loads(value)
        except ValueError:
            # Not valid JSON (json.JSONDecodeError is a ValueError):
            # keep the raw string as the setting value.
            pass

        logger.debug("Profile: %s=%s", key, value)
        core.profile.set(key, value)

    # Load observer actor to catch intents
    observer = system.createActor(WebSocketObserver)
    system.ask(observer, ConfigureEvent(core.profile))

    await core.start(observer=observer)
    logger.info("Started")
# -----------------------------------------------------------------------------
# HTTP API
# -----------------------------------------------------------------------------
@app.route("/api/version")
async def api_version() -> Response:
    """Serve the VERSION file (relative to the working directory)."""
    version_path = Path("VERSION")
    return await send_file(version_path)
# -----------------------------------------------------------------------------
@app.route("/api/profiles")
async def api_profiles() -> Response:
    """List available profiles and report files missing from the current one.

    Returns JSON with the current profile name, the sorted names of all
    profile directories found, whether nothing is missing, and the missing
    files reported by the core.
    """
    assert core is not None

    # Every sub-directory of an existing profiles directory is a profile
    profile_names = {
        entry
        for base_dir in profiles_dirs
        if os.path.exists(base_dir)
        for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    }

    missing_files = core.check_profile()

    return jsonify(
        {
            "default_profile": core.profile.name,
            "profiles": sorted(profile_names),
            "downloaded": len(missing_files) == 0,
            "missing_files": missing_files,
        }
    )
# -----------------------------------------------------------------------------
@app.route("/api/download-profile", methods=["POST"])
async def api_download_profile() -> str:
    """Download files for the current profile.

    The ``delete`` query argument (default "false") is forwarded to the
    core's download routine as a boolean.
    """
    assert core is not None

    delete_arg = request.args.get("delete", "false")
    await core.download_profile(delete=(delete_arg.lower() == "true"))

    return "OK"
@app.route("/api/download-status", methods=["GET"])
async def api_download_status() -> str:
    """Return the core's download status entries, one per line."""
    assert core is not None

    status_lines = core.download_status
    return "\n".join(status_lines)
# -----------------------------------------------------------------------------
@app.route("/api/problems", methods=["GET"])
async def api_problems() -> Response:
    """Return the problems reported by the Rhasspy core as JSON."""
    assert core is not None

    problems = await core.get_problems()
    return jsonify(problems)
# -----------------------------------------------------------------------------
@app.route("/api/microphones", methods=["GET"])
async def api_microphones() -> Response:
    """Return available recording devices as JSON.

    The optional ``system`` query argument is passed to the core.
    """
    assert core is not None

    audio_system = request.args.get("system", None)
    microphones = await core.get_microphones(audio_system)
    return jsonify(microphones)
# -----------------------------------------------------------------------------
@app.route("/api/test-microphones", methods=["GET"])
async def api_test_microphones() -> Response:
    """Return the result of the core's microphone test as JSON.

    The optional ``system`` query argument is passed to the core.
    """
    assert core is not None

    audio_system = request.args.get("system", None)
    tested = await core.test_microphones(audio_system)
    return jsonify(tested)
# -----------------------------------------------------------------------------
@app.route("/api/speakers", methods=["GET"])
async def api_speakers() -> Response:
    """Return available playback devices as JSON.

    The optional ``system`` query argument is passed to the core.
    """
    assert core is not None

    audio_system = request.args.get("system", None)
    speakers = await core.get_speakers(audio_system)
    return jsonify(speakers)
# -----------------------------------------------------------------------------
@app.route("/api/listen-for-wake", methods=["POST"])
async def api_listen_for_wake() -> str:
    """Enable or disable listening for the wake word.

    The request body disables listening when it is "false" or "off"
    (case-insensitive, surrounding whitespace ignored); anything else,
    including an empty body, enables it. Returns the resulting flag as text.
    """
    assert core is not None

    body = await request.data
    enabled = body.decode().strip().lower() not in ("false", "off")

    core.listen_for_wake(enabled)
    return str(enabled)
# -----------------------------------------------------------------------------
@app.route("/api/listen-for-command", methods=["POST"])
async def api_listen_for_command() -> Response:
    """Emulate a wake-up and listen for a voice command.

    Query arguments: ``nohass`` ("true" skips intent handling), ``timeout``
    (seconds, parsed as float), and ``entity``/``value`` (forwarded to the
    core). Returns the core's result as JSON.
    """
    assert core is not None

    skip_handling = request.args.get("nohass", "false").lower() == "true"

    # Seconds before timing out
    timeout = request.args.get("timeout")
    if timeout is not None:
        timeout = float(timeout)

    # Key/value to set in recognized intent
    entity = request.args.get("entity")
    value = request.args.get("value")

    # Emulate wake
    wake_payload = {"wakewordId": "default", "siteId": core.siteId}
    await add_ws_event("wake", json.dumps(wake_payload))

    command_result = await core.listen_for_command(
        handle=(not skip_handling), timeout=timeout, entity=entity, value=value
    )
    return jsonify(command_result)
# -----------------------------------------------------------------------------
@app.route("/api/profile", methods=["GET", "POST"])
async def api_profile() -> Union[str, Response]:
    """Read or write the profile JSON.

    POST: parse the body as JSON5, strip system settings from it with
    ``recursive_remove`` and write the remainder to the profile's
    profile.json. GET: the ``layers`` query argument selects "defaults",
    "profile" (the profile.json file itself) or, by default, the full
    profile JSON.
    """
    assert core is not None

    layers = request.args.get("layers", "all")

    if request.method == "POST":
        # Parsing also ensures that the JSON is valid
        new_profile = json5.loads(await request.data)
        recursive_remove(core.profile.system_json, new_profile)

        profile_path = Path(core.profile.write_path("profile.json"))
        with open(profile_path, "w") as out_file:
            json.dump(new_profile, out_file, indent=4)

        msg = f"Wrote profile to {profile_path}"
        logger.debug(msg)
        return msg

    if layers == "defaults":
        # Default settings
        return jsonify(core.defaults)

    if layers == "profile":
        # Local settings only
        return await send_file(Path(core.profile.read_path("profile.json")))

    return jsonify(core.profile.json)
# -----------------------------------------------------------------------------
@app.route("/api/lookup", methods=["POST"])
async def api_lookup() -> Response:
    """Look up pronunciations for the word in the request body.

    The ``n`` query argument (default 5) is forwarded to the core. The word
    is lower-cased and stripped before lookup. Returns the pronunciations
    for that word as JSON.
    """
    assert core is not None

    n = int(request.args.get("n", 5))
    assert n > 0, "No pronunciations requested"

    body = await request.data
    word = body.decode().strip().lower()
    assert word, "No word to look up"

    lookup = await core.get_word_pronunciations([word], n)
    return jsonify(lookup.pronunciations[word])
# -----------------------------------------------------------------------------
@app.route("/api/pronounce", methods=["POST"])
async def api_pronounce() -> Union[Response, str]:
    """Speak the phonemes or word given in the request body.

    With ``type=phonemes`` (the default) the body is first converted with
    ``core.get_word_phonemes``; otherwise it is spoken as-is. With
    ``download=true`` the WAV data is returned; otherwise it is played and
    the phonemes from the speak result are returned.
    """
    assert core is not None

    wants_download = request.args.get("download", "false").lower() == "true"

    body = await request.data
    pronounce_str = body.decode().strip()
    assert pronounce_str, "No string to pronounce"

    # phonemes or word
    if request.args.get("type", "phonemes") == "phonemes":
        # Convert the given phonemes before speaking
        espeak_str = (await core.get_word_phonemes(pronounce_str)).phonemes
    else:
        # Speak word directly
        espeak_str = pronounce_str

    speak_result = await core.speak_word(espeak_str)
    wav_data, espeak_phonemes = speak_result.wav_data, speak_result.phonemes

    if wants_download:
        # Return WAV
        return Response(wav_data, mimetype="audio/wav")

    # Play through speakers
    core.play_wav_data(wav_data)
    return espeak_phonemes
# -----------------------------------------------------------------------------
@app.route("/api/play-wav", methods=["POST"])
async def api_play_wav() -> str:
    """Play WAV audio through the core.

    When the content type is audio/wav the body is the audio itself;
    otherwise the body is treated as a URL from which the audio is fetched
    using the core's HTTP session.
    """
    assert core is not None

    body = await request.data

    if request.content_type == "audio/wav":
        wav_data = body
    else:
        # Interpret as URL
        url = body.decode()
        logger.debug("Loading WAV data from %s", url)

        async with core.session.get(url) as response:
            wav_data = await response.read()

    # Play through speakers
    logger.debug("Playing %s byte(s)", len(wav_data))
    core.play_wav_data(wav_data)

    return "OK"
# -----------------------------------------------------------------------------
@app.route("/api/phonemes")
def api_phonemes():
    """Return the phoneme examples for the profile's speech system as JSON.

    The examples file name is read from the profile setting
    ``speech_to_text.<system>.phoneme_examples`` (default
    "phoneme_examples.txt").
    """
    assert core is not None

    stt_system = core.profile.get("speech_to_text.system", "pocketsphinx")
    examples_name = core.profile.get(
        f"speech_to_text.{stt_system}.phoneme_examples", "phoneme_examples.txt"
    )
    examples_path = Path(core.profile.read_path(examples_name))

    # phoneme -> { word, phonemes }
    logger.debug("Loading phoneme examples from %s", examples_path)
    return jsonify(load_phoneme_examples(examples_path))
# -----------------------------------------------------------------------------
@app.route("/api/sentences", methods=["GET", "POST"])
async def api_sentences():
    """Read or write sentence (.ini) files for the profile.

    POST with a JSON body: keys are paths relative to the profile directory
    and values are file contents; blank contents remove an existing file.
    POST with any other body: overwrite the main sentences file.
    GET: return all sentence files as JSON when the client prefers JSON,
    otherwise the main sentences file (or "" if it does not exist).
    """
    assert core is not None

    if request.method == "POST":
        if request.mimetype == "application/json":
            # Update multiple ini files at once. Paths as keys (relative to
            # profile directory), sentences as values.
            num_chars = 0
            paths_written = []

            posted = json5.loads(await request.data)
            for rel_path, text in posted.items():
                # Path is relative to profile directory
                target = Path(core.profile.write_path(rel_path))

                if text.strip():
                    # Overwrite file
                    logger.debug("Writing %s", target)
                    target.parent.mkdir(parents=True, exist_ok=True)
                    target.write_text(text)

                    num_chars += len(text)
                    paths_written.append(target)
                elif target.is_file():
                    # Blank contents: remove file
                    logger.debug("Removing %s", target)
                    target.unlink()

            return f"Wrote {num_chars} char(s) to {[str(p) for p in paths_written]}"

        # Update sentences.ini only
        sentences_path = Path(
            core.profile.write_path(core.profile.get("speech_to_text.sentences_ini"))
        )

        data = await request.data
        with open(sentences_path, "wb") as sentences_file:
            sentences_file.write(data)

        return f"Wrote {len(data)} byte(s) to {sentences_path}"

    # GET
    sentences_path = Path(
        core.profile.read_path(core.profile.get("speech_to_text.sentences_ini"))
    )

    if not prefers_json():
        # Return sentences.ini contents only
        if not sentences_path.is_file():
            return ""  # no sentences yet

        return await send_file(sentences_path)

    # Return multiple .ini files, keyed by path relative to profile directory
    sentences_dict = {}

    if sentences_path.is_file():
        try:
            # Try user profile dir first
            profile_dir = Path(core.profile.user_profiles_dir) / core.profile.name
            key = str(sentences_path.relative_to(profile_dir))
        except Exception:
            # Fall back to system profile dir
            profile_dir = Path(core.profile.system_profiles_dir) / core.profile.name
            key = str(sentences_path.relative_to(profile_dir))

        sentences_dict[key] = sentences_path.read_text()

    ini_dir = Path(
        core.profile.read_path(core.profile.get("speech_to_text.sentences_dir"))
    )

    # Add all .ini files from sentences_dir
    if ini_dir.is_dir():
        for ini_path in ini_dir.glob("*.ini"):
            key = str(ini_path.relative_to(core.profile.read_path()))
            sentences_dict[key] = ini_path.read_text()

    return jsonify(sentences_dict)
# -----------------------------------------------------------------------------
@app.route("/api/custom-words", methods=["GET", "POST"])
async def api_custom_words():
    """Read or write the custom words file for the profile.

    POST: write every non-blank line of the body (stripped) to the custom
    words file. GET: return the file parsed with ``read_dict`` as JSON when
    the client prefers JSON, otherwise the raw file; an empty result is
    returned when the file does not exist.
    """
    assert core is not None
    speech_system = core.profile.get("speech_to_text.system", "pocketsphinx")
    words_setting = f"speech_to_text.{speech_system}.custom_words"

    # Temporary fix for kaldi/custom_words -> kaldi_custom_words.txt
    old_kaldi_words_path = Path(core.profile.read_path("kaldi/custom_words.txt"))
    if old_kaldi_words_path.is_file():
        new_kaldi_words_path = Path(
            core.profile.write_path(
                core.profile.get(
                    "speech_to_text.kaldi.custom_words", "custom_words.txt"
                )
            )
        )

        # Only move when the target is a different, not yet existing file
        if (
            new_kaldi_words_path != old_kaldi_words_path
            and not new_kaldi_words_path.is_file()
        ):
            logger.warning(
                "Moving %s to %s", str(old_kaldi_words_path), str(new_kaldi_words_path)
            )
            shutil.move(old_kaldi_words_path, new_kaldi_words_path)

    if request.method == "POST":
        custom_words_path = Path(
            core.profile.write_path(
                core.profile.get(words_setting, "custom_words.txt")
            )
        )

        # Update custom words
        lines_written = 0
        with open(custom_words_path, "w") as custom_words_file:
            data = await request.data
            for raw_line in data.decode().splitlines():
                stripped = raw_line.strip()
                if stripped:
                    print(stripped, file=custom_words_file)
                    lines_written += 1

        return f"Wrote {lines_written} line(s) to {custom_words_path}"

    custom_words_path = Path(
        core.profile.read_path(core.profile.get(words_setting, "custom_words.txt"))
    )

    # Return custom_words
    if prefers_json():
        if not custom_words_path.is_file():
            return jsonify({})  # no custom_words yet

        with open(custom_words_path, "r") as words_file:
            return jsonify(read_dict(words_file))

    if not custom_words_path.is_file():
        return ""  # no custom_words yet

    # Return file contents
    return await send_file(custom_words_path)
# -----------------------------------------------------------------------------
@app.route("/api/train", methods=["POST"])
async def api_train() -> str:
    """Train the profile and report how long it took.

    The ``nocache`` query argument ("true"/"false") is forwarded to the
    core. Raises an Exception when the core reports ProfileTrainingFailed.
    """
    no_cache = request.args.get("nocache", "false").lower() == "true"

    assert core is not None

    start_time = time.time()
    logger.info("Starting training")

    outcome = await core.train(no_cache=no_cache)
    if isinstance(outcome, ProfileTrainingFailed):
        raise Exception(f"Training failed: {outcome.reason}")

    elapsed = time.time() - start_time
    return "Training completed in %0.2f second(s)" % elapsed
# -----------------------------------------------------------------------------
@app.route("/api/restart", methods=["POST"])
async def api_restart() -> str:
    """Shut down the current core and start a fresh one."""
    assert core is not None
    logger.debug("Restarting Rhasspy")

    # Stop the running core, then build and start a new one
    await core.shutdown()
    await start_rhasspy()

    message = "Restarted Rhasspy"
    logger.info(message)
    return message
# -----------------------------------------------------------------------------
@app.route("/api/speech-to-text", methods=["POST"])
async def api_speech_to_text() -> str:
    """Transcribe the audio in the request body.

    With ``noheader=true`` the body is wrapped with ``buffer_to_wav`` first.
    The audio is remembered as the last voice command and the transcription
    is published to websocket listeners. Returns JSON with timing details
    when the client prefers JSON, otherwise the transcribed text.
    """
    global last_voice_wav

    no_header = request.args.get("noheader", "false").lower() == "true"
    assert core is not None

    # Prefer 16-bit 16Khz mono, but will convert with sox if needed
    wav_data = await request.data
    if no_header:
        # Wrap in WAV
        wav_data = buffer_to_wav(wav_data)

    last_voice_wav = wav_data

    started = time.perf_counter()
    result = await core.transcribe_wav(wav_data)
    finished = time.perf_counter()

    # Send to websocket
    ws_payload = {"text": result.text, "wakewordId": "default", "siteId": core.siteId}
    await add_ws_event("transcription", json.dumps(ws_payload))

    if not prefers_json():
        return result.text

    return jsonify(
        {
            "text": result.text,
            "likelihood": result.confidence,
            "transcribe_seconds": (finished - started),
            "wav_seconds": get_wav_duration(wav_data),
        }
    )
# -----------------------------------------------------------------------------
@app.route("/api/text-to-intent", methods=["POST"])
async def api_text_to_intent():
    """Recognize intent from text and optionally handle it.

    The recognized intent is published to websocket listeners. Unless
    ``nohass=true`` is given, the intent is also passed to the core's intent
    handler and the handled intent is returned instead.
    """
    assert core is not None

    body = await request.data
    text = body.decode()
    skip_handling = request.args.get("nohass", "false").lower() == "true"

    # Convert text to intent
    started = time.time()
    recognition = await core.recognize_intent(text)
    intent = recognition.intent
    intent["speech_confidence"] = 1
    intent["time_sec"] = time.time() - started

    intent_json = json.dumps(intent)
    logger.debug(intent_json)
    await add_ws_event("intent", intent_json)

    if not skip_handling:
        # Send intent to Home Assistant
        handled = await core.handle_intent(intent)
        intent = handled.intent

    return jsonify(intent)
# -----------------------------------------------------------------------------
@app.route("/api/speech-to-intent", methods=["POST"])
async def api_speech_to_intent() -> Response:
    """Transcribe audio, recognize its intent, and optionally handle it.

    The audio is remembered as the last voice command. Both the
    transcription and the intent are published to websocket listeners.
    Unless ``nohass=true`` is given, the intent is passed to the core's
    intent handler and the handled intent is returned.
    """
    global last_voice_wav

    assert core is not None
    skip_handling = request.args.get("nohass", "false").lower() == "true"

    # Prefer 16-bit 16Khz mono, but will convert with sox if needed
    wav_data = await request.data
    last_voice_wav = wav_data

    # speech -> text
    started = time.time()
    transcription = await core.transcribe_wav(wav_data)
    text = transcription.text
    logger.debug(text)

    # Send to websocket
    ws_payload = {"text": text, "wakewordId": "default", "siteId": core.siteId}
    await add_ws_event("transcription", json.dumps(ws_payload))

    # text -> intent
    recognition = await core.recognize_intent(text)
    intent = recognition.intent
    intent["speech_confidence"] = transcription.confidence
    intent["time_sec"] = time.time() - started

    intent_json = json.dumps(intent)
    logger.debug(intent_json)
    await add_ws_event("intent", intent_json)

    if not skip_handling:
        # Send intent to Home Assistant
        handled = await core.handle_intent(intent)
        intent = handled.intent

    return jsonify(intent)
# -----------------------------------------------------------------------------
@app.route("/api/start-recording", methods=["POST"])
async def api_start_recording() -> str:
    """Start recording audio into the buffer named by the ``name`` argument.

    The buffer name defaults to the empty string.
    """
    assert core is not None

    core.start_recording_wav(request.args.get("name", ""))
    return "OK"
@app.route("/api/stop-recording", methods=["POST"])
async def api_stop_recording() -> Response:
    """Stop recording, then transcribe, recognize and optionally handle.

    The recorded buffer (``name`` argument, default "") is converted to WAV
    and transcribed. The transcription and the intent are published to
    websocket listeners. Unless ``nohass=true`` is given, the intent is
    passed to the core's intent handler. The WAV data is remembered as the
    last voice command once processing has succeeded.
    """
    global last_voice_wav

    assert core is not None
    skip_handling = request.args.get("nohass", "false").lower() == "true"
    buffer_name = request.args.get("name", "")

    recording = await core.stop_recording_wav(buffer_name)
    wav_data = buffer_to_wav(recording.data)
    logger.debug("Recorded %s byte(s) of audio data", len(wav_data))

    transcription = await core.transcribe_wav(wav_data)
    text = transcription.text
    logger.debug(text)

    # Send to websocket
    ws_payload = {"text": text, "wakewordId": "default", "siteId": core.siteId}
    await add_ws_event("transcription", json.dumps(ws_payload))

    recognition = await core.recognize_intent(text)
    intent = recognition.intent
    intent["speech_confidence"] = transcription.confidence

    intent_json = json.dumps(intent)
    logger.debug(intent_json)
    await add_ws_event("intent", intent_json)

    if not skip_handling:
        # Send intent to Home Assistant
        handled = await core.handle_intent(intent)
        intent = handled.intent

    # Save last voice command WAV data
    last_voice_wav = wav_data

    return jsonify(intent)
@app.route("/api/play-recording", methods=["POST"])
async def api_play_recording() -> str:
    """Play the last remembered voice command, if there is one."""
    assert core is not None

    recording = last_voice_wav
    if recording:
        # Play through speakers
        logger.debug("Playing %s byte(s)", len(recording))
        core.play_wav_data(recording)

    return "OK"
# -----------------------------------------------------------------------------
@app.route("/api/unknown-words", methods=["GET"])
async def api_unknown_words() -> Response:
    """Get the unknown words file as a JSON object of word -> pronunciation.

    The file name comes from the profile setting
    ``speech_to_text.<system>.unknown_words`` (default "unknown_words.txt").
    Each non-blank line is split on the first run of spaces into a word and
    its pronunciation. An empty object is returned when the file does not
    exist.
    """
    assert core is not None

    speech_system = core.profile.get("speech_to_text.system", "pocketsphinx")
    unknown_words = {}
    unknown_path = Path(
        core.profile.read_path(
            core.profile.get(
                f"speech_to_text.{speech_system}.unknown_words", "unknown_words.txt"
            )
        )
    )

    if unknown_path.is_file():
        # Use a context manager so the file handle is closed even when a
        # line fails to parse.
        with open(unknown_path, "r") as unknown_file:
            for line in unknown_file:
                line = line.strip()
                if line:
                    word, pronunciation = re.split(r"[ ]+", line, maxsplit=1)
                    unknown_words[word] = pronunciation

    return jsonify(unknown_words)
# -----------------------------------------------------------------------------
last_sentence = ""


@app.route("/api/text-to-speech", methods=["POST"])
async def api_text_to_speech() -> Union[bytes, str]:
    """Speak the sentence in the request body.

    Query arguments: ``repeat`` ("true" re-uses the previously spoken
    sentence instead of the body), ``play`` ("false" returns the WAV data
    instead of the sentence), and ``language``/``voice``/``siteId``, which
    are forwarded to the core.
    """
    global last_sentence

    query = request.args
    repeat = query.get("repeat", "false").strip().lower() == "true"
    play = query.get("play", "true").strip().lower() == "true"
    language = query.get("language")
    voice = query.get("voice")
    siteId = query.get("siteId")

    data = await request.data
    if repeat:
        sentence = last_sentence
    else:
        sentence = data.decode().strip()

    assert core is not None
    result = await core.speak_sentence(
        sentence, play=play, language=language, voice=voice, siteId=siteId
    )

    # Remember the sentence for a later repeat request
    last_sentence = sentence

    if play:
        return sentence

    # Return WAV data instead of speaking
    return Response(result.wav_data, mimetype="audio/wav")
# -----------------------------------------------------------------------------
@app.route("/api/slots", methods=["GET", "POST"])
async def api_slots() -> Union[str, Response]:
    """Get or update the values of all slots.

    POST: the JSON5 body maps slot names to a value or list of values. New
    values are merged with any existing file contents; with
    ``overwrite_all=true`` the existing files for the posted names are
    removed first. GET: return a JSON object of slot name -> list of lines
    for every file in the slots directory.
    """
    assert core is not None

    slots_dir_name = core.profile.get("speech_to_text.slots_dir", "slots")

    if request.method != "POST":
        # Read slots into dictionary
        slots_dir = Path(core.profile.read_path(slots_dir_name))

        slots_dict = {}
        if slots_dir.is_dir():
            slots_dict = {
                slot_file.name: [
                    line.strip() for line in slot_file.read_text().splitlines()
                ]
                for slot_file in slots_dir.glob("*")
                if slot_file.is_file()
            }

        return jsonify(slots_dict)

    overwrite_all = request.args.get("overwrite_all", "false").lower() == "true"
    new_slot_values = json5.loads(await request.data)
    slots_dir = Path(core.profile.write_path(slots_dir_name))

    if overwrite_all:
        # Remove existing values first
        for name in new_slot_values:
            slots_path = safe_join(slots_dir, f"{name}")
            if not slots_path.is_file():
                continue

            try:
                slots_path.unlink()
            except Exception:
                logger.exception("api_slots")

    for name, values in new_slot_values.items():
        if isinstance(values, str):
            # A single value was given instead of a list
            values = [values]

        slots_path = slots_dir / name

        # Create directories
        slots_path.parent.mkdir(parents=True, exist_ok=True)

        # Merge with existing values
        merged = {v.strip() for v in values}
        if slots_path.is_file():
            merged.update(
                line.strip() for line in slots_path.read_text().splitlines()
            )

        if not merged:
            continue

        # Write merged values, skipping blank entries
        with open(slots_path, "w") as slots_file:
            for value in merged:
                if value:
                    print(value, file=slots_file)

    return "OK"
@app.route("/api/slots/<name>", methods=["GET", "POST"])
async def api_slots_by_name(name: str) -> Union[str, Response]:
    """Get or set the values of a slot list.

    POST: write the raw request body to the slot file for ``name``; with
    ``overwrite_all=true`` an existing file is removed first. GET: return
    the stripped lines of the slot file as a JSON list (empty when the file
    does not exist).

    The handler is a coroutine because the request body must be awaited,
    as every other handler in this module does.
    """
    assert core is not None
    overwrite_all = request.args.get("overwrite_all", "false").lower() == "true"

    slots_dir = Path(
        core.profile.read_path(core.profile.get("speech_to_text.slots_dir", "slots"))
    )

    if request.method == "POST":
        # Read the body before touching the file system so that a failed
        # read does not leave a removed or truncated slot file behind.
        data = await request.data

        if overwrite_all:
            # Remove existing values first
            slots_path = safe_join(slots_dir, f"{name}")
            if slots_path.is_file():
                try:
                    slots_path.unlink()
                except Exception:
                    logger.exception("api_slots_by_name")

        slots_path = Path(
            core.profile.write_path(
                core.profile.get("speech_to_text.slots_dir", "slots"), f"{name}"
            )
        )

        # Create directories
        slots_path.parent.mkdir(parents=True, exist_ok=True)

        # Write data
        with open(slots_path, "wb") as slots_file:
            slots_file.write(data)

        return f"Wrote {len(data)} byte(s) to {slots_path}"

    # Load slots values
    slot_values: List[str] = []
    slot_file_path = slots_dir / name
    if slot_file_path.is_file():
        slot_values = [line.strip() for line in slot_file_path.read_text().splitlines()]

    return jsonify(slot_values)
# -----------------------------------------------------------------------------
@app.route("/api/intents")
def api_intents():
    """Return JSON describing the sentences of every intent.

    Each sentence is converted to a dictionary with ``attr.asdict`` and
    every nested expression dictionary gets an ``item_type`` field holding
    the class name of the expression it was built from.
    """
    assert core is not None

    sentences_ini = Path(
        core.profile.read_path(core.profile.get("speech_to_text.sentences_ini"))
    )
    sentences_dir = Path(
        core.profile.read_path(core.profile.get("speech_to_text.sentences_dir"))
    )

    # Load all .ini files and parse
    ini_paths: List[Path] = get_ini_paths(sentences_ini, sentences_dir)
    intents: Dict[str, Any] = get_all_intents(ini_paths)

    def add_type(item, item_dict: Dict[str, Any]):
        """Recursively tag an expression dictionary with its class name."""
        item_dict["item_type"] = type(item).__name__

        if hasattr(item, "items"):
            # Group, alternative, etc.
            for child, child_dict in zip(item.items, item_dict["items"]):
                add_type(child, child_dict)
        elif hasattr(item, "rule_body"):
            # Rule
            add_type(item.rule_body, item_dict["rule_body"])

    def to_dict(sentence) -> Dict[str, Any]:
        """Convert one sentence to a dictionary with item_type fields."""
        as_dict = attr.asdict(sentence)
        add_type(sentence, as_dict)
        return as_dict

    # Convert to dictionary, then to JSON
    intents_dict = {
        intent_name: [to_dict(sentence) for sentence in intent_sentences]
        for intent_name, intent_sentences in intents.items()
    }

    return jsonify(intents_dict)
# -----------------------------------------------------------------------------
@app.route("/process", methods=["GET"])
async def marytts_process() -> Response:
    """Emulate the MaryTTS /process API.

    Reads INPUT_TEXT (default ""), VOICE and LOCALE from the query string,
    synthesizes the sentence without playing it and returns the WAV data.
    """
    assert core is not None

    query = request.args
    sentence = query.get("INPUT_TEXT", "")
    voice = query.get("VOICE")
    locale = query.get("LOCALE")

    spoken = await core.speak_sentence(
        sentence, play=False, voice=voice, language=locale
    )

    return Response(spoken.wav_data, mimetype="audio/wav")
# -----------------------------------------------------------------------------
@app.errorhandler(Exception)
async def handle_error(err) -> Tuple[str, int]:
    """Log an unhandled exception and return its text with status 500."""
    logger.exception(err)

    message = str(err)
    return (message, 500)
# ---------------------------------------------------------------------
# Static Routes
# ---------------------------------------------------------------------
# Root of the web assets, relative to the current working directory
web_dir = Path("dist")
# Fail at import time when the web directory is missing
assert web_dir.is_dir(), f"Missing web directory {web_dir}"
# Sub-directories served by the static routes below
css_dir = web_dir / "css"
js_dir = web_dir / "js"
img_dir = web_dir / "img"
webfonts_dir = web_dir / "webfonts"
@app.route("/css/<path:filename>", methods=["GET"])
async def css(filename) -> Response:
    """Serve a file from the css directory."""
    response = await send_from_directory(css_dir, filename)
    return response
@app.route("/js/<path:filename>", methods=["GET"])
async def js(filename) -> Response:
    """Serve a file from the js directory."""
    response = await send_from_directory(js_dir, filename)
    return response
@app.route("/img/<path:filename>", methods=["GET"])
async def img(filename) -> Response:
    """Serve a file from the img directory."""
    response = await send_from_directory(img_dir, filename)
    return response
@app.route("/webfonts/<path:filename>", methods=["GET"])
async def webfonts(filename) -> Response:
    """Serve a file from the webfonts directory."""
    response = await send_from_directory(webfonts_dir, filename)
    return response
# ----------------------------------------------------------------------------
# HTML Page Routes
# ----------------------------------------------------------------------------
@app.route("/", methods=["GET"])
async def index() -> Response:
    """Serve index.html from the web directory."""
    index_path = web_dir / "index.html"
    return await send_file(index_path)
@app.route("/swagger.yaml", methods=["GET"])
async def swagger_yaml() -> Response:
    """Serve swagger.yaml (the OpenAPI description) from the web directory."""
    swagger_path = web_dir / "swagger.yaml"
    return await send_file(swagger_path)
# -----------------------------------------------------------------------------
# WebSocket API
# -----------------------------------------------------------------------------
# One queue per connected event websocket; items are (message_type, text)
user_queues: Set[asyncio.Queue] = set()
# One queue per connected logging websocket; items are log message strings
logging_queues: Set[asyncio.Queue] = set()
async def add_ws_event(message_type: str, text: str):
    """Queue a (message_type, text) event for every connected user websocket."""
    event = (message_type, text)
    for user_queue in user_queues:
        await user_queue.put(event)
async def log_ws_event(text: str):
    """Queue a log message for every connected logging websocket."""
    for log_queue in logging_queues:
        await log_queue.put(text)
# Send logging messages out to websocket
# Attach a handler to the root logger that schedules log_ws_event on the
# shared event loop; run_coroutine_threadsafe allows this from any thread.
# NOTE(review): presumably FunctionLoggingHandler calls the function with each
# formatted log message -- confirm in rhasspy.utils.
logging.root.addHandler(
FunctionLoggingHandler(
lambda msg: asyncio.run_coroutine_threadsafe(log_ws_event(msg), loop)
)
)
class WebSocketObserver(RhasspyActor):
    """Observe the dialogue manager and forward its events to the websockets."""

    def in_started(self, message: Any, sender: RhasspyActor) -> None:
        """Handle messages in started state.

        Intents, wake word detections and transcriptions are serialized to
        JSON and scheduled onto the web server's event loop as websocket
        events. Voice commands are stored as the last recorded WAV.
        """
        global last_voice_wav

        if isinstance(message, IntentRecognized):
            # Add slots
            message.intent["slots"] = {
                ev["entity"]: ev["value"]
                for ev in message.intent.get("entities", [])
            }

            # Convert to JSON
            intent_json = json.dumps(message.intent)
            self._logger.debug(intent_json)
            asyncio.run_coroutine_threadsafe(add_ws_event("intent", intent_json), loop)
        elif isinstance(message, WakeWordDetected):
            assert core is not None
            wake_json = json.dumps({"wakewordId": message.name, "siteId": core.siteId})
            asyncio.run_coroutine_threadsafe(add_ws_event("wake", wake_json), loop)
        elif isinstance(message, WavTranscription):
            assert core is not None
            transcription_json = json.dumps(
                {
                    "text": message.text,
                    "wakewordId": message.wakewordId,
                    "siteId": core.siteId,
                }
            )

            # The event type must be "transcription": that is the only type
            # the /api/events/text websocket forwards to its clients.
            asyncio.run_coroutine_threadsafe(
                add_ws_event("transcription", transcription_json), loop
            )
        elif isinstance(message, VoiceCommand):
            # Save last voice command
            last_voice_wav = buffer_to_wav(message.data)
def api_websocket(func):
    """Decorate a websocket route so it receives its own event queue.

    A new queue is registered in ``user_queues`` for the duration of the
    call and passed to ``func`` as the first argument. Exceptions raised by
    ``func`` are logged and swallowed; the queue is always unregistered.
    """

    @wraps(func)
    async def wrapper(*_args, **kwargs):
        event_queue = asyncio.Queue()
        user_queues.add(event_queue)

        try:
            result = await func(event_queue, *_args, **kwargs)
            return result
        except Exception:
            logger.exception("api_websocket")
        finally:
            user_queues.discard(event_queue)

    return wrapper
@app.websocket("/api/events/intent")
@api_websocket
async def api_events_intent(queue) -> None:
    """Websocket endpoint that forwards "intent" events as JSON text."""
    await websocket.accept()

    while True:
        message_type, text = await queue.get()
        if message_type != "intent":
            # Not an intent event: ignore
            continue

        await websocket.send(text)
@app.websocket("/api/events/text")
@api_websocket
async def api_events_text(queue) -> None:
    """Websocket endpoint that forwards "transcription" events."""
    await websocket.accept()

    while True:
        message_type, text = await queue.get()
        if message_type != "transcription":
            # Not a transcription event: ignore
            continue

        await websocket.send(text)
@app.websocket("/api/events/wake")
@api_websocket
async def api_events_wake(queue) -> None:
    """Websocket endpoint that forwards "wake" events."""
    await websocket.accept()

    while True:
        message_type, text = await queue.get()
        if message_type != "wake":
            # Not a wake event: ignore
            continue

        await websocket.send(text)
@app.websocket("/api/events/log")
async def api_events_log() -> None:
    """Websocket endpoint to receive logging messages as text.

    A queue is registered in ``logging_queues`` for the lifetime of the
    connection and every message put on it is sent to the client.
    """
    await websocket.accept()

    # Add new queue for websocket
    q: asyncio.Queue = asyncio.Queue()
    logging_queues.add(q)

    try:
        while True:
            text = await q.get()
            await websocket.send(text)
    except concurrent.futures.CancelledError:
        pass
    finally:
        # Always remove the queue. Without the finally, any exception not
        # matched by the except clause above left the queue registered, and
        # log messages kept accumulating in it with no reader.
        logging_queues.discard(q)
# -----------------------------------------------------------------------------
# Swagger UI
# Register the Swagger UI for the app using the swagger.yaml file from the
# web directory.
# NOTE(review): presumably the UI is served under the "/api" prefix -- confirm
# against the swagger_ui package.
quart_api_doc(
app, config_path=(web_dir / "swagger.yaml"), url_prefix="/api", title="Rhasspy API"
)
# -----------------------------------------------------------------------------
def prefers_json() -> bool:
    """Return True when the request's Accept header ranks JSON above plain text."""
    accepted = request.accept_mimetypes
    json_quality = quality(accepted, "application/json")
    text_quality = quality(accepted, "text/plain")
    return json_quality > text_quality
def quality(accept, key: str) -> float:
    """Return the quality of the first Accept option matching ``key``.

    Options are checked in the order given by ``accept.options``; 0.0 is
    returned when none of them matches.
    """
    matching_qualities = (
        option.quality
        for option in accept.options
        # pylint: disable=W0212
        if accept._values_match(key, option.value)
    )
    return next(matching_qualities, 0.0)
# -----------------------------------------------------------------------------
# Start Rhasspy actors
loop.run_until_complete(start_rhasspy())

# -----------------------------------------------------------------------------

# Disable useless logging messages
logging.getLogger("wsproto").setLevel(logging.CRITICAL)

# Start web server, with SSL only when --ssl CERT_FILE KEY_FILE was given
if args.ssl is None:
    certfile = None
    keyfile = None
    protocol = "http"
else:
    logger.debug("Using SSL with certfile, keyfile = %s", args.ssl)
    certfile, keyfile = args.ssl[0], args.ssl[1]
    protocol = "https"

logger.debug("Starting web server at %s://%s:%s", protocol, args.host, args.port)

try:
    app.run(host=args.host, port=args.port, certfile=certfile, keyfile=keyfile)
except KeyboardInterrupt:
    # Ctrl-C ends the server without a traceback
    pass