首页 > 解决方案 > 如何使 SpeechRecognition 和 SpeechSynthesis API 一起工作?

问题描述

我目前正在试验 Web Speech API。我的想法是利用语音识别和语音合成来改善用户填写 HTML 表单的体验。请注意,下面的代码只是一个概念验证,还存在很多问题。但是,speechRecognition API 和 speechSynthesis API 似乎不能很好地协同工作。我想要的效果是:当输入框或按钮获得焦点时,先用 speechSynthesis 朗读其标签,然后用 speechRecognition 监听用户的回答。这一部分似乎能按预期工作。

问题是,在 speechRecognition 第一次启动之后,speechSynthesis 的音量会变得低很多,而且有时会失败(不朗读)。我添加了一些逻辑,在元素失去焦点时中止识别,但这似乎不起作用。有没有人遇到过这个问题?是我哪里做错了吗?欢迎任何帮助。

编辑:StackOverflow 的代码片段不会请求用户的麦克风权限,请使用这个 JSFiddle 查看可运行的演示:https://jsfiddle.net/wzt0nfp3/

// Feature detection: each constant holds the live API object, or `false`
// when the browser does not expose it.
const speechRecognition = (() => {
  // Fall back to the webkit-prefixed constructor when the standard one is absent.
  const Recognition =
    window.SpeechRecognition || window.webkitSpeechRecognition;
  return Recognition ? new Recognition() : false;
})();

const speechSynthesis = !("speechSynthesis" in window)
  ? false
  : window.speechSynthesis;

if (!!speechRecognition && !!speechSynthesis) {
  // Voice applied to every utterance; assigned by loadVoice().
  let voice = null;

  // Recognizer settings: en-US, non-continuous, no interim results.
  Object.assign(speechRecognition, {
    lang: "en-US",
    continuous: false,
    interimResults: false
  });

  // Flags updated by the synthesis and recognition event handlers.
  const state = { speaking: false, listening: false };

  /**
   * Picks the voice used for synthesis and stores it in `voice`.
   *
   * Scans speechSynthesis.getVoices() in order, remembering the latest voice
   * flagged `default` and the latest voice whose name starts with
   * "Microsoft Jessa Online"; scanning stops once both have been seen.
   * The named voice wins; otherwise the default voice is used; `voice`
   * becomes null when neither exists.
   */
  function loadVoice() {
    let fallback = null;
    let favourite = null;

    for (const candidate of speechSynthesis.getVoices()) {
      if (fallback && favourite) break;

      if (candidate.default) {
        fallback = candidate;
      }
      if (candidate.name.startsWith("Microsoft Jessa Online")) {
        favourite = candidate;
      }
    }

    voice = favourite || fallback;
  }

  // Resolve the voice immediately, and again whenever the browser reports
  // that its list of voices changed.
  loadVoice();
  speechSynthesis.onvoiceschanged = loadVoice;

  // Thin wrappers around the recognizer's abort() and start().
  const abortRecognition = () => {
    speechRecognition.abort();
  };
  const startRecognition = () => {
    speechRecognition.start();
  };

  // Utterance most recently handed to speechSynthesis.speak(); reset to null
  // once it has ended, failed, or been cancelled by a newer speak() call.
  let currentUtterance = null;

  /**
   * Reads `text` aloud and starts speech recognition once that utterance
   * has finished.
   *
   * Any utterance still speaking or queued is cancelled first. Events from a
   * cancelled utterance are ignored, so only the utterance that actually
   * finishes may open the microphone. Recognition is aborted before speaking
   * so the prompt is never played while the recognizer is listening.
   *
   * @param {string} text - Prompt to read. An empty string only cancels
   *   whatever is currently being spoken; nothing is read and recognition
   *   is not started.
   */
  function speak(text) {
    // Close the microphone before the prompt is read.
    if (state.listening) abortRecognition();

    if (speechSynthesis.speaking || speechSynthesis.pending) {
      // Detach first: cancel() makes the interrupted utterance fire "end" or
      // "error" (which one depends on the browser), and that stale event
      // must neither start recognition nor overwrite state.speaking.
      currentUtterance = null;
      state.speaking = false;
      speechSynthesis.cancel();
    }

    if (text === "") return;

    const utterThis = new SpeechSynthesisUtterance(text);
    utterThis.lang = "en-US";
    utterThis.voice = voice;
    utterThis.volume = 1;
    utterThis.pitch = 1;
    utterThis.rate = 1;

    const isCurrent = () => utterThis === currentUtterance;

    utterThis.addEventListener("start", () => {
      if (!isCurrent()) return;
      state.speaking = true;
      console.log("Start Speaking.", state);
    });

    utterThis.addEventListener("error", event => {
      if (!isCurrent()) return;
      currentUtterance = null;
      state.speaking = false;
      console.log(`Error: ${event.error}`, state);
    });

    utterThis.addEventListener("end", () => {
      if (!isCurrent()) return;
      currentUtterance = null;
      state.speaking = false;
      console.log("Stop Speaking.", state);

      // start() throws InvalidStateError when the recognizer is already
      // running; skip it when we know it is, and never let it escape the
      // event handler when we do not.
      if (state.listening) return;
      try {
        startRecognition();
      } catch (err) {
        console.log(`Error: ${err.message}`, state);
      }
    });

    currentUtterance = utterThis;
    speechSynthesis.speak(utterThis);
  }

  // Mirror the recognizer's lifecycle into state.listening, logging each
  // transition together with the current state.
  const trackListening = (isListening, describe) => event => {
    state.listening = isListening;
    console.log(describe(event), state);
  };

  speechRecognition.addEventListener(
    "start",
    trackListening(true, () => "Start Listening.")
  );
  speechRecognition.addEventListener(
    "error",
    trackListening(false, event => "Error: " + event.error)
  );
  speechRecognition.addEventListener(
    "end",
    trackListening(false, () => "Stop Listening.")
  );

  // Writes the recognized phrase, with its first letter capitalized, into
  // the text field that currently has focus.
  speechRecognition.addEventListener("result", event => {
    if (typeof event.results === "undefined") return;
    state.listening = false;

    // A result event without a first alternative carries nothing to insert.
    const bestGuess = event.results[0] && event.results[0][0];
    if (!bestGuess) return;

    const capitalize = s => s.charAt(0).toUpperCase() + s.slice(1);
    const transcript = capitalize(bestGuess.transcript.trim());

    console.log("Transcript: " + transcript, state);

    if (transcript === "") return;

    // Focus may sit on the submit button or have fallen back to <body> by
    // the time the result arrives; only real text fields take the value.
    const target = document.activeElement;
    const isTextField =
      target instanceof HTMLInputElement ||
      target instanceof HTMLTextAreaElement;
    if (!isTextField) return;

    target.value = transcript;
  });

  // Every matching control aborts recognition when it loses focus and has
  // its prompt read aloud when it gains focus.
  const wireControl = (selector, promptFor) => {
    document.querySelectorAll(selector).forEach(control => {
      control.addEventListener("blur", () => abortRecognition());
      control.addEventListener("focus", e => {
        speak(promptFor(e.target));
      });
    });
  };

  // Fields are prompted with their parent element's text; buttons with
  // their own text.
  const parentText = el => el.parentElement.textContent.trim();
  const ownText = el => el.textContent.trim();

  wireControl("input", parentText);
  wireControl("textarea", parentText);
  wireControl("button", ownText);
}
/* Page defaults. */
body {
  font-family: sans-serif;
  margin: 5% 0;
}

/* Single centred column, capped at 600px. */
form {
  display: flex;
  flex-direction: column;
  margin: 0 auto;
  max-width: 600px;
  width: 90%;
}

/* Each label is a row: prompt text on the left, its field on the right. */
label {
  align-items: center;
  color: blue;
  display: flex;
  justify-content: space-between;
  margin-bottom: 1em;
}

/* Underline-only fields that grow to fill the row, up to 350px. */
input,
textarea {
  border: none;
  border-bottom: 1px solid blue;
  flex-grow: 1;
  font-size: inherit;
  margin-left: 1rem;
  max-width: 350px;
  padding: 0.5rem 0;
}

/* The default outline is removed; a box-shadow under the field marks focus instead. */
input:focus,
textarea:focus {
  outline: none;
  box-shadow: 0 1px 0 0 blue;
}

/* Textareas may only be resized vertically. */
textarea {
  resize: vertical;
}

/* Submit button: solid blue at rest, outlined on hover/focus. */
button {
  background-color: blue;
  border-radius: 3px;
  /* Same border width in every state, so hover/focus does not resize the
     button; at rest the border blends into the background. */
  border: 2px solid blue;
  color: white;
  font-size: inherit;
  margin-top: 2rem;
  padding: 0.75rem 5rem;
  width: fit-content;
}

/* The default outline is removed; the colour inversion marks focus instead. */
button:focus,
button:hover {
  background-color: transparent;
  color: blue;
  outline: none;
}
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <meta http-equiv="X-UA-Compatible" content="ie=edge" />
  <title>Web Speech API</title>
</head>

<body>
  <form>
    <label>My name is
        <input type="text" placeholder="Your name" />
      </label>
    <label>My business is
        <input type="text" placeholder="Company name" />
      </label>
    <label>You can email me at
        <input type="email" placeholder="Email address" />
      </label>
    <label>You can call me at
        <input type="tel" placeholder="Phone number" />
      </label>
    <label>My project is
        <textarea rows="5" placeholder="Description of my project, budget, time constraints..."></textarea>
      </label>
    <button type="submit">Get in touch</button>
  </form>
  <script src="./index.js"></script>
</body>

</html>

标签: javascript speech-recognition text-to-speech speech-to-text

解决方案


推荐阅读