android - Android 主线程 UI 在实现 Google Speech-to-text 时没有响应。怎么解决?
问题描述
目前,我正在我的项目中实现 google Speech to Text。引用的示例代码是这样的:单击此处。
我已经使用了这个项目中的 SpeechService 和 Voice Recorder 类。
public class SpeechService extends Service {
public static final List<String> SCOPE =
Collections.singletonList("https://www.googleapis.com/auth/cloud-platform");
private static final String TAG = "SpeechService";
private static final String PREFS = "SpeechService";
private static final String PREF_ACCESS_TOKEN_VALUE = "access_token_value";
private static final String PREF_ACCESS_TOKEN_EXPIRATION_TIME = "access_token_expiration_time";
/**
* We reuse an access token if its expiration time is longer than this.
*/
private static final int ACCESS_TOKEN_EXPIRATION_TOLERANCE = 30 * 60 * 1000; // thirty minutes
/**
* We refresh the current access token before it expires.
*/
private static final int ACCESS_TOKEN_FETCH_MARGIN = 60 * 1000; // one minute
private static final String HOSTNAME = "speech.googleapis.com";
private static final int PORT = 443;
private static Handler mHandler;
private final SpeechBinder mBinder = new SpeechBinder();
private final ArrayList<Listener> mListeners = new ArrayList<>();
private final StreamObserver<StreamingRecognizeResponse> mResponseObserver
= new StreamObserver<StreamingRecognizeResponse>() {
@Override
public void onNext(StreamingRecognizeResponse response) {
Log.e("Speech", "Recognized");
String text = null;
boolean isFinal = false;
if (response.getResultsCount() > 0) {
System.out.println("result count....."+String.valueOf(response.getResultsCount()));
final StreamingRecognitionResult result = response.getResults(0);
isFinal = result.getIsFinal();
if (result.getAlternativesCount() > 0) {
final SpeechRecognitionAlternative alternative = result.getAlternatives(0);
text = alternative.getTranscript();
}
}
if (text != null && isFinal) {
for (Listener listener : mListeners) {
listener.onSpeechRecognized(text, isFinal);
}
} else {
for (Listener listener : mListeners) {
listener.onRandomStupidity();
}
}
}
@Override
public void onError(Throwable t) {
Log.e(TAG, "Error calling the API.", t);
for(Listener listener : mListeners){
listener.onErrorRecognizing();
}
}
@Override
public void onCompleted() {
Log.i(TAG, "API completed.");
}
};
private volatile AccessTokenTask mAccessTokenTask;
private final Runnable mFetchAccessTokenRunnable = new Runnable() {
@Override
public void run() {
fetchAccessToken();
}
};
private SpeechGrpc.SpeechStub mApi;
private StreamObserver<StreamingRecognizeRequest> mRequestObserver;
public static SpeechService from(IBinder binder) {
return ((SpeechBinder) binder).getService();
}
@Override
public void onCreate() {
super.onCreate();
mHandler = new Handler();
fetchAccessToken();
}
@Override
public void onDestroy() {
super.onDestroy();
mHandler.removeCallbacks(mFetchAccessTokenRunnable);
mHandler = null;
// Release the gRPC channel.
if (mApi != null) {
final ManagedChannel channel = (ManagedChannel) mApi.getChannel();
if (channel != null && !channel.isShutdown()) {
try {
channel.shutdown().awaitTermination(5, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Log.e(TAG, "Error shutting down the gRPC channel.", e);
}
}
mApi = null;
}
}
private void fetchAccessToken() {
if (mAccessTokenTask != null) {
return;
}
mAccessTokenTask = new AccessTokenTask();
mAccessTokenTask.execute();
}
private String getDefaultLanguageCode() {
final LangInnerResponse languageToLearn = MemoryCache.getLanguageToLearn();
if(languageToLearn != null) {
Log.e("Test Lang", languageToLearn.getCode());
return languageToLearn.getCode();
} else {
final Locale locale = Locale.getDefault();
final StringBuilder language = new StringBuilder(locale.getLanguage());
final String country = locale.getCountry();
if (!TextUtils.isEmpty(country)) {
language.append("-");
language.append(country);
}
return language.toString();
}
}
@Nullable
@Override
public IBinder onBind(Intent intent) {
return mBinder;
}
public void addListener(@NonNull Listener listener) {
mListeners.add(listener);
}
public void removeListener(@NonNull Listener listener) {
mListeners.remove(listener);
}
/**
** Starts recognizing speech audio.
*
* @param sampleRate The sample rate of the audio.
*/
public void startRecognizing(int sampleRate) {
if (mApi == null) {
Log.w(TAG, "API not ready. Ignoring the request.");
return;
}
System.out.println("calling api....");
// Configure the API
mRequestObserver = mApi.streamingRecognize(mResponseObserver);
mRequestObserver.onNext(StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(StreamingRecognitionConfig.newBuilder()
.setConfig(RecognitionConfig.newBuilder()
.setLanguageCode(getDefaultLanguageCode())
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
.setSampleRateHertz(sampleRate)
.build())
.setInterimResults(true)
.setSingleUtterance(true)
.build())
.build());
}
/**
* Recognizes the speech audio. This method should be called every time a chunk of byte buffer
* is ready.
*
* @param data The audio data.
* @param size The number of elements that are actually relevant in the {@code data}.
*/
public void recognize(byte[] data, int size) {
if (mRequestObserver == null) {
return;
}
// Call the streaming recognition API
mRequestObserver.onNext(StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(data, 0, size))
.build());
}
/**
* Finishes recognizing speech audio.
*/
public void finishRecognizing() {
if (mRequestObserver == null) {
return;
}
mRequestObserver.onCompleted();
mRequestObserver = null;
}
public interface Listener {
/**
* Called when a new piece of text was recognized by the Speech API.
*
* @param text The text.
* @param isFinal {@code true} when the API finished processing audio.
*/
void onSpeechRecognized(String text, boolean isFinal);
void onErrorRecognizing();
void onRandomStupidity();
}
/**
* Authenticates the gRPC channel using the specified {@link GoogleCredentials}.
*/
private static class GoogleCredentialsInterceptor implements ClientInterceptor {
private final Credentials mCredentials;
private Metadata mCached;
private Map<String, List<String>> mLastMetadata;
GoogleCredentialsInterceptor(Credentials credentials) {
mCredentials = credentials;
}
private static Metadata toHeaders(Map<String, List<String>> metadata) {
Metadata headers = new Metadata();
if (metadata != null) {
for (String key : metadata.keySet()) {
Metadata.Key<String> headerKey = Metadata.Key.of(
key, Metadata.ASCII_STRING_MARSHALLER);
for (String value : metadata.get(key)) {
headers.put(headerKey, value);
}
}
}
return headers;
}
@Override
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
final MethodDescriptor<ReqT, RespT> method, CallOptions callOptions,
final Channel next) {
return new ClientInterceptors.CheckedForwardingClientCall<ReqT, RespT>(
next.newCall(method, callOptions)) {
@Override
protected void checkedStart(Listener<RespT> responseListener, Metadata headers)
throws StatusException {
Metadata cachedSaved;
URI uri = serviceUri(next, method);
synchronized (this) {
Map<String, List<String>> latestMetadata = getRequestMetadata(uri);
if (mLastMetadata == null || mLastMetadata != latestMetadata) {
mLastMetadata = latestMetadata;
mCached = toHeaders(mLastMetadata);
}
cachedSaved = mCached;
}
headers.merge(cachedSaved);
delegate().start(responseListener, headers);
}
};
}
/**
* Generate a JWT-specific service URI. The URI is simply an identifier with enough
* information for a service to know that the JWT was intended for it. The URI will
* commonly be verified with a simple string equality check.
*/
private URI serviceUri(Channel channel, MethodDescriptor<?, ?> method)
throws StatusException {
String authority = channel.authority();
if (authority == null) {
throw Status.UNAUTHENTICATED
.withDescription("Channel has no authority")
.asException();
}
// Always use HTTPS, by definition.
final String scheme = "https";
final int defaultPort = 443;
String path = "/" + MethodDescriptor.extractFullServiceName(method.getFullMethodName());
URI uri;
try {
uri = new URI(scheme, authority, path, null, null);
} catch (URISyntaxException e) {
throw Status.UNAUTHENTICATED
.withDescription("Unable to construct service URI for auth")
.withCause(e).asException();
}
// The default port must not be present. Alternative ports should be present.
if (uri.getPort() == defaultPort) {
uri = removePort(uri);
}
return uri;
}
private URI removePort(URI uri) throws StatusException {
try {
return new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), -1 /* port */,
uri.getPath(), uri.getQuery(), uri.getFragment());
} catch (URISyntaxException e) {
throw Status.UNAUTHENTICATED
.withDescription("Unable to construct service URI after removing port")
.withCause(e).asException();
}
}
private Map<String, List<String>> getRequestMetadata(URI uri) throws StatusException {
try {
return mCredentials.getRequestMetadata(uri);
} catch (IOException e) {
throw Status.UNAUTHENTICATED.withCause(e).asException();
}
}
}
private class SpeechBinder extends Binder {
SpeechService getService() {
return SpeechService.this;
}
}
private class CreateApiSingle implements SingleOnSubscribe<SpeechGrpc.SpeechStub> {
@Override
public void subscribe(SingleEmitter<SpeechGrpc.SpeechStub> emitter) throws Exception {
final AccessToken accessToken = generateCredentials();
final SpeechGrpc.SpeechStub api = generateApi(accessToken);
emitter.onSuccess(api);
}
private AccessToken generateCredentials() throws IOException {
final SharedPreferences prefs =
getSharedPreferences(PREFS, Context.MODE_PRIVATE);
String tokenValue = prefs.getString(PREF_ACCESS_TOKEN_VALUE, null);
long expirationTime = prefs.getLong(PREF_ACCESS_TOKEN_EXPIRATION_TIME, -1);
// Check if the current token is still valid for a while
if (tokenValue != null && expirationTime > 0) {
if (expirationTime
> System.currentTimeMillis() + ACCESS_TOKEN_EXPIRATION_TOLERANCE) {
return new AccessToken(tokenValue, new Date(expirationTime));
}
}
// ***** WARNING *****
// In this sample, we load the credential from a JSON file stored in a raw resource
// folder of this client app. You should never do this in your app. Instead, store
// the file in your server and obtain an access token from there.
// *******************
final InputStream stream = getResources().openRawResource(R.raw.credential);
final GoogleCredentials credentials = GoogleCredentials.fromStream(stream)
.createScoped(SCOPE);
final AccessToken token = credentials.refreshAccessToken();
prefs.edit()
.putString(PREF_ACCESS_TOKEN_VALUE, token.getTokenValue())
.putLong(PREF_ACCESS_TOKEN_EXPIRATION_TIME,
token.getExpirationTime().getTime())
.apply();
stream.close();
return token;
}
private SpeechGrpc.SpeechStub generateApi(AccessToken accessToken) {
final ManagedChannel channel = new OkHttpChannelProvider()
.builderForAddress(HOSTNAME, PORT)
.nameResolverFactory(new DnsNameResolverProvider())
.intercept(new GoogleCredentialsInterceptor(new GoogleCredentials(accessToken)
.createScoped(SCOPE)))
.build();
return SpeechGrpc.newStub(channel);
}
}
private class AccessTokenTask extends AsyncTask<Void, Void, AccessToken> {
@Override
protected AccessToken doInBackground(Void... voids) {
final SharedPreferences prefs =
getSharedPreferences(PREFS, Context.MODE_PRIVATE);
String tokenValue = prefs.getString(PREF_ACCESS_TOKEN_VALUE, null);
long expirationTime = prefs.getLong(PREF_ACCESS_TOKEN_EXPIRATION_TIME, -1);
// Check if the current token is still valid for a while
if (tokenValue != null && expirationTime > 0) {
if (expirationTime
> System.currentTimeMillis() + ACCESS_TOKEN_EXPIRATION_TOLERANCE) {
return new AccessToken(tokenValue, new Date(expirationTime));
}
}
// ***** WARNING *****
// In this sample, we load the credential from a JSON file stored in a raw resource
// folder of this client app. You should never do this in your app. Instead, store
// the file in your server and obtain an access token from there.
// *******************
final InputStream stream = getResources().openRawResource(R.raw.credential);
try {
final GoogleCredentials credentials = GoogleCredentials.fromStream(stream)
.createScoped(SCOPE);
final AccessToken token = credentials.refreshAccessToken();
prefs.edit()
.putString(PREF_ACCESS_TOKEN_VALUE, token.getTokenValue())
.putLong(PREF_ACCESS_TOKEN_EXPIRATION_TIME,
token.getExpirationTime().getTime())
.apply();
return token;
} catch (IOException e) {
Log.e(TAG, "Failed to obtain access token.", e);
}
return null;
}
@Override
protected void onPostExecute(AccessToken accessToken) {
mAccessTokenTask = null;
final ManagedChannel channel = new OkHttpChannelProvider()
.builderForAddress(HOSTNAME, PORT)
.nameResolverFactory(new DnsNameResolverProvider())
.intercept(new GoogleCredentialsInterceptor(new GoogleCredentials(accessToken)
.createScoped(SCOPE)))
.build();
mApi = SpeechGrpc.newStub(channel);
// Schedule access token refresh before it expires
if (mHandler != null) {
mHandler.postDelayed(mFetchAccessTokenRunnable,
Math.max(accessToken.getExpirationTime().getTime()
- System.currentTimeMillis()
- ACCESS_TOKEN_FETCH_MARGIN, ACCESS_TOKEN_EXPIRATION_TOLERANCE));
}
}
}}
public class VoiceRecorder {
private static final int[] SAMPLE_RATE_CANDIDATES = new int[]{48000, 44100};
private static final int CHANNEL = AudioFormat.CHANNEL_IN_MONO;
private static final int ENCODING = AudioFormat.ENCODING_PCM_16BIT;
private static final int AMPLITUDE_THRESHOLD = 1500;
private static final int SPEECH_TIMEOUT_MILLIS = 2000;
private static final int MAX_SPEECH_LENGTH_MILLIS = 30 * 1000;
public static abstract class Callback {
/**
* Called when the recorder starts hearing voice.
*/
public void onVoiceStart() {
}
/**
* Called when the recorder is hearing voice.
*
* @param data The audio data in {@link AudioFormat#ENCODING_PCM_16BIT}.
* @param size The size of the actual data in {@code data}.
*/
public void onVoice(byte[] data, int size) {
}
/**
* Called when the recorder stops hearing voice.
*/
public void onVoiceEnd() {
}
}
private final Callback mCallback;
private AudioRecord mAudioRecord;
private Thread mThread;
private byte[] mBuffer;
private final Object mLock = new Object();
/** The timestamp of the last time that voice is heard. */
private long mLastVoiceHeardMillis = Long.MAX_VALUE;
/** The timestamp when the current voice is started. */
private long mVoiceStartedMillis;
public VoiceRecorder(@NonNull Callback callback) {
mCallback = callback;
}
/**
* Starts recording audio.
*
* <p>The caller is responsible for calling {@link #stop()} later.</p>
*/
public void start() {
// Stop recording if it is currently ongoing.
stop();
// Try to create a new recording session.
mAudioRecord = createAudioRecord();
if (mAudioRecord == null) {
throw new RuntimeException("Cannot instantiate VoiceRecorder");
}
// Start recording.
mAudioRecord.startRecording();
// Start processing the captured audio.
mThread = new Thread(new ProcessVoice());
mThread.start();
}
/**
* Stops recording audio.
*/
public void stop() {
synchronized (mLock) {
System.out.println("stop audio record....");
dismiss();
if (mThread != null) {
mThread.interrupt();
mThread = null;
}
if (mAudioRecord != null) {
mAudioRecord.stop();
mAudioRecord.release();
mAudioRecord = null;
}
mBuffer = null;
System.out.println("stop audio record....2");
}
}
/**
* Dismisses the currently ongoing utterance.
*/
public void dismiss() {
if (mLastVoiceHeardMillis != Long.MAX_VALUE) {
mLastVoiceHeardMillis = Long.MAX_VALUE;
mCallback.onVoiceEnd();
}
}
/**
* Retrieves the sample rate currently used to record audio.
*
* @return The sample rate of recorded audio.
*/
public int getSampleRate() {
if (mAudioRecord != null) {
return mAudioRecord.getSampleRate();
}
return 0;
}
/**
* Creates a new {@link AudioRecord}.
*
* @return A newly created {@link AudioRecord}, or null if it cannot be created (missing
* permissions?).
*/
private AudioRecord createAudioRecord() {
for (int sampleRate : SAMPLE_RATE_CANDIDATES) {
final int sizeInBytes = AudioRecord.getMinBufferSize(sampleRate, CHANNEL, ENCODING);
if (sizeInBytes == AudioRecord.ERROR_BAD_VALUE) {
continue;
}
final AudioRecord audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC,
sampleRate, CHANNEL, ENCODING, sizeInBytes);
if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) {
mBuffer = new byte[sizeInBytes];
return audioRecord;
} else {
audioRecord.release();
}
}
return null;
}
/**
* Continuously processes the captured audio and notifies {@link #mCallback} of corresponding
* events.
*/
private class ProcessVoice implements Runnable {
@Override
public void run() {
while (true) {
synchronized (mLock) {
if (Thread.currentThread().isInterrupted()) {
break;
}
final int size = mAudioRecord.read(mBuffer, 0, mBuffer.length);
final long now = System.currentTimeMillis();
if (isHearingVoice(mBuffer, size)) {
if (mLastVoiceHeardMillis == Long.MAX_VALUE) {
mVoiceStartedMillis = now;
mCallback.onVoiceStart();
}
mCallback.onVoice(mBuffer, size);
mLastVoiceHeardMillis = now;
if (now - mVoiceStartedMillis > MAX_SPEECH_LENGTH_MILLIS) {
end();
}
} else if (mLastVoiceHeardMillis != Long.MAX_VALUE) {
mCallback.onVoice(mBuffer, size);
if (now - mLastVoiceHeardMillis > SPEECH_TIMEOUT_MILLIS) {
end();
}
}
}
}
}
private void end() {
mLastVoiceHeardMillis = Long.MAX_VALUE;
mCallback.onVoiceEnd();
System.out.println("end...");
}
private boolean isHearingVoice(byte[] buffer, int size) {
for (int i = 0; i < size - 1; i += 2) {
// The buffer has LINEAR16 in little endian.
int s = buffer[i + 1];
if (s < 0) s *= -1;
s <<= 8;
s += Math.abs(buffer[i]);
if (s > AMPLITUDE_THRESHOLD) {
return true;
}
}
return false;
}
}}
然后我实现了语音服务和录音机回调,如下所示:
private VoiceRecorder voiceRecorder;
private final SpeechService.Listener speechServiceListener = new SpeechService.Listener() {
@Override
public void onSpeechRecognized(final String text, final boolean isFinal) {
if (isFinal) {
System.out.println("ui thread...");
if (!TextUtils.isEmpty(text)) {
runOnUiThread(() -> {
showMessage(text);
flingAnswer(text);
});
}
}
}
@Override
public void onErrorRecognizing() {
showMessage("Please try again. Could not detect.");
}
@Override
public void onRandomStupidity() {
}
};
private SpeechService speechService;
private final VoiceRecorder.Callback voiceCallback = new VoiceRecorder.Callback() {
@Override
public void onVoiceStart() {
if (speechService != null) {
System.out.println("voice start....");
speechService.startRecognizing(voiceRecorder.getSampleRate());
}
}
@Override
public void onVoice(byte[] data, int size) {
if (speechService != null) {
speechService.recognize(data, size);
}
}
@Override
public void onVoiceEnd() {
if (speechService != null) {
speechService.finishRecognizing();
}
}
};
private final ServiceConnection serviceConnection = new ServiceConnection() {
@Override
public void onServiceConnected(ComponentName componentName, IBinder binder) {
speechService = SpeechService.from(binder);
speechService.addListener(speechServiceListener);
}
@Override
public void onServiceDisconnected(ComponentName componentName) {
speechService = null;
}
};
对于语音输入,代码如下:
@Override
public void stopRecognizing() {
stopVoiceRecorder();
Log.e("Recording", "Stopped");
}
@Override
public void startRecognizing() {
if (permissionManager != null && permissionManager.askForPermissions()) {
startVoiceRecorder();
vibrate.vibrate(50);//Providing haptic feedback to user on press.
}
Log.e("Recording", "Started");
}
binding.imgVoice.setOnTouchListener((v, event) -> {
switch (event.getAction()) {
case MotionEvent.ACTION_UP:
System.out.println("up...");
mCallback.stopRecognizing();
binding.imgVoice
.animate()
.scaleX(1.0f)
.scaleY(1.0f);
binding.imgVoice.setVisibility(View.GONE);
binding.progressBar.setVisibility(View.VISIBLE);
break;
case MotionEvent.ACTION_DOWN:
System.out.println("down...");
binding.imgVoice
.animate()
.scaleX(1.8f)
.scaleY(1.8f);
mCallback.startRecognizing();
break;
}
return true;
});
}
当我按下麦克风时,事件注册为 Action_Down,我启动录音机并在释放麦克风时停止录音机。此外,使用 Action_Down 我正在放大需要在 Action_Up 上缩小的麦克风图标。但是ui在大多数情况下都会冻结。我发现 StreamObserver 的onNext() 回调在isFinal变为真之前不断被调用。
private void startVoiceRecorder() {
if (voiceRecorder != null) {
voiceRecorder.stop();
}
voiceRecorder = new VoiceRecorder(voiceCallback);
voiceRecorder.start();
}
private void stopVoiceRecorder() {
if (voiceRecorder != null) {
voiceRecorder.stop();
voiceRecorder = null;
}
}
但是我希望麦克风在我释放没有发生的麦克风(在 Action up 事件中)后立即缩小。那么,如果有人可以帮助我解决这个问题?
提前致谢。
解决方案
推荐阅读
- ghost-blog - 幽灵到媒体的整合
- java - 如何在 Javafx 中删除视频开头的主窗口闪烁?
- python - 在键级python下面的字典中添加新元素
- xslt - xslt 多个插入节点(如果不存在)
- c# - Swashbuckle 在 asp.net 核心中失败,出现 NotSupportedException 异常
- jquery - 通过 Media Query 使用 css3 控制我的骨架 HTML5
- node.js - 使用redis发送好友状态
- ionic-framework - 405(不允许的方法),请求的资源上不存在“Access-Control-Allow-Origin”标头
- c# - 如何在 C# asp.net 中从 HTML 字符串生成控件
- java - 我想我需要创建自定义布局,但我不知道该怎么做