#android #android-studio #speech-recognition #kaldi #vosk
Question:
I have a custom-trained model, built with Kaldi, for recognizing a small set of words. Now, when I integrate the Kaldi model into Vosk using Android Studio, it gives me this error. Please, can someone help me?
VoskAPI: ExpectToken():io-funcs.cc:200) Failed to read token [started at file position 0], expected
(screenshot: the error as shown in Android Studio)
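The custom model is bundled in the app's assets as "model-en-us" so that StorageService.unpack() in the code below can find it. For reference, a typical Vosk model folder looks roughly like this (only a sketch; the exact files depend on how the model was exported):

model-en-us/
    am/final.mdl
    conf/mfcc.conf
    conf/model.conf
    graph/  (decoding graph, e.g. HCLG.fst or Gr.fst + HCLr.fst)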
The full Android Studio Java code:
// Copyright 2019 Alpha Cephei Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package org.vosk.demo;
import android.Manifest;
import android.app.Activity;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.text.method.ScrollingMovementMethod;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import android.widget.ToggleButton;
import org.json.JSONException;
import org.json.JSONObject;
import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;
import org.vosk.android.RecognitionListener;
import org.vosk.android.SpeechService;
import org.vosk.android.SpeechStreamService;
import org.vosk.android.StorageService;
import java.io.IOException;
import java.io.InputStream;
import androidx.annotation.NonNull;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;
public class VoskActivity extends Activity implements
RecognitionListener {
static private final int STATE_START = 0;
static private final int STATE_READY = 1;
static private final int STATE_DONE = 2;
static private final int STATE_FILE = 3;
static private final int STATE_MIC = 4;
/* Used to handle permission request */
private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;
private Model model;
private SpeechService speechService;
private SpeechStreamService speechStreamService;
private TextView resultView;
@Override
public void onCreate(Bundle state) {
super.onCreate(state);
setContentView(R.layout.main);
// Setup layout
resultView = findViewById(R.id.result_text);
setUiState(STATE_START);
findViewById(R.id.recognize_mic).setOnClickListener(view -> recognizeMicrophone());
((ToggleButton) findViewById(R.id.pause)).setOnCheckedChangeListener((view, isChecked) -> pause(isChecked));
LibVosk.setLogLevel(LogLevel.INFO);
// Check if user has given permission to record audio, init the model after permission is granted
int permissionCheck = ContextCompat.checkSelfPermission(getApplicationContext(), Manifest.permission.RECORD_AUDIO);
if (permissionCheck != PackageManager.PERMISSION_GRANTED) {
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSIONS_REQUEST_RECORD_AUDIO);
} else {
initModel();
}
}
private void initModel() {
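// Unpack the model from assets ("model-en-us") into the app's storage and keep a reference once it is ready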
StorageService.unpack(this, "model-en-us", "model",
(model) -> {
this.model = model;
setUiState(STATE_READY);
},
(exception) -> setErrorState("Failed to unpack the model: " + exception.getMessage()));
}
@Override
public void onRequestPermissionsResult(int requestCode,
@NonNull String[] permissions, @NonNull int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
if (requestCode == PERMISSIONS_REQUEST_RECORD_AUDIO) {
if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
// Recognizer initialization is a time-consuming and it involves IO,
// so we execute it in async task
initModel();
} else {
finish();
}
}
}
@Override
public void onDestroy() {
super.onDestroy();
if (speechService != null) {
speechService.stop();
speechService.shutdown();
}
if (speechStreamService != null) {
speechStreamService.stop();
}
}
@Override
public void onResult(String s) {
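// Vosk delivers results as JSON, e.g. {"text" : "two"}; extract the text and map recognized keywords to toasts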
String spoken="";
try {
JSONObject o = new JSONObject(s);
spoken=o.getString("text");
if(spoken.equals("yes") || spoken.equals("no") || spoken.equals("help") || spoken.equals("stop") || spoken.equals("hey"))
{
Toast.makeText(getApplicationContext(),spoken , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("one"))
{
Toast.makeText(getApplicationContext(),"1" , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("two") || spoken.equals("to"))
{
Toast.makeText(getApplicationContext(),"2" , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("three"))
{
Toast.makeText(getApplicationContext(),"3" , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("four")|| spoken.equals("for"))
{
Toast.makeText(getApplicationContext(),"4" , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("five"))
{
Toast.makeText(getApplicationContext(),"5" , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("six"))
{
Toast.makeText(getApplicationContext(),"6" , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("seven"))
{
Toast.makeText(getApplicationContext(),"7" , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("eight"))
{
Toast.makeText(getApplicationContext(),"8" , Toast.LENGTH_SHORT).show();
}
else if(spoken.equals("nine"))
{
Toast.makeText(getApplicationContext(),"9" , Toast.LENGTH_SHORT).show();
}
}
catch (JSONException ignored) {
Toast.makeText(getApplicationContext(), ignored.getMessage(), Toast.LENGTH_LONG).show();
}
}
@Override
public void onFinalResult(String hypothesis) {
// Toast.makeText(getApplicationContext(), hypothesis, Toast.LENGTH_LONG).show();
// setUiState(STATE_DONE);
if (speechStreamService != null) {
speechStreamService = null;
}
}
@Override
public void onPartialResult(String hypothesis)
{
}
@Override
public void onError(Exception e) {
setErrorState(e.getMessage());
}
@Override
public void onTimeout() {
setUiState(STATE_DONE);
}
private void setUiState(int state) {
switch (state) {
case STATE_START:
resultView.setText(R.string.preparing);
resultView.setMovementMethod(new ScrollingMovementMethod());
findViewById(R.id.recognize_mic).setEnabled(false);
findViewById(R.id.pause).setEnabled((false));
break;
case STATE_READY:
resultView.setText(R.string.ready);
((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
findViewById(R.id.recognize_mic).setEnabled(true);
findViewById(R.id.pause).setEnabled((false));
break;
case STATE_DONE:
((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
findViewById(R.id.recognize_mic).setEnabled(true);
findViewById(R.id.pause).setEnabled((false));
break;
case STATE_FILE:
resultView.setText(getString(R.string.starting));
findViewById(R.id.recognize_mic).setEnabled(false);
findViewById(R.id.pause).setEnabled((false));
break;
case STATE_MIC:
((Button) findViewById(R.id.recognize_mic)).setText(R.string.stop_microphone);
resultView.setText(getString(R.string.say_something));
findViewById(R.id.recognize_mic).setEnabled(true);
findViewById(R.id.pause).setEnabled((true));
break;
default:
throw new IllegalStateException("Unexpected value: " + state);
}
}
private void setErrorState(String message) {
resultView.setText(message);
((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
findViewById(R.id.recognize_mic).setEnabled(false);
}
private void recognizeFile() {
if (speechStreamService != null) {
setUiState(STATE_DONE);
speechStreamService.stop();
speechStreamService = null;
} else {
setUiState(STATE_FILE);
try {
// Restrict recognition of the demo file to a fixed grammar (digits plus [unk])
Recognizer rec = new Recognizer(model, 16000.f, "[\"one zero zero zero one\", " +
        "\"oh zero one two three four five six seven eight nine\", \"[unk]\"]");
InputStream ais = getAssets().open(
"10001-90210-01803.wav");
if (ais.skip(44) != 44) throw new IOException("File too short");
speechStreamService = new SpeechStreamService(rec, ais, 16000);
speechStreamService.start(this);
} catch (IOException e) {
setErrorState(e.getMessage());
}
}
}
private void recognizeMicrophone() {
if (speechService != null) {
setUiState(STATE_DONE);
speechService.stop();
speechService = null;
} else {
setUiState(STATE_MIC);
try {
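// Record from the microphone at 16 kHz and stream the audio to the recognizer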
Recognizer rec = new Recognizer(model, 16000.0f);
speechService = new SpeechService(rec, 16000.0f);
speechService.startListening(this);
} catch (IOException e) {
setErrorState(e.getMessage());
}
}
}
private void pause(boolean checked) {
if (speechService != null) {
speechService.setPause(checked);
}
}
}
Comments:
1. Please provide enough code so that others can better understand or reproduce the problem.