VoskAPI: ExpectToken():io-funcs.cc:200) Не удалось прочитать токен [начиная с позиции файла 0], ожидался

#android #android-studio #speech-recognition #kaldi #vosk

Вопрос:

У меня есть собственная модель, обученная с помощью Kaldi для распознавания нескольких отдельных слов. Когда я подключаю эту модель Kaldi к Vosk в Android Studio, возникает приведённая ниже ошибка. Пожалуйста, помогите разобраться.

VoskAPI: ExpectToken():io-funcs.cc:200) Не удалось прочитать токен [начиная с позиции файла 0], ожидался

Пожалуйста, нажмите на синий текст ниже, чтобы открыть изображения. Ошибка в Android Studio

Структура моей модели

Полный код на Java для Android Studio:

 // Copyright 2019 Alpha Cephei Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//       http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.vosk.demo;

import android.Manifest;
import android.app.Activity;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.text.method.ScrollingMovementMethod;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import android.widget.ToggleButton;

import org.json.JSONException;
import org.json.JSONObject;
import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;
import org.vosk.android.RecognitionListener;
import org.vosk.android.SpeechService;
import org.vosk.android.SpeechStreamService;
import org.vosk.android.StorageService;

import java.io.IOException;
import java.io.InputStream;

import androidx.annotation.NonNull;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

public class VoskActivity extends Activity implements
        RecognitionListener {

    static private final int STATE_START = 0;
    static private final int STATE_READY = 1;
    static private final int STATE_DONE = 2;
    static private final int STATE_FILE = 3;
    static private final int STATE_MIC = 4;

    /* Used to handle permission request */
    private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;

    private Model model;
    private SpeechService speechService;
    private SpeechStreamService speechStreamService;
    private TextView resultView;

    @Override
    public void onCreate(Bundle state) {
        super.onCreate(state);
        setContentView(R.layout.main);

        // Setup layout
        resultView = findViewById(R.id.result_text);
        setUiState(STATE_START);

        findViewById(R.id.recognize_mic).setOnClickListener(view -> recognizeMicrophone());
        ((ToggleButton) findViewById(R.id.pause)).setOnCheckedChangeListener((view, isChecked) -> pause(isChecked));

        LibVosk.setLogLevel(LogLevel.INFO);

        // Check if user has given permission to record audio, init the model after permission is granted
        int permissionCheck = ContextCompat.checkSelfPermission(getApplicationContext(), Manifest.permission.RECORD_AUDIO);
        if (permissionCheck != PackageManager.PERMISSION_GRANTED) {
            ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSIONS_REQUEST_RECORD_AUDIO);
        } else {
            initModel();
        }
    }

    private void initModel() {
        StorageService.unpack(this, "model-en-us", "model",
                (model) -> {
                    this.model = model;
                    setUiState(STATE_READY);
                },
                (exception) -> setErrorState("Failed to unpack the model"   exception.getMessage()));
    }


    @Override
    public void onRequestPermissionsResult(int requestCode,
                                           @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);

        if (requestCode == PERMISSIONS_REQUEST_RECORD_AUDIO) {
            if (grantResults.length > 0 amp;amp; grantResults[0] == PackageManager.PERMISSION_GRANTED) {
                // Recognizer initialization is a time-consuming and it involves IO,
                // so we execute it in async task
                initModel();
            } else {
                finish();
            }
        }
    }

    @Override
    public void onDestroy() {
        super.onDestroy();

        if (speechService != null) {
            speechService.stop();
            speechService.shutdown();
        }

        if (speechStreamService != null) {
            speechStreamService.stop();
        }
    }

    @Override
    public void onResult(String s) {

        String spoken="";
        try {
            JSONObject o = new JSONObject(s);
            spoken=o.getString("text");
            if(spoken.equals("yes") || spoken.equals("no") || spoken.equals("help") || spoken.equals("stop") || spoken.equals("hey"))
            {
                Toast.makeText(getApplicationContext(),spoken , Toast.LENGTH_SHORT).show();
            }

            else if(spoken.equals("one"))
            {
                Toast.makeText(getApplicationContext(),"1" , Toast.LENGTH_SHORT).show();
            }
            else if(spoken.equals("two") || spoken.equals("to"))
            {

                Toast.makeText(getApplicationContext(),"2" , Toast.LENGTH_SHORT).show();
            }
            else if(spoken.equals("three"))
            {

                Toast.makeText(getApplicationContext(),"3" , Toast.LENGTH_SHORT).show();
            }
            else if(spoken.equals("four")|| spoken.equals("for"))
            {

                Toast.makeText(getApplicationContext(),"4" , Toast.LENGTH_SHORT).show();
            }
            else if(spoken.equals("five"))
            {

                Toast.makeText(getApplicationContext(),"5" , Toast.LENGTH_SHORT).show();
            }
            else if(spoken.equals("six"))
            {

                Toast.makeText(getApplicationContext(),"6" , Toast.LENGTH_SHORT).show();
            }
            else if(spoken.equals("seven"))
            {

                Toast.makeText(getApplicationContext(),"7" , Toast.LENGTH_SHORT).show();
            }
            else if(spoken.equals("eight"))
            {

                Toast.makeText(getApplicationContext(),"8" , Toast.LENGTH_SHORT).show();
            }
            else if(spoken.equals("nine"))
            {

                Toast.makeText(getApplicationContext(),"9" , Toast.LENGTH_SHORT).show();
            }

        }
        catch (JSONException ignored) {
            Toast.makeText(getApplicationContext(), ignored.getMessage(), Toast.LENGTH_LONG).show();
        }


    }

    @Override
    public void onFinalResult(String hypothesis) {
       // Toast.makeText(getApplicationContext(), hypothesis, Toast.LENGTH_LONG).show();
     //   setUiState(STATE_DONE);
        if (speechStreamService != null) {
            speechStreamService = null;
        }
    }

    @Override
    public void onPartialResult(String hypothesis)
    {
    }

    @Override
    public void onError(Exception e) {
        setErrorState(e.getMessage());
    }

    @Override
    public void onTimeout() {
        setUiState(STATE_DONE);
    }

    private void setUiState(int state) {
        switch (state) {
            case STATE_START:
                resultView.setText(R.string.preparing);
                resultView.setMovementMethod(new ScrollingMovementMethod());
                findViewById(R.id.recognize_mic).setEnabled(false);
                findViewById(R.id.pause).setEnabled((false));
                break;
            case STATE_READY:
                resultView.setText(R.string.ready);
                ((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
                findViewById(R.id.recognize_mic).setEnabled(true);
                findViewById(R.id.pause).setEnabled((false));
                break;
            case STATE_DONE:
                ((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
                findViewById(R.id.recognize_mic).setEnabled(true);
                findViewById(R.id.pause).setEnabled((false));
                break;
            case STATE_FILE:
                resultView.setText(getString(R.string.starting));
                findViewById(R.id.recognize_mic).setEnabled(false);
                findViewById(R.id.pause).setEnabled((false));
                break;
            case STATE_MIC:
                ((Button) findViewById(R.id.recognize_mic)).setText(R.string.stop_microphone);
                resultView.setText(getString(R.string.say_something));
                findViewById(R.id.recognize_mic).setEnabled(true);
                findViewById(R.id.pause).setEnabled((true));
                break;
            default:
                throw new IllegalStateException("Unexpected value: "   state);
        }
    }

    private void setErrorState(String message) {
        resultView.setText(message);
        ((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
        findViewById(R.id.recognize_mic).setEnabled(false);
    }

    private void recognizeFile() {
        if (speechStreamService != null) {
            setUiState(STATE_DONE);
            speechStreamService.stop();
            speechStreamService = null;
        } else {
            setUiState(STATE_FILE);
            try {
                Recognizer rec = new Recognizer(model, 16000.f, "["one zero zero zero one", "  
                        ""oh zero one two three four five six seven eight nine", "[unk]"]");

                InputStream ais = getAssets().open(
                        "10001-90210-01803.wav");
                if (ais.skip(44) != 44) throw new IOException("File too short");

                speechStreamService = new SpeechStreamService(rec, ais, 16000);
                speechStreamService.start(this);
            } catch (IOException e) {
                setErrorState(e.getMessage());
            }
        }
    }

    private void recognizeMicrophone() {
        if (speechService != null) {
            setUiState(STATE_DONE);
            speechService.stop();
            speechService = null;
        } else {
            setUiState(STATE_MIC);
            try {
                Recognizer rec = new Recognizer(model, 16000.0f);
                speechService = new SpeechService(rec, 16000.0f);
                speechService.startListening(this);
            } catch (IOException e) {
                setErrorState(e.getMessage());
            }
        }
    }


    private void pause(boolean checked) {
        if (speechService != null) {
            speechService.setPause(checked);
        }
    }

}
 

Комментарии:

1. Пожалуйста, предоставьте достаточно кода, чтобы другие могли лучше понять или воспроизвести проблему.