#speech-recognition #speech-to-text #microsoft-cognitive #naudio
Question:
Desired Situation
I want to listen to the audio coming out of a device (a laptop) and convert it to text in real time (without saving it to a wav file).
Current Situation
I can capture and save the output audio to a wav file using NAudio.
Below is the sample code that saves the output to a wav file; this code works fine.
string outputFileName = @"xxxxxxrecievers.wav";
var capture = new WasapiLoopbackCapture();
var writer = new WaveFileWriter(outputFileName, capture.WaveFormat);

capture.DataAvailable += async (s, e) =>
{
    if (writer != null)
    {
        await writer.WriteAsync(e.Buffer, 0, e.BytesRecorded);
        await writer.FlushAsync();
    }
};

capture.RecordingStopped += (s, e) =>
{
    if (writer != null)
    {
        writer.Dispose();
        writer = null;
    }
    capture.Dispose();
};

capture.StartRecording();
Console.WriteLine("Record Started, Press Any key to stop the record");
Console.ReadLine();
capture.StopRecording();
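For reference, the format that WasapiLoopbackCapture delivers can be checked with a small diagnostic sketch like the one below; loopback capture records in the device's shared mix format, which is typically 32-bit IEEE float, stereo, at 44.1 or 48 kHz rather than 16-bit PCM (the exact values depend on the machine).

// Diagnostic sketch: print the format WasapiLoopbackCapture actually produces.
using System;
using NAudio.Wave;

var capture = new WasapiLoopbackCapture();
Console.WriteLine($"Encoding:    {capture.WaveFormat.Encoding}");
Console.WriteLine($"Sample rate: {capture.WaveFormat.SampleRate} Hz");
Console.WriteLine($"Bits/sample: {capture.WaveFormat.BitsPerSample}");
Console.WriteLine($"Channels:    {capture.WaveFormat.Channels}");
capture.Dispose();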
To convert speech to text in real time, I am trying to push the buffer into a PushAudioStream, as in the following sample code:
public static async Task RecognitionWithPushAudioStreamAsync()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");
    var stopRecognition = new TaskCompletionSource<int>();

    // Create a push stream
    using (var pushStream = AudioInputStream.CreatePushStream())
    {
        using (var audioInput = AudioConfig.FromStreamInput(pushStream))
        {
            // Creates a speech recognizer using audio stream input.
            using (var recognizer = new SpeechRecognizer(config, audioInput))
            {
                // Subscribes to events.
                recognizer.Recognizing += (s, e) =>
                {
                    Console.WriteLine($"RECOGNIZING: Text={e.Result.Text}");
                };

                recognizer.Recognized += (s, e) =>
                {
                    if (e.Result.Reason == ResultReason.RecognizedSpeech)
                    {
                        Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                    }
                    else if (e.Result.Reason == ResultReason.NoMatch)
                    {
                        Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                    }
                };

                recognizer.Canceled += (s, e) =>
                {
                    Console.WriteLine($"CANCELED: Reason={e.Reason}");
                    if (e.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"CANCELED: ErrorCode={e.ErrorCode}");
                        Console.WriteLine($"CANCELED: ErrorDetails={e.ErrorDetails}");
                        Console.WriteLine($"CANCELED: Did you update the subscription info?");
                    }
                    stopRecognition.TrySetResult(0);
                };

                recognizer.SessionStarted += (s, e) =>
                {
                    Console.WriteLine("\nSession started event.");
                };

                recognizer.SessionStopped += (s, e) =>
                {
                    Console.WriteLine("\nSession stopped event.");
                    Console.WriteLine("\nStop recognition.");
                    stopRecognition.TrySetResult(0);
                };

                // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                string outputFileName = @"xxxxxxrecievers.wav";
                var capture = new WasapiLoopbackCapture();
                var writer = new WaveFileWriter(outputFileName, capture.WaveFormat);

                capture.DataAvailable += async (s, e) =>
                {
                    if (writer != null)
                    {
                        await writer.WriteAsync(e.Buffer, 0, e.BytesRecorded);
                        await writer.FlushAsync();
                        pushStream.Write(e.Buffer, e.BytesRecorded); // try to push buffer here
                    }
                };

                capture.RecordingStopped += (s, e) =>
                {
                    if (writer != null)
                    {
                        writer.Dispose();
                        writer = null;
                    }
                    capture.Dispose();
                };

                capture.StartRecording();
                Console.WriteLine("Record Started, Press Any key to stop the record");
                Console.ReadLine();
                capture.StopRecording();
                pushStream.Close();

                // Waits for completion.
                // Use Task.WaitAny to keep the task rooted.
                Task.WaitAny(new[] { stopRecognition.Task });

                // Stops recognition.
                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }
        }
    }
}
In short, I am opening a push stream like
using (var pushStream = AudioInputStream.CreatePushStream())
and trying to push the buffer into that stream like
pushStream.Write(e.Buffer, e.BytesRecorded);
but speech is not recognized by the Cognitive Services speech service.
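As far as I understand, a push stream created without an explicit AudioStreamFormat expects 16 kHz, 16-bit, mono PCM, while WasapiLoopbackCapture delivers the device mix format (usually 32-bit IEEE float, stereo, 44.1/48 kHz), so I suspect a format mismatch. Below is an untested sketch of the conversion I am considering; the recognizer setup is the same as above and omitted, and the NAudio provider chain and the 8192-byte chunk size are my own assumptions, not verified code.

// Untested sketch: convert the loopback audio to 16 kHz, 16-bit, mono PCM
// before pushing it to the recognizer (recognizer wiring omitted, see above).
using System;
using Microsoft.CognitiveServices.Speech.Audio;
using NAudio.Wave;
using NAudio.Wave.SampleProviders;

// Declare the format that will actually be pushed.
var streamFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
var pushStream = AudioInputStream.CreatePushStream(streamFormat);

var capture = new WasapiLoopbackCapture();

// Buffer the raw loopback data and build a conversion chain on top of it:
// float samples -> mono -> 16 kHz -> 16-bit PCM.
var buffered = new BufferedWaveProvider(capture.WaveFormat)
{
    DiscardOnBufferOverflow = true,
    ReadFully = false // make Read() return only what is available, so the drain loop terminates
};
ISampleProvider samples = buffered.ToSampleProvider();
if (capture.WaveFormat.Channels == 2) // assuming a stereo mix format
{
    samples = new StereoToMonoSampleProvider(samples) { LeftVolume = 0.5f, RightVolume = 0.5f };
}
var resampled = new WdlResamplingSampleProvider(samples, 16000);
var pcm16 = new SampleToWaveProvider16(resampled);

var chunk = new byte[8192]; // arbitrary chunk size

capture.DataAvailable += (s, e) =>
{
    buffered.AddSamples(e.Buffer, 0, e.BytesRecorded);

    // Drain whatever has been converted so far and push it to the service.
    int read;
    while ((read = pcm16.Read(chunk, 0, chunk.Length)) > 0)
    {
        pushStream.Write(chunk, read);
    }
};

capture.RecordingStopped += (s, e) =>
{
    pushStream.Close();
    capture.Dispose();
};

capture.StartRecording();
Console.WriteLine("Record started, press Enter to stop");
Console.ReadLine();
capture.StopRecording();

Is this the right direction, or am I missing something else?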
Thanks in advance for any help.