總目錄地址:AI 系列總目錄
需要最新源碼,或技術提問,請加 QQ 群:538327407
我的各類 GitHub 開源項目和代碼:https://github.com/linbin524
目標需求
使用錄音形式,模擬微信語音聊天。按住錄音,鬆開發送語音,並完成語音識別。
PS:百度的語音識別有 60 秒長度限制,需要自己做好控制。
實現方案
採用 C# WinForm 程序實現桌面版,採用 Accord 實現語音錄製、停止等基礎語音操作;點擊停止按鈕後,
自動調用百度語音識別接口,將識別內容顯示在文本框中。
備註:語音識別需要配套陣列麥克風(請先註冊百度開發者)。百度語音識別接口請參考:http://ai.baidu.com/docs#/ASR-Online-Csharp-SDK/top
實現效果展現
實現過程
一、下載 Accord,完成語音操作引用
accord 官方 地址:http://accord-framework.net/intro.html
官網中有示例 demo,筆者的就是在示例 demo 上做改造的。
建立自己的項目,引用包中的 dll
界面代碼:
using System;
using System.Drawing;
using System.IO;
using System.Windows.Forms;
using Accord.Audio;
using Accord.Audio.Formats;
using Accord.DirectSound;
using Accord.Audio.Filters;
using Baidu.Aip.API;

namespace SampleApp
{
    /// <summary>
    /// Main window of the recorder sample: records audio from a capture device into
    /// an in-memory WAV stream, plays it back, and, when the stop button is pressed,
    /// sends the recorded stream to <c>SpeechAPI.AsrData</c> and shows the result.
    /// </summary>
    public partial class MainForm : Form
    {
        // In-memory WAV data for the current recording.
        private MemoryStream stream;

        private IAudioSource source;
        private IAudioOutput output;

        private WaveEncoder encoder;
        private WaveDecoder decoder;

        // Most recent frame, used as the buffer for the wave chart.
        private float[] current;

        // Running totals accumulated in source_NewFrame for the current recording.
        private int frames;
        private int samples;
        private TimeSpan duration;

        /// <summary>
        /// Creates the form and configures the wave chart.
        /// Note: speech recognition requires an array microphone.
        /// </summary>
        public MainForm()
        {
            InitializeComponent();

            // Configure the wavechart
            chart.SimpleMode = true;
            chart.AddWaveform("wave", Color.Green, 1, false);

            updateButtons();

            // Application.Idle += ProcessFrame;
        }

        /// <summary>
        /// Placeholder idle handler; intentionally empty and currently not subscribed.
        /// </summary>
        void ProcessFrame(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Starts recording audio from the capture device.
        /// </summary>
        private void btnRecord_Click(object sender, EventArgs e)
        {
            // Create the capture device. This is the core of the recording setup:
            // 16 kHz, mono, 16-bit PCM.
            source = new AudioCaptureDevice()
            {
                DesiredFrameSize = 4096,
                SampleRate = 16000, // sample rate
                //SampleRate = 22050,
                Channels = 1,

                // We will be reading 16-bit PCM
                Format = SampleFormat.Format16Bit
            };

            // Wire up some events
            source.NewFrame += source_NewFrame;
            source.AudioSourceError += source_AudioSourceError;

            // Create buffer for wavechart control
            current = new float[source.DesiredFrameSize];

            // Create stream to store file
            stream = new MemoryStream();
            encoder = new WaveEncoder(stream);

            // A new stream means a new recording: restart the running totals so the
            // length label does not keep accumulating across recordings.
            duration = TimeSpan.Zero;
            samples = 0;
            frames = 0;

            // Start
            source.Start();
            updateButtons();
        }

        /// <summary>
        /// Plays back the recorded audio stream.
        /// </summary>
        private void btnPlay_Click(object sender, EventArgs e)
        {
            // First, we rewind the stream
            stream.Seek(0, SeekOrigin.Begin);

            // Then we create a decoder for it
            decoder = new WaveDecoder(stream);

            // Configure the track bar so the cursor
            // can show the proper current position
            if (trackBar1.Value < decoder.Frames)
            {
                decoder.Seek(trackBar1.Value);
            }
            trackBar1.Maximum = decoder.Samples;

            // Here we can create the output audio device that will be playing the recording
            output = new AudioOutputDevice(this.Handle, decoder.SampleRate, decoder.Channels);

            // Wire up some events
            output.FramePlayingStarted += output_FramePlayingStarted;
            output.NewFrameRequested += output_NewFrameRequested;
            output.Stopped += output_PlayingFinished;

            // Start playing!
            output.Play();

            updateButtons();
        }

        /// <summary>
        /// Stops recording or playback, clears the wave chart, then runs speech
        /// recognition on the recorded stream and shows the result in the text box.
        /// </summary>
        private void btnStop_Click(object sender, EventArgs e)
        {
            // Stops both cases
            if (source != null)
            {
                // If we were recording
                source.SignalToStop();
                source.WaitForStop();
            }
            if (output != null)
            {
                // If we were playing
                output.SignalToStop();
                output.WaitForStop();
            }

            updateButtons();

            // Also zero out the buffers and screen
            clearWaveform();

            // Nothing has been recorded yet, so there is nothing to recognize.
            if (stream == null)
            {
                return;
            }

            SpeechAPI speechApi = new SpeechAPI();
            string result = speechApi.AsrData(stream, "wav");
            tb_result.Text = "語音識別結果:" + result;
        }

        /// <summary>
        /// Called when the audio source reports an error; rethrows the error
        /// description as an exception.
        /// </summary>
        private void source_AudioSourceError(object sender, AudioSourceErrorEventArgs e)
        {
            throw new Exception(e.Description);
        }

        /// <summary>
        /// Called for every new input frame: refreshes the wave chart, appends the
        /// frame to the WAV stream and updates the running totals.
        /// </summary>
        private void source_NewFrame(object sender, NewFrameEventArgs eventArgs)
        {
            eventArgs.Signal.CopyTo(current);

            updateWaveform(current, eventArgs.Signal.Length);

            encoder.Encode(eventArgs.Signal);

            duration += eventArgs.Signal.Duration;
            samples += eventArgs.Signal.Samples;
            frames += eventArgs.Signal.Length;
        }

        /// <summary>
        /// Called when the output device starts playing a frame: moves the track bar
        /// and draws the frame that is being played.
        /// </summary>
        private void output_FramePlayingStarted(object sender, PlayFrameEventArgs e)
        {
            updateTrackbar(e.FrameIndex);

            if (e.FrameIndex + e.Count < decoder.Frames)
            {
                // Decode the frame for display only, then restore the decoder
                // position so the playback reads are not disturbed.
                int previous = decoder.Position;
                decoder.Seek(e.FrameIndex);

                Signal s = decoder.Decode(e.Count);
                decoder.Seek(previous);

                updateWaveform(s.ToFloat(), s.Length);
            }
        }

        /// <summary>
        /// Called when playback stops: refreshes the buttons and clears the wave chart.
        /// </summary>
        private void output_PlayingFinished(object sender, EventArgs e)
        {
            updateButtons();

            clearWaveform();
        }

        /// <summary>
        /// Called when the output device needs more audio: decodes the next chunk
        /// into the supplied buffer, or requests a stop when nothing is left.
        /// </summary>
        private void output_NewFrameRequested(object sender, NewFrameRequestedEventArgs e)
        {
            e.FrameIndex = decoder.Position;

            Signal signal = decoder.Decode(e.Frames);

            if (signal == null)
            {
                e.Stop = true;
                return;
            }

            e.Frames = signal.Length;
            signal.CopyTo(e.Buffer);
        }

        /// <summary>
        /// Zeroes the chart buffer and redraws the chart. Does nothing when no
        /// recording has been started yet (the buffer does not exist).
        /// </summary>
        private void clearWaveform()
        {
            if (current == null)
            {
                return;
            }

            Array.Clear(current, 0, current.Length);
            updateWaveform(current, current.Length);
        }

        /// <summary>
        /// Draws <paramref name="samples"/> on the wave chart, marshalling to the UI
        /// thread when required.
        /// </summary>
        /// <param name="samples">Samples to display.</param>
        /// <param name="length">Number of samples to display.</param>
        private void updateWaveform(float[] samples, int length)
        {
            if (InvokeRequired)
            {
                BeginInvoke(new Action(() =>
                {
                    chart.UpdateWaveform("wave", samples, length);
                }));
            }
            else
            {
                // Draw the buffer that was passed in, the same as the invoked path
                // above, instead of always drawing the recording buffer.
                chart.UpdateWaveform("wave", samples, length);
            }
        }

        /// <summary>
        /// Moves the track bar to <paramref name="value"/>, clamped to the track bar
        /// range, marshalling to the UI thread when required.
        /// </summary>
        private void updateTrackbar(int value)
        {
            if (InvokeRequired)
            {
                BeginInvoke(new Action(() =>
                {
                    trackBar1.Value = Math.Max(trackBar1.Minimum, Math.Min(trackBar1.Maximum, value));
                }));
            }
            else
            {
                trackBar1.Value = Math.Max(trackBar1.Minimum, Math.Min(trackBar1.Maximum, value));
            }
        }

        /// <summary>
        /// Enables or disables the controls according to whether the form is
        /// recording, playing, or idle.
        /// </summary>
        private void updateButtons()
        {
            if (InvokeRequired)
            {
                BeginInvoke(new Action(updateButtons));
                return;
            }

            if (source != null && source.IsRunning)
            {
                // Recording
                btnBwd.Enabled = false;
                btnFwd.Enabled = false;
                btnPlay.Enabled = false;
                btnStop.Enabled = true;
                btnRecord.Enabled = false;
                trackBar1.Enabled = false;
            }
            else if (output != null && output.IsRunning)
            {
                // Playing
                btnBwd.Enabled = false;
                btnFwd.Enabled = false;
                btnPlay.Enabled = false;
                btnStop.Enabled = true;
                btnRecord.Enabled = false;
                trackBar1.Enabled = true;
            }
            else
            {
                // Idle
                btnBwd.Enabled = false;
                btnFwd.Enabled = false;
                btnPlay.Enabled = stream != null;
                btnStop.Enabled = false;
                btnRecord.Enabled = true;
                trackBar1.Enabled = decoder != null;

                trackBar1.Value = 0;
            }
        }

        /// <summary>
        /// Signals the capture and output devices to stop when the form is closed.
        /// </summary>
        private void MainFormFormClosed(object sender, FormClosedEventArgs e)
        {
            if (source != null)
            {
                source.SignalToStop();
            }
            if (output != null)
            {
                output.SignalToStop();
            }
        }

        /// <summary>
        /// Writes the recorded stream to the file chosen in the save dialog.
        /// </summary>
        private void saveFileDialog1_FileOk(object sender, System.ComponentModel.CancelEventArgs e)
        {
            // Nothing recorded yet, so there is nothing to save.
            if (stream == null)
            {
                return;
            }

            // "using" closes the file even if the write throws.
            using (Stream fileStream = saveFileDialog1.OpenFile())
            {
                stream.WriteTo(fileStream);
            }
        }

        private void saveToolStripMenuItem_Click(object sender, EventArgs e)
        {
            saveFileDialog1.ShowDialog(this);
        }

        /// <summary>
        /// Refreshes the label that shows the recorded length.
        /// </summary>
        private void updateTimer_Tick(object sender, EventArgs e)
        {
            // TotalSeconds is the whole duration as a fractional number; Seconds is
            // only the 0-59 integer component and would wrap after one minute.
            lbLength.Text = String.Format("Length: {0:00.00} sec.", duration.TotalSeconds);
        }

        private void aboutToolStripMenuItem_Click(object sender, EventArgs e)
        {
            new AboutBox().ShowDialog(this);
        }

        private void closeToolStripMenuItem_Click(object sender, EventArgs e)
        {
            Close();
        }

        private void btnIncreaseVolume_Click(object sender, EventArgs e)
        {
            adjustVolume(1.25f);
        }

        private void btnDecreaseVolume_Click(object sender, EventArgs e)
        {
            adjustVolume(0.75f);
        }

        /// <summary>
        /// Applies a volume filter to the whole recording and re-encodes it into the
        /// same stream. Does nothing when there is no recording yet.
        /// </summary>
        /// <param name="value">Value passed to <c>VolumeFilter</c>.</param>
        private void adjustVolume(float value)
        {
            if (stream == null)
            {
                return;
            }

            // First, we rewind the stream
            stream.Seek(0, SeekOrigin.Begin);

            // Then we create a decoder for it
            decoder = new WaveDecoder(stream);

            var signal = decoder.Decode();

            // We apply the volume filter
            var volume = new VolumeFilter(value);
            volume.ApplyInPlace(signal);

            // Then we store it again
            stream.Seek(0, SeekOrigin.Begin);
            encoder = new WaveEncoder(stream);
            encoder.Encode(signal);
        }
    }
}
百度語音識別接口
百度已經提供 SDK,支持的語音格式如下。
原始 PCM 的錄音參數必須符合 8k/16k 採樣率、16bit 位深、單聲道,支持的格式有:pcm(不壓縮)、wav(不壓縮,pcm編碼)、amr(壓縮格式)。
/// <summary>
/// Reads an audio file from disk and submits its bytes to the speech recognition client.
/// </summary>
/// <param name="filePath">Path of the audio file to read.</param>
/// <param name="format">Audio format passed to the client; defaults to "pcm".</param>
/// <param name="rate">Sample rate passed to the client; defaults to 16000.</param>
/// <returns>The string form of the value returned by <c>_asrClient.Recognize</c>.</returns>
public string AsrData(string filePath, string format = "pcm", int rate = 16000)
{
    var data = File.ReadAllBytes(filePath);

    // Forward the caller's sample rate; previously the parameter was ignored and
    // 16000 was always sent, so a non-default rate had no effect.
    var result = _asrClient.Recognize(data, format, rate);
    return result.ToString();
}
結果評測:
使用普通麥克風時語音識別效果不好,需要陣列麥克風才可以。