基於 Accord.Audio 和百度語音識別

總目錄地址:AI 系列 總目錄

 

 

如需最新源碼,或有技術提問,請加 QQ 羣:538327407

 

我的各類 GitHub 開源項目和代碼:https://github.com/linbin524

 

目標需求

 

使用錄音形式,模擬微信語音聊天:按住錄音,鬆開發送語音,並完成語音識別。

ps:百度的語音識別有 60 秒長度限制,需要自己做好控制。

 

實現方案

採用 C# WinForm 程序實現桌面版,採用 Accord 實現語音錄製、停止等基礎語音操作;點擊停止按鈕後,

自動調用百度語音識別接口,將識別內容顯示在文本框中。

備註:語音識別需要配套陣列麥克風(請先註冊百度開發者)。百度語音識別接口請參考:http://ai.baidu.com/docs#/ASR-Online-Csharp-SDK/top

 

實現效果展示

 

 

 

實現過程

 

一、下載 Accord,完成語音操作所需的引用

 

accord 官方 地址:http://accord-framework.net/intro.html

官網中有示例 demo,筆者的程序就是在示例 demo 上做改造的。

 

 

建立自己的項目,引用包中的 dll

 

 界面代碼:

using System;
using System.Drawing;
using System.IO;
using System.Windows.Forms;
using Accord.Audio;
using Accord.Audio.Formats;
using Accord.DirectSound;
using Accord.Audio.Filters;
using Baidu.Aip.API;

namespace SampleApp
{

    public partial class MainForm : Form
    {
        // In-memory stream that receives the WAV-encoded recording; it is also
        // read back for playback, saving, volume adjustment and recognition.
        private MemoryStream stream;

        // Capture device created in btnRecord_Click.
        private IAudioSource source;
        // Playback device created in btnPlay_Click.
        private IAudioOutput output;

        // Writes captured frames into `stream`.
        private WaveEncoder encoder;
        // Reads frames back out of `stream`.
        private WaveDecoder decoder;

        // Sample buffer handed to the waveform chart; sized to the capture frame size.
        private float[] current;

        // Running totals accumulated in source_NewFrame from each frame's
        // Signal.Length, Signal.Samples and Signal.Duration respectively.
        private int frames;
        private int samples;
        private TimeSpan duration;
        /// <summary>
        /// Creates the form, prepares the waveform chart and puts the buttons
        /// into their initial state.
        /// </summary>
        /// <remarks>
        /// Note from the author: speech recognition needs a matching array microphone.
        /// </remarks>
        public MainForm()
        {
            this.InitializeComponent();

            // The chart runs in simple mode and shows a single green series named "wave".
            Color waveColor = Color.Green;
            chart.SimpleMode = true;
            chart.AddWaveform("wave", waveColor, 1, false);

            this.updateButtons();

            // Idle-time processing through ProcessFrame is left disabled:
            // Application.Idle += ProcessFrame;
        }
     
        /// <summary>
        /// Placeholder event handler; it currently does nothing.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        void ProcessFrame(object sender, EventArgs e)
        {
            // Nothing to do.
        }
        /// <summary>
        ///   Starts recording audio from the sound card into a fresh in-memory WAV stream.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnRecord_Click(object sender, EventArgs e)
        {
            // Detach the handlers of any previous capture device so it can no
            // longer call back into this form once it has been replaced.
            if (source != null)
            {
                source.NewFrame -= source_NewFrame;
                source.AudioSourceError -= source_AudioSourceError;
            }

            // Every recording starts from zero; without this the totals (and the
            // length label fed from `duration`) keep adding up across recordings.
            frames = 0;
            samples = 0;
            duration = TimeSpan.Zero;

            // Create capture device (this is the core of the recorder)
            source = new AudioCaptureDevice()
            {
                DesiredFrameSize = 4096,
                // Listen on 16000 Hz, mono: the recogniser only accepts
                // 8k/16k sample rates, 16-bit depth, single channel.
                SampleRate = 16000,
                Channels = 1,
                // We will be reading 16-bit PCM
                Format = SampleFormat.Format16Bit
            };

            // Wire up some events
            source.NewFrame += source_NewFrame;
            source.AudioSourceError += source_AudioSourceError;

            // Create buffer for wavechart control
            current = new float[source.DesiredFrameSize];

            // Create stream to store file
            stream = new MemoryStream();
            encoder = new WaveEncoder(stream);

            // Start
            source.Start();
            updateButtons();
        }

        /// <summary>
        ///   Plays back the recorded audio stream.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnPlay_Click(object sender, EventArgs e)
        {
            // Decode from the very beginning of the recorded data.
            stream.Seek(0, SeekOrigin.Begin);
            decoder = new WaveDecoder(stream);

            // Resume from the track bar position when it lies inside the
            // recording, then let the track bar span the whole recording.
            int startAt = trackBar1.Value;
            if (startAt < decoder.Frames)
            {
                decoder.Seek(startAt);
            }
            trackBar1.Maximum = decoder.Samples;

            // Output device that will play the recording, using the decoder's own format.
            output = new AudioOutputDevice(this.Handle, decoder.SampleRate, decoder.Channels);

            output.FramePlayingStarted += output_FramePlayingStarted;
            output.NewFrameRequested += output_NewFrameRequested;
            output.Stopped += output_PlayingFinished;

            output.Play();

            updateButtons();
        }

        /// <summary>
        /// Stops recording or playback. When a recording was stopped, the
        /// recorded audio is sent to the speech recogniser and the result is
        /// shown in the result text box.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnStop_Click(object sender, EventArgs e)
        {
            // Remember what was running before stopping: the same button also
            // ends playback, and only a finished recording should be recognised.
            bool wasRecording = source != null && source.IsRunning;

            // Stops both cases
            if (source != null)
            {
                // If we were recording
                source.SignalToStop();
                source.WaitForStop();
            }
            if (output != null)
            {
                // If we were playing
                output.SignalToStop();
                output.WaitForStop();
            }

            updateButtons();

            // Also zero out the buffers and screen
            if (current != null)
            {
                Array.Clear(current, 0, current.Length);
                updateWaveform(current, current.Length);
            }

            if (!wasRecording || stream == null)
            {
                return;
            }

            // NOTE(review): the AsrData overload that accepts a stream is not
            // visible in this file. Rewinding is harmless for every other user of
            // `stream` (they all seek first) and guarantees that a reader which
            // starts at the current position sees the whole recording.
            stream.Seek(0, SeekOrigin.Begin);

            try
            {
                SpeechAPI speechApi = new SpeechAPI();

                string result = speechApi.AsrData(stream, "wav");
                tb_result.Text = "語音識別結果:" + result;
            }
            catch (Exception ex)
            {
                // The recogniser is a remote service whose exception types are
                // not known here; report the failure instead of letting it
                // escape from the click handler.
                tb_result.Text = "語音識別失敗:" + ex.Message;
            }
        }



        /// <summary>
        /// Called when the audio source reports an error.
        /// </summary>
        /// <param name="sender">The audio source that raised the event.</param>
        /// <param name="e">Carries the error description.</param>
        /// <exception cref="InvalidOperationException">
        /// Always thrown, with the reported description as its message.
        /// </exception>
        private void source_AudioSourceError(object sender, AudioSourceErrorEventArgs e)
        {
            // Throw a specific exception type rather than the bare System.Exception;
            // existing `catch (Exception)` handlers still see it.
            throw new InvalidOperationException(e.Description);
        }

        /// <summary>
        /// Called whenever a new input audio frame arrives: shows it on the
        /// chart, appends it to the recording and updates the running totals.
        /// </summary>
        /// <param name="sender">The audio source that raised the event.</param>
        /// <param name="eventArgs">Carries the captured signal.</param>
        private void source_NewFrame(object sender, NewFrameEventArgs eventArgs)
        {
            var frame = eventArgs.Signal;

            // Show the frame on the chart.
            frame.CopyTo(current);
            updateWaveform(current, frame.Length);

            // Append the frame to the recording.
            encoder.Encode(frame);

            // Keep the running totals up to date.
            duration = duration + frame.Duration;
            samples = samples + frame.Samples;
            frames = frames + frame.Length;
        }


        /// <summary>
        /// Called when the output device starts playing a frame: moves the
        /// track bar and, unless the frame reaches the end of the recording,
        /// draws that frame on the chart.
        /// </summary>
        /// <param name="sender">The output device that raised the event.</param>
        /// <param name="e">Index and length of the frame being played.</param>
        private void output_FramePlayingStarted(object sender, PlayFrameEventArgs e)
        {
            updateTrackbar(e.FrameIndex);

            if (e.FrameIndex + e.Count >= decoder.Frames)
            {
                return;
            }

            // Peek at the frame being played, then put the decoder back where
            // it was so the playback position is left untouched.
            int resumeAt = decoder.Position;
            decoder.Seek(e.FrameIndex);
            Signal played = decoder.Decode(e.Count);
            decoder.Seek(resumeAt);

            float[] data = played.ToFloat();
            updateWaveform(data, played.Length);
        }

     
        /// <summary>
        /// Called when the output device has stopped: refreshes the buttons
        /// and blanks the waveform.
        /// </summary>
        /// <param name="sender">The output device that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void output_PlayingFinished(object sender, EventArgs e)
        {
            updateButtons();

            int size = current.Length;
            Array.Clear(current, 0, size);
            updateWaveform(current, size);
        }

     
        /// <summary>
        /// Called when the output device asks for more audio: decodes the next
        /// chunk into the device buffer, or asks it to stop when nothing is left.
        /// </summary>
        /// <param name="sender">The output device that raised the event.</param>
        /// <param name="e">Request data; receives the frame index, frame count and samples.</param>
        private void output_NewFrameRequested(object sender, NewFrameRequestedEventArgs e)
        {
            // Report where this chunk starts.
            e.FrameIndex = decoder.Position;

            Signal chunk = decoder.Decode(e.Frames);

            if (chunk != null)
            {
                // Tell the device how many frames were actually decoded and hand them over.
                e.Frames = chunk.Length;
                chunk.CopyTo(e.Buffer);
            }
            else
            {
                // Nothing could be decoded: ask the device to stop.
                e.Stop = true;
            }
        }




        /// <summary>
        /// Draws the given samples on the "wave" series of the chart,
        /// marshalling to the UI thread when called from another thread.
        /// </summary>
        /// <param name="samples">Samples to display.</param>
        /// <param name="length">Number of samples to display.</param>
        private void updateWaveform(float[] samples, int length)
        {
            if (InvokeRequired)
            {
                BeginInvoke(new Action(() =>
                {
                    chart.UpdateWaveform("wave", samples, length);
                }));
            }
            else
            {
                // Draw the caller's samples here as well. This branch used to draw
                // the `current` field, ignoring the argument whenever the call was
                // already on the UI thread.
                chart.UpdateWaveform("wave", samples, length);
            }
        }

       
        /// <summary>
        /// Moves the track bar to <paramref name="value"/>, clamped to the
        /// track bar's range, marshalling to the UI thread when needed.
        /// </summary>
        /// <param name="value">Requested track bar position.</param>
        private void updateTrackbar(int value)
        {
            // The range is read inside the delegate so that it is always
            // evaluated on the thread that performs the assignment.
            Action apply = () =>
            {
                int capped = Math.Min(trackBar1.Maximum, value);
                trackBar1.Value = Math.Max(trackBar1.Minimum, capped);
            };

            if (InvokeRequired)
            {
                BeginInvoke(apply);
            }
            else
            {
                apply();
            }
        }

        /// <summary>
        /// Enables or disables the controls according to whether the form is
        /// recording, playing or idle; re-dispatches itself to the UI thread
        /// when called from another thread.
        /// </summary>
        private void updateButtons()
        {
            if (InvokeRequired)
            {
                BeginInvoke(new Action(updateButtons));
                return;
            }

            // Recording takes precedence over playback, as in a chained if/else.
            bool recording = source != null && source.IsRunning;
            bool playing = !recording && output != null && output.IsRunning;
            bool busy = recording || playing;

            // Backward / forward are disabled in every state.
            btnBwd.Enabled = false;
            btnFwd.Enabled = false;

            // Play needs a recording, and both Play and Record need an idle form.
            btnPlay.Enabled = !busy && stream != null;
            btnStop.Enabled = busy;
            btnRecord.Enabled = !busy;

            // Track bar: off while recording, on while playing, and when idle
            // only once a decoder exists.
            trackBar1.Enabled = playing || (!recording && decoder != null);

            if (!busy)
            {
                trackBar1.Value = 0;
            }
        }

        /// <summary>
        /// Signals the capture and playback devices, if any, to stop when the
        /// form has been closed. Does not wait for them to finish.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void MainFormFormClosed(object sender, FormClosedEventArgs e)
        {
            if (source != null)
            {
                source.SignalToStop();
            }

            if (output != null)
            {
                output.SignalToStop();
            }
        }

        /// <summary>
        /// Writes the recorded audio to the file chosen in the save dialog.
        /// Does nothing when there is no recording yet.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void saveFileDialog1_FileOk(object sender, System.ComponentModel.CancelEventArgs e)
        {
            // Nothing recorded yet: return before OpenFile so that no empty
            // file is created and no NullReferenceException is raised.
            if (stream == null)
            {
                return;
            }

            // `using` closes the file even when the write fails.
            using (Stream fileStream = saveFileDialog1.OpenFile())
            {
                stream.WriteTo(fileStream);
            }
        }

        /// <summary>
        /// Opens the save dialog with this form as its owner. The dialog
        /// result is not inspected here.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void saveToolStripMenuItem_Click(object sender, EventArgs e)
        {
            IWin32Window owner = this;
            saveFileDialog1.ShowDialog(owner);
        }

       
        /// <summary>
        /// Refreshes the label that shows the recorded length in seconds.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void updateTimer_Tick(object sender, EventArgs e)
        {
            // TotalSeconds is the whole duration as a fractional number.
            // TimeSpan.Seconds is only the 0-59 integer component, which wraps
            // after a minute and never fills the two decimals of the format.
            lbLength.Text = String.Format("Length: {0:00.00} sec.", duration.TotalSeconds);
        }

        /// <summary>
        /// Shows the About box as a modal dialog owned by this form.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void aboutToolStripMenuItem_Click(object sender, EventArgs e)
        {
            // A form shown with ShowDialog is only hidden when it closes, not
            // disposed, so release it explicitly.
            using (var about = new AboutBox())
            {
                about.ShowDialog(this);
            }
        }

        /// <summary>
        /// Closes the form.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void closeToolStripMenuItem_Click(object sender, EventArgs e)
        {
            this.Close();
        }

        /// <summary>
        /// Passes a factor of 1.25 to adjustVolume.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnIncreaseVolume_Click(object sender, EventArgs e)
        {
            const float factor = 1.25f;
            adjustVolume(factor);
        }

        /// <summary>
        /// Passes a factor of 0.75 to adjustVolume.
        /// </summary>
        /// <param name="sender">Event sender (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnDecreaseVolume_Click(object sender, EventArgs e)
        {
            const float factor = 0.75f;
            adjustVolume(factor);
        }

        /// <summary>
        /// Applies a volume filter to the whole recording and re-encodes the
        /// result into the same stream. Does nothing when there is no
        /// recording yet.
        /// </summary>
        /// <param name="value">Value handed to the volume filter.</param>
        private void adjustVolume(float value)
        {
            // The volume buttons can be pressed before anything was recorded;
            // `stream` is only created when a recording starts.
            if (stream == null)
            {
                return;
            }

            // Rewind and decode the complete recording.
            stream.Seek(0, SeekOrigin.Begin);
            decoder = new WaveDecoder(stream);
            var signal = decoder.Decode();

            // Apply the filter to the decoded signal in place.
            var volume = new VolumeFilter(value);
            volume.ApplyInPlace(signal);

            // Rewind again and encode the modified signal over the old data.
            stream.Seek(0, SeekOrigin.Begin);
            encoder = new WaveEncoder(stream);
            encoder.Encode(signal);
        }

    }
}

 

 百度語音識別接口

百度已經提供 SDK,支持的語音格式如下。

支持的語音格式

原始 PCM 的錄音參數必須符合 8k/16k 採樣率、16bit 位深、單聲道,支持的格式有:pcm(不壓縮)、wav(不壓縮,pcm編碼)、amr(壓縮格式)。

 

        /// <summary>
        /// Reads an audio file and sends its bytes to the speech recogniser.
        /// </summary>
        /// <param name="filePath">Path of the audio file to recognise.</param>
        /// <param name="format">Audio format passed to the recogniser; defaults to "pcm".</param>
        /// <param name="rate">Sample rate passed to the recogniser; defaults to 16000.</param>
        /// <returns>The recogniser's result converted to a string.</returns>
        public string AsrData(string filePath, string format = "pcm", int rate = 16000)
        {
            var data = File.ReadAllBytes(filePath);

            // Forward the caller's sample rate. It used to be hard-coded to 16000,
            // so the `rate` argument had no effect.
            var result = _asrClient.Recognize(data, format, rate);
            return result.ToString();
        }

 

 結果評測:

使用普通麥克風時語音識別效果不好,需要陣列麥克風才可以。

相關文章
相關標籤/搜索