轉載:http://billxia.diandian.com/post/2012-12-23/40049402032
在Windows下,使用Microsoft Speech API(簡稱SAPI)可以很簡單高效地實現語音識別。關於如何使用SAPI實現語音識別,請參見MVP尹成的博客。
Speech SDK安裝後有一個Samples文件夾,裏面有C++/C#/VB的示例代碼可以參考。
現在我想把基於SAPI的語音識別轉移到我的QT項目裏,也就是在QT裏調用微軟的SAPI來實現語音識別。這個想法很簡單,但實現起來卻充滿了阻礙。我首先查了一下QT調用Win API的可能性,發現這是完全可以的;接下來就着手實現了。
首先還是封裝一個Speech Recognition的引擎類SREngine,其頭文件和源文件分別如下:
/****************************************************
SREngine類,將MS SAPI的語音識別引擎封裝,用於語音識別
SREngine.h
****************************************************/
#ifndef SRENGINE_H
#define SRENGINE_H
#include <QString.h>
#include <QMessageBox>
// Microsoft Speech API
#undef UNICODE
#include <sapi.h>
#include <sphelper.h>
#include <spuihelp.h>
#include <comdef.h>
#define UNICODE
#include <windows.h>
#include <windowsx.h>
#include <commctrl.h>
// Window message id used for SAPI recognition notifications (it is the value
// handed to SetNotifyWindowMessage and tested for in winEvent).
// Parenthesized so the macro expands as a single value inside any larger
// expression, e.g. `WM_RECOEVENT * 2` or `x & WM_RECOEVENT`.
#define WM_RECOEVENT (WM_USER + 100)
// Grammar id constant; not referenced by the code shown here.
#define GID_SRCMD_CN 1234
// Id passed to ISpRecoContext::CreateGrammar for the command grammar.
#define MYGRAMMARID 101
/// Wraps the Microsoft SAPI speech-recognition objects so a Qt window can
/// drive command-and-control recognition through three calls.
class SREngine
{
public:
    SREngine();
    ~SREngine();

    /// Creates the recognizer, audio input and recognition context, and asks
    /// the context to notify window hWnd with message Msg.
    HRESULT InitializeSapi(WId hWnd, UINT Msg);

    /// Creates the command grammar (if not created yet) and loads its rules
    /// from the given XML grammar file.
    HRESULT LoadCmdFromFile(QString XMLFileName);

    /// Activates (fActivate != 0) or deactivates the named grammar rule.
    HRESULT SetRuleState(const WCHAR *pszRuleName, const WCHAR *pszValue, BOOL fActivate);

    // Speech objects. Declaration order is kept as-is because it fixes the
    // order in which the smart pointers are destroyed.
    CComPtr<ISpRecognizer> m_cpRecognizer;
    CComPtr<ISpRecoContext> m_cpRecoContext;
    CComPtr<ISpRecoGrammar> m_cpCmdGrammar;

    // Audio input object.
    CComPtr<ISpAudio> m_cpAudio;
};
#endif // SRENGINE_H
/*****************************************************************
SREngine.cpp
*****************************************************************/
#include "SREngine.h"
// Nothing to do at construction: the CComPtr members default to null.
SREngine::SREngine() {}
// No explicit cleanup: each CComPtr member releases its interface when it is
// destroyed.
SREngine::~SREngine() {}
/**
 * Sets up SAPI for in-process (exclusive) recognition.
 *
 * Steps: create the in-process recognizer, create the default audio input
 * object and attach it to the recognizer, create a recognition context, ask
 * the context to notify window hWnd with message Msg, and select the events
 * the context should report.
 *
 * The function may be called again after a failure; any objects left over
 * from a previous attempt are released first.
 *
 * @param hWnd  Window that receives the notification message.
 * @param Msg   Message id delivered to hWnd when an event is available.
 * @return S_OK on success, otherwise the HRESULT of the step that failed
 *         (a message box naming the step is shown before returning).
 */
HRESULT SREngine::InitializeSapi(WId hWnd, UINT Msg)
{
    // CComPtr::CoCreateInstance and CComPtr::operator& both require the
    // pointer to be null (they assert in debug builds and would leak the old
    // interface otherwise). Drop anything a previous, possibly partial,
    // initialization left behind. The grammar goes too: it was created by the
    // old context and would otherwise make LoadCmdFromFile skip loading.
    m_cpCmdGrammar.Release();
    m_cpRecoContext.Release();
    m_cpRecognizer.Release();
    m_cpAudio.Release();

    // In-process recognizer: this application owns the engine exclusively.
    HRESULT hr = m_cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
    if (FAILED(hr))
    {
        QMessageBox::information(NULL, "Error", "Create recognizer error", MB_OK);
        return hr;
    }

    // Default audio input object.
    hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &m_cpAudio);
    if (FAILED(hr))
    {
        QMessageBox::information(NULL, "Error", "Create default audio object error", MB_OK);
        return hr;
    }

    // Use that audio object as the recognizer's input source.
    hr = m_cpRecognizer->SetInput(m_cpAudio, TRUE);
    if (FAILED(hr))
    {
        QMessageBox::information(NULL, "Error", "Error setINPUT", MB_OK);
        return hr;
    }

    // Recognition context: the object that delivers events and owns grammars.
    hr = m_cpRecognizer->CreateRecoContext(&m_cpRecoContext);
    if (FAILED(hr))
    {
        QMessageBox::information(NULL, "Error", "Error CreateRecoContext", MB_OK);
        return hr;
    }

    // Bind Msg to hWnd: when the context has an event to report, Msg is sent
    // to that window.
    hr = m_cpRecoContext->SetNotifyWindowMessage(hWnd, Msg, 0, 0);
    if (FAILED(hr))
    {
        QMessageBox::information(NULL, "Error", "Error SetNotifyWindowMessage", MB_OK);
        return hr;
    }

    // Events we want both signalled and queued.
    const ULONGLONG ullInterest =
        SPFEI(SPEI_SOUND_START) | SPFEI(SPEI_SOUND_END) |
        SPFEI(SPEI_PHRASE_START) | SPFEI(SPEI_RECOGNITION) |
        SPFEI(SPEI_FALSE_RECOGNITION) | SPFEI(SPEI_HYPOTHESIS) |
        SPFEI(SPEI_INTERFERENCE) | SPFEI(SPEI_RECO_OTHER_CONTEXT) |
        SPFEI(SPEI_REQUEST_UI) | SPFEI(SPEI_RECO_STATE_CHANGE) |
        SPFEI(SPEI_PROPERTY_NUM_CHANGE) | SPFEI(SPEI_PROPERTY_STRING_CHANGE);
    hr = m_cpRecoContext->SetInterest(ullInterest, ullInterest);
    if (FAILED(hr))
    {
        QMessageBox::information(NULL, "Error", "Error set interest", MB_OK);
    }
    return hr;
}
/**
 * Creates the command-and-control grammar and loads its rules from an XML
 * grammar file. Does nothing (returns S_OK) when a grammar is already loaded.
 *
 * @param XMLFileName  Path of the XML grammar file.
 * @return S_OK on success or when already loaded; E_UNEXPECTED when the
 *         recognition context has not been created yet; otherwise the failing
 *         SAPI HRESULT (a message box is shown before returning).
 */
HRESULT SREngine::LoadCmdFromFile(QString XMLFileName)
{
    if (m_cpCmdGrammar)
    {
        return S_OK; // grammar already created and loaded
    }

    // The grammar is created by the context; without one we would call
    // through a null CComPtr.
    if (!m_cpRecoContext)
    {
        QMessageBox::information(NULL, "Error", "Recognition context is not initialized", MB_OK);
        return E_UNEXPECTED;
    }

    HRESULT hr = m_cpRecoContext->CreateGrammar(MYGRAMMARID, &m_cpCmdGrammar);
    if (FAILED(hr))
    {
        QMessageBox::information(NULL, "Error", "Error Creategammar", MB_OK);
        return hr;
    }

    // QString already stores UTF-16, and utf16() yields a null-terminated
    // buffer of any length. This replaces a fixed 256-element array filled by
    // toWCharArray(), which neither bounds-checks nor writes a terminator.
    // XMLFileName is a by-value parameter, so the buffer outlives the call.
    const WCHAR *pszXMLFile = reinterpret_cast<const WCHAR *>(XMLFileName.utf16());

    // Load the rules from the XML file as a dynamic grammar.
    hr = m_cpCmdGrammar->LoadCmdFromFile(pszXMLFile, SPLO_DYNAMIC);
    if (FAILED(hr))
    {
        QMessageBox::information(NULL, "Error", "Error LoadCmdFromFile", MB_OK);
        // Drop the empty grammar so a later call retries instead of seeing a
        // non-null pointer and reporting success without loading anything.
        m_cpCmdGrammar.Release();
        return hr;
    }
    return hr;
}
/**
 * Activates or deactivates a rule of the command grammar.
 *
 * @param pszRuleName  Rule name, forwarded unchanged to
 *                     ISpRecoGrammar::SetRuleState (callers in this file pass
 *                     NULL).
 * @param pszValue     Unused; NULL is always forwarded in its place.
 * @param fActivate    Non-zero selects SPRS_ACTIVE, zero SPRS_INACTIVE.
 * @return The HRESULT from ISpRecoGrammar::SetRuleState, or E_UNEXPECTED when
 *         no grammar has been created yet.
 */
HRESULT SREngine::SetRuleState(const WCHAR * pszRuleName, const WCHAR *pszValue, BOOL fActivate)
{
    Q_UNUSED(pszValue);

    // Calling before LoadCmdFromFile() would dereference a null CComPtr.
    if (!m_cpCmdGrammar)
    {
        return E_UNEXPECTED;
    }

    const SPRULESTATE newState = fActivate ? SPRS_ACTIVE : SPRS_INACTIVE;
    return m_cpCmdGrammar->SetRuleState(pszRuleName, NULL, newState);
}
在MainWindow的ui上添加一個「開啓」按鈕,然後給它添加槽函數:
void MainWindow:: on_pushButtonStart_clicked()
{
HRESULT hr = m_SREngine.InitializeSapi(this->winId(), WM_RECOEVENT); //初始化SAPI
if(FAILED(hr))
{
return;
}
QString grammarFileName = "../SpeechGrammar.xml";
hr = m_SREngine.LoadCmdFromFile(grammarFileName); //建立語法規則
if(FAILED(hr))
{
return;
}
/* 激活語音控制 */
hr = m_SREngine.SetRuleState(NULL, NULL, SPRS_ACTIVE);
if(FAILED(hr))
{
QMessageBox::information(NULL, "Error", "SetRuleState Active Error!", MB_OK);
return;
}
setWindowTitle("Sound Start");
ui->pushButtonStart->setEnabled(false);
}
實現的時候,一個比較棘手的問題是,如何將MFC的消息機制用QT來取代。我首先想到的當然是用信號槽機制來實現,但是這裏完全跟信號槽對不上號,套不進去!嘗試了半天,在google和baidu上反覆搜索,只找到了很少比較相關的資料。沒辦法,我就加了QT技術的QQ羣,請教羣裏的大牛,但可能是我沒描述清楚,還是解決不了。
最後還是用google搜,通過搜索「QT和WinAPI混合編程」,找到了一兩個比較好的結果,裏面提到可以用winEvent來截獲窗體的消息,於是看到希望了,哈哈哈。我的winEvent函數如下:
/**
 * Native Windows message hook for the main window.
 *
 * @param pMsg    The native message.
 * @param result  Receives the value to hand back to Windows when the message
 *                is handled here.
 * @return true when the message was WM_RECOEVENT and has been handled (so the
 *         value stored in *result is used); false for every other message so
 *         Qt processes it normally.
 */
bool MainWindow::winEvent(MSG* pMsg, long* result)
{
    // Debug aid: shows that the hook is being invoked.
    setWindowTitle("Control - Debug: winEvent");

    if (pMsg->message == WM_RECOEVENT)
    {
        // OnRecoEvent does the actual work: it fetches the recognition result
        // and reacts to it.
        *result = this->OnRecoEvent();
        // Report the message as handled; returning false here would make Qt
        // ignore *result and keep processing the message.
        return true;
    }
    return false;
}
這裏的winEvent函數就相當於WinAPI裏的窗口過程(WindowProc)函數,而且不需要用信號槽來顯式地將信號和槽connect起來,只要該窗體有消息產生,它就會執行。這樣就實現了消息機制:消息由SAPI產生,發送到MainWindow窗體,再在MainWindow的winEvent函數裏截獲該消息並進行相應的處理。
該實例的所有源文件已上傳到GitHub:https://github.com/ibillxia/Demo/tree/master/QtSAPIDemo