ROS Intermediate Notes (6): Robot Perception — Robot Speech
Log in to the iFlytek Open Platform website (http://www.xfyun.cn/), register an account, and download the SDK.
Unpack the downloaded SDK archive (the Linux_iat··· package). The examples are under the samples folder (for example iat_record_sample); after downloading, build the sample package in its own directory with make.
The compiled files are placed in the bin folder, where you can find the generated executable iat_record_sample.
Copy the iFlytek SDK library into the system directory:
$ sudo cp libmsc.so /usr/lib/libmsc.so   # pick the library matching your processor architecture; the SDK ships it under libs/ (x86 and x64)
The iFlytek SDK is tied to an APPID, and every download gets a different ID. If you swap in your own SDK, you must change the APPID in the code to match; alternatively, you can use the libmsc.so provided with this course directly. Otherwise the APPID in the source code has to be replaced with the ID of the SDK you downloaded.
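For reference, the APPID is passed to MSPLogin through the login parameter string in both nodes below; this is the line you would edit if you use your own SDK (594a7b46 is the ID bundled with this course's library):

// The appid must match the libmsc.so you are linking against;
// replace 594a7b46 with the ID of the SDK you downloaded.
const char* login_params = "appid = 594a7b46, work_dir = .";
ret = MSPLogin(NULL, NULL, login_params);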
iat_publish.cpp
/*
 * Speech dictation (iFly Auto Transform, iat) converts speech into the
 * corresponding text in real time.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include "robot_voice/qisr.h"
#include "robot_voice/msp_cmn.h"
#include "robot_voice/msp_errors.h"
#include "robot_voice/speech_recognizer.h"
#include <iconv.h>

#include "ros/ros.h"
#include "std_msgs/String.h"

#define FRAME_LEN   640
#define BUFFER_SIZE 4096

int wakeupFlag = 0;
int resultFlag = 0;

static void show_result(char *string, char is_over)
{
    resultFlag = 1;
    printf("\rResult: [ %s ]", string);
    if (is_over)
        putchar('\n');
}

static char *g_result = NULL;
static unsigned int g_buffersize = BUFFER_SIZE;

void on_result(const char *result, char is_last)
{
    if (result) {
        size_t left = g_buffersize - 1 - strlen(g_result);
        size_t size = strlen(result);
        if (left < size) {
            g_result = (char*)realloc(g_result, g_buffersize + BUFFER_SIZE);
            if (g_result)
                g_buffersize += BUFFER_SIZE;
            else {
                printf("mem alloc failed\n");
                return;
            }
        }
        strncat(g_result, result, size);
        show_result(g_result, is_last);
    }
}

void on_speech_begin()
{
    if (g_result) {
        free(g_result);
    }
    g_result = (char*)malloc(BUFFER_SIZE);
    g_buffersize = BUFFER_SIZE;
    memset(g_result, 0, g_buffersize);

    printf("Start Listening...\n");
}

void on_speech_end(int reason)
{
    if (reason == END_REASON_VAD_DETECT)
        printf("\nSpeaking done \n");
    else
        printf("\nRecognizer error %d\n", reason);
}

/* demo: recognize the audio from the microphone */
static void demo_mic(const char* session_begin_params)
{
    int errcode;
    int i = 0;

    struct speech_rec iat;

    struct speech_rec_notifier recnotifier = {
        on_result,
        on_speech_begin,
        on_speech_end
    };

    errcode = sr_init(&iat, session_begin_params, SR_MIC, &recnotifier);
    if (errcode) {
        printf("speech recognizer init failed\n");
        return;
    }
    errcode = sr_start_listening(&iat);
    if (errcode) {
        printf("start listen failed %d\n", errcode);
    }
    /* demo: 10 seconds of recording */
    while (i++ < 10)
        sleep(1);
    errcode = sr_stop_listening(&iat);
    if (errcode) {
        printf("stop listening failed %d\n", errcode);
    }

    sr_uninit(&iat);
}

/* main thread: start/stop recording; query the result of recognition.
 * record thread: record callback (data write)
 * helper thread: ui (keystroke detection)
 */
void WakeUp(const std_msgs::String::ConstPtr& msg)
{
    printf("waking up\r\n");
    usleep(700 * 1000);
    wakeupFlag = 1;
}

int main(int argc, char* argv[])
{
    // Initialize ROS
    ros::init(argc, argv, "voiceRecognition");
    ros::NodeHandle n;
    ros::Rate loop_rate(10);

    // Declare the Publisher and Subscriber
    // Subscribe to the signal that wakes up speech recognition
    ros::Subscriber wakeUpSub = n.subscribe("voiceWakeup", 1000, WakeUp);
    // Publish the recognized text
    ros::Publisher voiceWordsPub = n.advertise<std_msgs::String>("voiceWords", 1000);

    ROS_INFO("Sleeping...");
    int count = 0;
    while (ros::ok())
    {
        // Speech recognition has been woken up
        if (wakeupFlag) {
            ROS_INFO("Wakeup...");
            int ret = MSP_SUCCESS;
            const char* login_params = "appid = 594a7b46, work_dir = .";
            const char* session_begin_params =
                "sub = iat, domain = iat, language = zh_cn, "
                "accent = mandarin, sample_rate = 16000, "
                "result_type = plain, result_encoding = utf8";

            ret = MSPLogin(NULL, NULL, login_params);
            if (MSP_SUCCESS != ret) {
                MSPLogout();
                printf("MSPLogin failed, Error code %d.\n", ret);
            }

            printf("Demo recognizing the speech from microphone\n");
            printf("Speak in 10 seconds\n");

            demo_mic(session_begin_params);

            printf("10 sec passed\n");

            wakeupFlag = 0;
            MSPLogout();
        }

        // Speech recognition finished
        if (resultFlag) {
            resultFlag = 0;
            std_msgs::String msg;
            msg.data = g_result;
            voiceWordsPub.publish(msg);
        }

        ros::spinOnce();
        loop_rate.sleep();
        count++;
    }

exit:
    MSPLogout(); // Logout...

    return 0;
}
Subscriber: receives the voice wake-up signal; once a wake-up message arrives, it sets the wakeupFlag variable.
The main loop calls the SDK's speech dictation function; after a successful recognition it sets the resultFlag variable and publishes the recognized string through the Publisher.
Step 1: add the build rules to CMakeLists.txt
add_executable(iat_publish
  src/iat_publish.cpp
  src/speech_recognizer.c
  src/linuxrec.c)
target_link_libraries(
  iat_publish
  ${catkin_LIBRARIES}
  libmsc.so -ldl -lpthread -lm -lrt -lasound
)
The build needs to link against the SDK's libmsc.so library; since this library was already copied into the system path, no full path has to be given here.
Step 2: run the speech dictation example
$ roscore
$ rosrun robot_voice iat_publish
$ rostopic pub /voiceWakeup std_msgs/String "data: 'any string'"   # any string can go inside the quotes
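Instead of publishing the wake-up message by hand each time, a small helper node could send it and echo the results. The following is only a sketch (the node name voice_test is made up); it assumes nothing beyond the /voiceWakeup and /voiceWords topics used above.

// Minimal test harness for iat_publish (hypothetical, not part of the course code):
// publishes one wake-up message on /voiceWakeup and echoes results from /voiceWords.
#include "ros/ros.h"
#include "std_msgs/String.h"

void resultCallback(const std_msgs::String::ConstPtr& msg)
{
    ROS_INFO("Recognized: %s", msg->data.c_str());
}

int main(int argc, char* argv[])
{
    ros::init(argc, argv, "voice_test");
    ros::NodeHandle n;

    ros::Publisher  wakeupPub = n.advertise<std_msgs::String>("voiceWakeup", 10);
    ros::Subscriber resultSub = n.subscribe("voiceWords", 10, resultCallback);

    // give the publisher a moment to connect before sending the wake-up signal
    ros::Duration(1.0).sleep();
    std_msgs::String wakeup;
    wakeup.data = "any string";
    wakeupPub.publish(wakeup);

    ros::spin();
    return 0;
}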
Text To Speech (TTS) technology automatically converts arbitrary text into continuous natural speech in real time. It is an efficient and convenient way to deliver spoken information to anyone, anytime, anywhere, and fits the information age's demands for massive data, dynamic updates, and personalized queries.
tts_subscribe.cpp
/*
 * Text To Speech (TTS) automatically converts arbitrary text into continuous
 * natural speech in real time, providing spoken information services anytime,
 * anywhere, to anyone.
 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>

#include "robot_voice/qtts.h"
#include "robot_voice/msp_cmn.h"
#include "robot_voice/msp_errors.h"

#include "ros/ros.h"
#include "std_msgs/String.h"
#include <sstream>
#include <sys/types.h>
#include <sys/stat.h>

/* wav audio header format */
typedef struct _wave_pcm_hdr
{
    char        riff[4];           // = "RIFF"
    int         size_8;            // = FileSize - 8
    char        wave[4];           // = "WAVE"
    char        fmt[4];            // = "fmt "
    int         fmt_size;          // = size of the next block: 16
    short int   format_tag;        // = PCM: 1
    short int   channels;          // = number of channels: 1
    int         samples_per_sec;   // = sample rate: 8000 | 6000 | 11025 | 16000
    int         avg_bytes_per_sec; // = bytes per second: samples_per_sec * bits_per_sample / 8
    short int   block_align;       // = bytes per sample point: wBitsPerSample / 8
    short int   bits_per_sample;   // = quantization bits: 8 | 16
    char        data[4];           // = "data"
    int         data_size;         // = raw data length: FileSize - 44
} wave_pcm_hdr;

/* default wav audio header */
wave_pcm_hdr default_wav_hdr =
{
    { 'R', 'I', 'F', 'F' },
    0,
    { 'W', 'A', 'V', 'E' },
    { 'f', 'm', 't', ' ' },
    16,
    1,
    1,
    16000,
    32000,
    2,
    16,
    { 'd', 'a', 't', 'a' },
    0
};

/* text synthesis */
int text_to_speech(const char* src_text, const char* des_path, const char* params)
{
    int          ret          = -1;
    FILE*        fp           = NULL;
    const char*  sessionID    = NULL;
    unsigned int audio_len    = 0;
    wave_pcm_hdr wav_hdr      = default_wav_hdr;
    int          synth_status = MSP_TTS_FLAG_STILL_HAVE_DATA;

    if (NULL == src_text || NULL == des_path)
    {
        printf("params is error!\n");
        return ret;
    }
    fp = fopen(des_path, "wb");
    if (NULL == fp)
    {
        printf("open %s error.\n", des_path);
        return ret;
    }
    /* begin synthesis */
    sessionID = QTTSSessionBegin(params, &ret);
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSSessionBegin failed, error code: %d.\n", ret);
        fclose(fp);
        return ret;
    }
    ret = QTTSTextPut(sessionID, src_text, (unsigned int)strlen(src_text), NULL);
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSTextPut failed, error code: %d.\n", ret);
        QTTSSessionEnd(sessionID, "TextPutError");
        fclose(fp);
        return ret;
    }
    printf("正在合成 ...\n");
    fwrite(&wav_hdr, sizeof(wav_hdr), 1, fp); // write the wav header; the sample rate is 16000
    while (1)
    {
        /* fetch the synthesized audio */
        const void* data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret);
        if (MSP_SUCCESS != ret)
            break;
        if (NULL != data)
        {
            fwrite(data, audio_len, 1, fp);
            wav_hdr.data_size += audio_len; // accumulate data_size
        }
        if (MSP_TTS_FLAG_DATA_END == synth_status)
            break;
        printf(">");
        usleep(150 * 1000); // avoid hogging the CPU
    } // for the possible values of synth_status, see the iFlytek Voice Cloud API documentation
    printf("\n");
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSAudioGet failed, error code: %d.\n", ret);
        QTTSSessionEnd(sessionID, "AudioGetError");
        fclose(fp);
        return ret;
    }
    /* fix up the sizes in the wav header */
    wav_hdr.size_8 += wav_hdr.data_size + (sizeof(wav_hdr) - 8);

    /* write the corrected values back into the header; the file is in wav format */
    fseek(fp, 4, 0);
    fwrite(&wav_hdr.size_8, sizeof(wav_hdr.size_8), 1, fp);       // write size_8
    fseek(fp, 40, 0);                                             // seek to where data_size is stored
    fwrite(&wav_hdr.data_size, sizeof(wav_hdr.data_size), 1, fp); // write data_size
    fclose(fp);
    fp = NULL;
    /* synthesis finished */
    ret = QTTSSessionEnd(sessionID, "Normal");
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSSessionEnd failed, error code: %d.\n", ret);
    }

    return ret;
}

void voiceWordsCallback(const std_msgs::String::ConstPtr& msg)
{
    char cmd[2000];
    const char* text;
    int         ret                  = MSP_SUCCESS;
    const char* session_begin_params = "voice_name = xiaoyan, text_encoding = utf8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
    const char* filename             = "tts_sample.wav"; // name of the synthesized audio file

    std::cout << "I heard :" << msg->data.c_str() << std::endl;
    text = msg->data.c_str();

    /* text synthesis */
    printf("開始合成 ...\n");
    ret = text_to_speech(text, filename, session_begin_params);
    if (MSP_SUCCESS != ret)
    {
        printf("text_to_speech failed, error code: %d.\n", ret);
    }
    printf("合成完畢\n");

    unlink("/tmp/cmd");
    mkfifo("/tmp/cmd", 0777);
    popen("mplayer -quiet -slave -input file=/tmp/cmd 'tts_sample.wav'", "r");
    sleep(3);
}

void toExit()
{
    printf("按任意鍵退出 ...\n");
    getchar();
    MSPLogout(); // log out
}

int main(int argc, char* argv[])
{
    int ret = MSP_SUCCESS;
    const char* login_params = "appid = 594a7b46, work_dir = ."; // login parameters; the appid is bound to the msc library, do not change it arbitrarily
    /*
     * rdn:           digit pronunciation mode of the synthesized audio
     * volume:        volume of the synthesized audio
     * pitch:         pitch of the synthesized audio
     * speed:         speed of the synthesized audio
     * voice_name:    speaker of the synthesized audio
     * sample_rate:   sample rate of the synthesized audio
     * text_encoding: encoding of the input text
     *
     * for detailed parameter descriptions, see the iFlytek Voice Cloud MSC API documentation
     */

    /* user login */
    ret = MSPLogin(NULL, NULL, login_params); // parameters: user name, password, login parameters; user name and password can be obtained by registering at http://open.voicecloud.cn
    if (MSP_SUCCESS != ret)
    {
        printf("MSPLogin failed, error code: %d.\n", ret);
        /*goto exit;*/ // login failed, exit
        toExit();
    }

    printf("\n###########################################################################\n");
    printf("## 語音合成(Text To Speech,TTS)技術可以自動將任意文字實時轉換爲連續的   ##\n");
    printf("## 自然語音,是一種可以在任何時間、任何地點,向任何人提供語音信息服務的   ##\n");
    printf("## 高效便捷手段,非常符合信息時代海量數據、動態更新和個性化查詢的需求。   ##\n");
    printf("###########################################################################\n\n");

    ros::init(argc, argv, "TextToSpeech");
    ros::NodeHandle n;
    ros::Subscriber sub = n.subscribe("voiceWords", 1000, voiceWordsCallback);
    ros::spin();

exit:
    printf("按任意鍵退出 ...\n");
    getchar();
    MSPLogout(); // log out

    return 0;
}
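As an aside, the magic offsets 4 and 40 used by the fseek calls in text_to_speech are simply the byte offsets of size_8 and data_size inside wave_pcm_hdr. A small compile-time check of that assumption (requires C++11 for static_assert; not part of the original code):

#include <cstddef>   // offsetof

// wave_pcm_hdr has no padding here: every field already sits on its natural
// alignment boundary, so the header is exactly 44 bytes.
static_assert(offsetof(wave_pcm_hdr, size_8)    == 4,  "size_8 is patched at byte offset 4");
static_assert(offsetof(wave_pcm_hdr, data_size) == 40, "data_size is patched at byte offset 40");
static_assert(sizeof(wave_pcm_hdr) == 44, "the wav header occupies 44 bytes");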
Step 1: add the build rule to CMakeLists.txt
add_executable(tts_subscribe src/tts_subscribe.cpp)
target_link_libraries(
  tts_subscribe
  ${catkin_LIBRARIES}
  libmsc.so -ldl -pthread
)
Step 2: run the text-to-speech example
$ roscore
$ rosrun robot_voice tts_subscribe
$ rostopic pub /voiceWords std_msgs/String "data: '你好,我是機器人'"
Errors that may occur when text-to-speech starts
Cause: the mplayer configuration.
Fix: add the following setting to the $HOME/.mplayer/config file: lirc=no
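Alternatively, if mplayer keeps misbehaving, the FIFO-based playback in voiceWordsCallback can be swapped for a simple blocking call to a command-line player. A minimal sketch, assuming aplay from alsa-utils is installed (aplay is not used anywhere in the course code):

#include <cstdio>
#include <cstdlib>
#include <string>

// Plays a wav file and blocks until playback finishes.
static void play_wav(const std::string& path)
{
    std::string cmd = "aplay -q '" + path + "'";   // -q: quiet
    int rc = std::system(cmd.c_str());
    if (rc != 0)
        std::printf("aplay exited with %d; is alsa-utils installed?\n", rc);
}

// In voiceWordsCallback, the unlink/mkfifo/popen/sleep(3) block would then
// shrink to a single call: play_wav(filename);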
voice_assistant.cpp
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>

#include "robot_voice/qtts.h"
#include "robot_voice/msp_cmn.h"
#include "robot_voice/msp_errors.h"

#include "ros/ros.h"
#include "std_msgs/String.h"
#include <sstream>
#include <sys/types.h>
#include <sys/stat.h>

/* wav audio header format */
typedef struct _wave_pcm_hdr
{
    char        riff[4];           // = "RIFF"
    int         size_8;            // = FileSize - 8
    char        wave[4];           // = "WAVE"
    char        fmt[4];            // = "fmt "
    int         fmt_size;          // = size of the next block: 16
    short int   format_tag;        // = PCM: 1
    short int   channels;          // = number of channels: 1
    int         samples_per_sec;   // = sample rate: 8000 | 6000 | 11025 | 16000
    int         avg_bytes_per_sec; // = bytes per second: samples_per_sec * bits_per_sample / 8
    short int   block_align;       // = bytes per sample point: wBitsPerSample / 8
    short int   bits_per_sample;   // = quantization bits: 8 | 16
    char        data[4];           // = "data"
    int         data_size;         // = raw data length: FileSize - 44
} wave_pcm_hdr;

/* default wav audio header */
wave_pcm_hdr default_wav_hdr =
{
    { 'R', 'I', 'F', 'F' },
    0,
    { 'W', 'A', 'V', 'E' },
    { 'f', 'm', 't', ' ' },
    16,
    1,
    1,
    16000,
    32000,
    2,
    16,
    { 'd', 'a', 't', 'a' },
    0
};

/* text synthesis (identical to tts_subscribe.cpp) */
int text_to_speech(const char* src_text, const char* des_path, const char* params)
{
    int          ret          = -1;
    FILE*        fp           = NULL;
    const char*  sessionID    = NULL;
    unsigned int audio_len    = 0;
    wave_pcm_hdr wav_hdr      = default_wav_hdr;
    int          synth_status = MSP_TTS_FLAG_STILL_HAVE_DATA;

    if (NULL == src_text || NULL == des_path)
    {
        printf("params is error!\n");
        return ret;
    }
    fp = fopen(des_path, "wb");
    if (NULL == fp)
    {
        printf("open %s error.\n", des_path);
        return ret;
    }
    /* begin synthesis */
    sessionID = QTTSSessionBegin(params, &ret);
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSSessionBegin failed, error code: %d.\n", ret);
        fclose(fp);
        return ret;
    }
    ret = QTTSTextPut(sessionID, src_text, (unsigned int)strlen(src_text), NULL);
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSTextPut failed, error code: %d.\n", ret);
        QTTSSessionEnd(sessionID, "TextPutError");
        fclose(fp);
        return ret;
    }
    printf("正在合成 ...\n");
    fwrite(&wav_hdr, sizeof(wav_hdr), 1, fp); // write the wav header; the sample rate is 16000
    while (1)
    {
        /* fetch the synthesized audio */
        const void* data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret);
        if (MSP_SUCCESS != ret)
            break;
        if (NULL != data)
        {
            fwrite(data, audio_len, 1, fp);
            wav_hdr.data_size += audio_len; // accumulate data_size
        }
        if (MSP_TTS_FLAG_DATA_END == synth_status)
            break;
        printf(">");
        usleep(150 * 1000); // avoid hogging the CPU
    } // for the possible values of synth_status, see the iFlytek Voice Cloud API documentation
    printf("\n");
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSAudioGet failed, error code: %d.\n", ret);
        QTTSSessionEnd(sessionID, "AudioGetError");
        fclose(fp);
        return ret;
    }
    /* fix up the sizes in the wav header */
    wav_hdr.size_8 += wav_hdr.data_size + (sizeof(wav_hdr) - 8);

    /* write the corrected values back into the header; the file is in wav format */
    fseek(fp, 4, 0);
    fwrite(&wav_hdr.size_8, sizeof(wav_hdr.size_8), 1, fp);       // write size_8
    fseek(fp, 40, 0);                                             // seek to where data_size is stored
    fwrite(&wav_hdr.data_size, sizeof(wav_hdr.data_size), 1, fp); // write data_size
    fclose(fp);
    fp = NULL;
    /* synthesis finished */
    ret = QTTSSessionEnd(sessionID, "Normal");
    if (MSP_SUCCESS != ret)
    {
        printf("QTTSSessionEnd failed, error code: %d.\n", ret);
    }

    return ret;
}

std::string to_string(int val)
{
    char buf[20];
    sprintf(buf, "%d", val);
    return std::string(buf);
}

void voiceWordsCallback(const std_msgs::String::ConstPtr& msg)
{
    char cmd[2000];
    const char* text;
    int         ret                  = MSP_SUCCESS;
    const char* session_begin_params = "voice_name = xiaoyan, text_encoding = utf8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
    const char* filename             = "tts_sample.wav"; // name of the synthesized audio file

    std::cout << "I heard :" << msg->data.c_str() << std::endl;

    // Keyword matching: the iat recognizer returns simplified Chinese, so the
    // keywords are written in simplified characters and must match exactly,
    // punctuation included.
    std::string dataString = msg->data;
    if (dataString.compare("你是谁?") == 0)
    {
        char nameString[40] = "我是你的语音小助手";
        text = nameString;
        std::cout << text << std::endl;
    }
    else if (dataString.compare("你可以做什么?") == 0)
    {
        char helpString[40] = "你可以问我现在时间";
        text = helpString;
        std::cout << text << std::endl;
    }
    else if (dataString.compare("现在时间。") == 0)
    {
        // get the current time
        struct tm *ptm;
        time_t ts = time(NULL);
        ptm = localtime(&ts);
        std::string string = "现在时间" + to_string(ptm->tm_hour) + "点" + to_string(ptm->tm_min) + "分";

        char timeString[40] = {0};
        string.copy(timeString, sizeof(timeString) - 1, 0); // copy at most 39 bytes; the zero-initialized array keeps the null terminator
        text = timeString;
        std::cout << text << std::endl;
    }
    else
    {
        text = msg->data.c_str();
    }

    /* text synthesis */
    printf("開始合成 ...\n");
    ret = text_to_speech(text, filename, session_begin_params);
    if (MSP_SUCCESS != ret)
    {
        printf("text_to_speech failed, error code: %d.\n", ret);
    }
    printf("合成完畢\n");

    unlink("/tmp/cmd");
    mkfifo("/tmp/cmd", 0777);
    popen("mplayer -quiet -slave -input file=/tmp/cmd 'tts_sample.wav'", "r");
    sleep(3);
}

void toExit()
{
    printf("按任意鍵退出 ...\n");
    getchar();
    MSPLogout(); // log out
}

int main(int argc, char* argv[])
{
    int ret = MSP_SUCCESS;
    const char* login_params = "appid = 594a7b46, work_dir = ."; // login parameters; the appid is bound to the msc library, do not change it arbitrarily
    /*
     * rdn:           digit pronunciation mode of the synthesized audio
     * volume:        volume of the synthesized audio
     * pitch:         pitch of the synthesized audio
     * speed:         speed of the synthesized audio
     * voice_name:    speaker of the synthesized audio
     * sample_rate:   sample rate of the synthesized audio
     * text_encoding: encoding of the input text
     *
     * for detailed parameter descriptions, see the iFlytek Voice Cloud MSC API documentation
     */

    /* user login */
    ret = MSPLogin(NULL, NULL, login_params); // parameters: user name, password, login parameters; user name and password can be obtained by registering at http://open.voicecloud.cn
    if (MSP_SUCCESS != ret)
    {
        printf("MSPLogin failed, error code: %d.\n", ret);
        /*goto exit;*/ // login failed, exit
        toExit();
    }

    printf("\n###########################################################################\n");
    printf("## 語音合成(Text To Speech,TTS)技術可以自動將任意文字實時轉換爲連續的   ##\n");
    printf("## 自然語音,是一種可以在任何時間、任何地點,向任何人提供語音信息服務的   ##\n");
    printf("## 高效便捷手段,非常符合信息時代海量數據、動態更新和個性化查詢的需求。   ##\n");
    printf("###########################################################################\n\n");

    ros::init(argc, argv, "TextToSpeech");
    ros::NodeHandle n;
    ros::Subscriber sub = n.subscribe("voiceWords", 1000, voiceWordsCallback);
    ros::spin();

exit:
    printf("按任意鍵退出 ...\n");
    getchar();
    MSPLogout(); // log out

    return 0;
}
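Adding more commands to the assistant only means adding another else-if branch to the keyword matching in voiceWordsCallback. A hedged illustration follows; the keyword "今天几号" and the reply are made up, and find() is used instead of compare() so the match tolerates extra punctuation in the recognizer output:

// Hypothetical extra branch for voiceWordsCallback (illustration only);
// it reuses the to_string helper defined in this file.
else if (dataString.find("今天几号") != std::string::npos)
{
    time_t ts = time(NULL);
    struct tm *ptm = localtime(&ts);
    // tm_mon is zero-based, hence the +1
    std::string reply = "今天是" + to_string(ptm->tm_mon + 1) + "月" + to_string(ptm->tm_mday) + "日";

    char dateString[40] = {0};
    reply.copy(dateString, sizeof(dateString) - 1, 0);
    text = dateString;
    std::cout << text << std::endl;
}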
Step 1: add the build rule to CMakeLists.txt
add_executable(voice_assistant src/voice_assistant.cpp)
target_link_libraries(
  voice_assistant
  ${catkin_LIBRARIES}
  libmsc.so -ldl -pthread
)
Step 2: run the voice assistant example
$ roscore
$ rosrun robot_voice iat_publish        # start the speech recognition node
$ rosrun robot_voice voice_assistant    # start the voice assistant, which interprets the recognized text
$ rostopic pub /voiceWakeup std_msgs/String "data: 'any string'"   # wake-up message
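One natural extension, not implemented in the course code, is to let voice_assistant re-arm recognition after it finishes speaking, so you do not have to publish /voiceWakeup by hand between exchanges. A sketch of that idea (the publisher wakeupPub is an addition of this note):

// Hypothetical addition to voice_assistant.cpp: after playback finishes,
// publish another wake-up message so iat_publish starts listening again.
ros::Publisher wakeupPub;   // in main(): wakeupPub = n.advertise<std_msgs::String>("voiceWakeup", 10);

void rearmRecognition()
{
    std_msgs::String wakeup;
    wakeup.data = "wake up";   // iat_publish only checks that a message arrived, not its content
    wakeupPub.publish(wakeup);
}
// Call rearmRecognition() at the end of voiceWordsCallback, after the sleep(3).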
1. ROS探索總結(二十八)——機器聽覺
2. ROS Kinetic使用PocketSphinx進行語音識別: https://blog.csdn.net/seeseeatre/article/details/79228816
3. 訊飛開放平臺——以語音交互爲核心的人工智能開放平臺: http://www.xfyun.cn/