Text-to-speech
pip install baidu-aip  # install the Baidu AI SDK
The relevant usage is introduced above.
from aip import AipSpeech  # AipSpeech lives in the SDK package

""" Your APPID / API Key / Secret Key """
APP_ID = '15421077'
API_KEY = 'D8qPp3uMDcjvEfkFzSKWUlji'
SECRET_KEY = 'gvHXZFpOaZrkhDSxPiiyQvN3ZUjb4ECU'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

result = client.synthesis('你好', 'zh', 1, {
    'vol': 5,  # volume
    'spd': 3,  # speed
    'pit': 9,  # pitch
    'per': 5   # voice persona
})

# On success the synthesized audio comes back as binary data;
# on failure a dict is returned (see the error-code table)
if not isinstance(result, dict):
    with open('audio.mp3', 'wb') as f:
        f.write(result)
You can see that an audio.mp3 file has been generated.
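To audition the result quickly, you can hand the file to the system's default player. This is a minimal, platform-dependent sketch (the file name audio.mp3 comes from the snippet above; ffplay ships with ffmpeg, which is installed in the next step anyway):

import os
import subprocess
import sys

# Open the synthesized file with whatever player the OS provides
if sys.platform.startswith('win'):
    os.startfile('audio.mp3')                                         # Windows default player
elif sys.platform == 'darwin':
    subprocess.call(['afplay', 'audio.mp3'])                          # macOS built-in player
else:
    subprocess.call(['ffplay', '-nodisp', '-autoexit', 'audio.mp3'])  # comes with ffmpeg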
Speech recognition supports only the following formats: pcm (uncompressed), wav (uncompressed, PCM-encoded), and amr (lossy compression), at an 8k/16k sample rate, 16-bit depth, mono. So the audio has to be converted first.
First, prepare the transcoding tool ffmpeg and add it to your PATH environment variable:
https://pan.baidu.com/s/1jonSAa_TG2XuaJEy3iTmHg
Password: w6hk
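A quick way to confirm ffmpeg is actually reachable from Python (a small sketch; shutil.which returns None when the executable is not on the PATH):

import shutil

# Prints the full path to ffmpeg, or None if it is not on the PATH yet
print(shutil.which('ffmpeg'))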
ffmpeg -y -i 16k.wav -acodec pcm_s16le -f s16le -ac 1 -ar 16000 16k.pcm
from aip import AipSpeech
import os

""" Your APPID / API Key / Secret Key """
APP_ID = '15421077'
API_KEY = 'D8qPp3uMDcjvEfkFzSKWUlji'
SECRET_KEY = 'gvHXZFpOaZrkhDSxPiiyQvN3ZUjb4ECU'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Read a file, transcoding it to 16k mono 16-bit PCM first
def get_file_content(filePath):
    os.system(f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()

# Recognize a local file
res = client.asr(get_file_content('audio.mp3'), 'pcm', 16000, {
    'dev_pid': 1536,  # Mandarin model
})
print(res.get("result")[0])
First, record some audio yourself.
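If you have no recording handy, this sketch captures five seconds of 16 kHz mono audio into a WAV file. It assumes PyAudio is installed (pip install pyaudio); the file name my_record.wav is arbitrary:

import wave
import pyaudio

RATE, CHUNK, SECONDS = 16000, 1024, 5

pa = pyaudio.PyAudio()
stream = pa.open(format=pyaudio.paInt16, channels=1, rate=RATE,
                 input=True, frames_per_buffer=CHUNK)
frames = [stream.read(CHUNK) for _ in range(int(RATE / CHUNK * SECONDS))]
stream.stop_stream()
stream.close()
pa.terminate()

# Write a 16-bit mono WAV that matches the ASR requirements above
with wave.open('my_record.wav', 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(2)  # 16-bit samples are 2 bytes wide
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))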
from aip import AipSpeech
import os
import time

""" Your APPID / API Key / Secret Key """
APP_ID = '15421077'
API_KEY = 'D8qPp3uMDcjvEfkFzSKWUlji'
SECRET_KEY = 'gvHXZFpOaZrkhDSxPiiyQvN3ZUjb4ECU'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

###############################################
# Speech recognition

# Read a file, transcoding it to 16k mono PCM first
def get_file_content(filePath):
    os.system(f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()

def audio2text(filePath):
    # Recognize a local file
    res = client.asr(get_file_content(filePath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    return res.get("result")[0]

###############################################
# Speech synthesis
def text2audio(text):
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'spd': 3,
        'pit': 9,
        'per': 5
    })
    # On success the audio comes back as binary; on failure a dict (see the error codes)
    if not isinstance(result, dict):
        with open(filename, 'wb') as f:
            f.write(result)
    return filename

##################################################################
text = audio2text('1547553000.3612237.mp3')  # speech recognition
filename = text2audio(text)                  # speech synthesis
os.system(filename)                          # open the audio file with the default player (Windows)
Ask a question, the machine answers
from aip import AipSpeech
import os
import time

""" Your APPID / API Key / Secret Key """
APP_ID = '15421077'
API_KEY = 'D8qPp3uMDcjvEfkFzSKWUlji'
SECRET_KEY = 'gvHXZFpOaZrkhDSxPiiyQvN3ZUjb4ECU'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

###############################################
# Speech recognition

# Read a file, transcoding it to 16k mono PCM first
def get_file_content(filePath):
    os.system(f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()

def audio2text(filePath):
    # Recognize a local file
    res = client.asr(get_file_content(filePath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    return res.get("result")[0]

###############################################
# Speech synthesis
def text2audio(text):
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'spd': 3,
        'pit': 9,
        'per': 5
    })
    # On success the audio comes back as binary; on failure a dict (see the error codes)
    if not isinstance(result, dict):
        with open(filename, 'wb') as f:
            f.write(result)
    return filename

text = audio2text('audio.mp3')  # speech recognition
print(text)
if text == '你叫什麼名字':  # "What is your name?"
    text = '我叫小豬配齊'   # the canned reply
filename = text2audio(text)  # speech synthesis
os.system(filename)          # play the reply
This approach is fragile: phrase the question differently and the exact-match check fails. So there is another option, text-similarity scoring from natural language processing.
import os
import time
from aip import AipSpeech
from aip import AipNlp  # NLP: natural language processing

""" Your APPID / API Key / Secret Key """
APP_ID = '15421077'
API_KEY = 'D8qPp3uMDcjvEfkFzSKWUlji'
SECRET_KEY = 'gvHXZFpOaZrkhDSxPiiyQvN3ZUjb4ECU'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

###############################################
# Speech recognition

# Read a file, transcoding it to 16k mono PCM first
def get_file_content(filePath):
    os.system(f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()

def audio2text(filePath):
    # Recognize a local file
    res = client.asr(get_file_content(filePath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    return res.get("result")[0]

###############################################
# Speech synthesis
def text2audio(text):
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'spd': 3,
        'pit': 9,
        'per': 5
    })
    # On success the audio comes back as binary; on failure a dict (see the error codes)
    if not isinstance(result, dict):
        with open(filename, 'wb') as f:
            f.write(result)
    return filename

text = audio2text('audio.mp3')  # speech recognition

# simnet scores the similarity of two texts on a 0-1 scale
if nlp.simnet('你叫什麼名字', text).get('score') >= 0.58:  # 0.58 is a tunable threshold
    text = '我叫小豬配齊'
else:
    text = '我不知道你在說什麼'  # "I don't know what you are saying"
filename = text2audio(text)  # speech synthesis
os.system(filename)          # play the reply
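To choose a sensible cutoff it helps to print a few scores first. A quick sketch (the example phrasings are arbitrary; per Baidu's docs the simnet score falls between 0 and 1):

# Compare the reference question against several phrasings
for q in ['你叫什麼名字', '你是誰', '今天天氣怎麼樣']:
    print(q, nlp.simnet('你叫什麼名字', q).get('score'))
# Similar questions score close to 1; set the threshold just below
# the scores your real paraphrases produce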
You can consult the usage documentation (the code below calls the Tuling robot API at openapi.tuling123.com).
import requests

args = {
    "reqType": 0,
    "perception": {
        "inputText": {
            "text": "北京今天的天氣怎麼樣"
        },
    },
    "userInfo": {
        "apiKey": "8940b5fa71984863b349d66894988538",
        "userId": "111"
    }
}
url = 'http://openapi.tuling123.com/openapi/api/v2'
res = requests.post(url, json=args)
print(res.json().get('results')[0].get('values').get('text'))
import requests
import os
import time
from aip import AipSpeech
from aip import AipNlp

""" Your APPID / API Key / Secret Key """
APP_ID = '15421077'
API_KEY = 'D8qPp3uMDcjvEfkFzSKWUlji'
SECRET_KEY = 'gvHXZFpOaZrkhDSxPiiyQvN3ZUjb4ECU'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

###############################################
# Speech recognition

# Read a file, transcoding it to 16k mono PCM first
def get_file_content(filePath):
    os.system(f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()

def audio2text(filePath):
    # Recognize a local file
    res = client.asr(get_file_content(filePath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    return res.get("result")[0]

###############################################
# Speech synthesis
def text2audio(text):
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'spd': 3,
        'pit': 9,
        'per': 5
    })
    # On success the audio comes back as binary; on failure a dict (see the error codes)
    if not isinstance(result, dict):
        with open(filename, 'wb') as f:
            f.write(result)
    return filename

#################################
# Hand anything we cannot answer ourselves to the Tuling robot
def to_tuling(text):
    args = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            },
        },
        "userInfo": {
            "apiKey": "8940b5fa71984863b349d66894988538",
            "userId": "111"
        }
    }
    url = 'http://openapi.tuling123.com/openapi/api/v2'
    res = requests.post(url, json=args)
    text = res.json().get('results')[0].get('values').get('text')
    return text

text = audio2text('audio.mp3')  # speech recognition: the question
if nlp.simnet('你叫什麼名字', text).get('score') >= 0.58:  # simnet score is 0-1
    text = '我叫小豬配齊'
else:
    text = to_tuling(text)  # answer from the third-party bot
print(text)
filename = text2audio(text)  # speech synthesis: the answer
os.system(filename)          # play the reply
from flask import Flask, render_template, request, jsonify, send_file
from uuid import uuid4
import requests
import os
import time
from aip import AipSpeech
from aip import AipNlp

""" Your APPID / API Key / Secret Key """
APP_ID = '15421077'
API_KEY = 'D8qPp3uMDcjvEfkFzSKWUlji'
SECRET_KEY = 'gvHXZFpOaZrkhDSxPiiyQvN3ZUjb4ECU'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

###############################################
# Speech recognition

# Read a file, transcoding it to 16k mono PCM first
def get_file_content(filePath):
    os.system(f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()

def audio2text(filePath):
    # Recognize a local file
    res = client.asr(get_file_content(filePath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    return res.get("result")[0]

###############################################
# Speech synthesis
def text2audio(text):
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'spd': 3,
        'pit': 9,
        'per': 5
    })
    # On success the audio comes back as binary; on failure a dict (see the error codes)
    if not isinstance(result, dict):
        with open(filename, 'wb') as f:
            f.write(result)
    return filename

#################################
def to_tuling(text):
    args = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            },
        },
        "userInfo": {
            "apiKey": "8940b5fa71984863b349d66894988538",
            "userId": "111"
        }
    }
    url = 'http://openapi.tuling123.com/openapi/api/v2'
    res = requests.post(url, json=args)
    text = res.json().get('results')[0].get('values').get('text')
    return text

app = Flask(__name__)

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/ai', methods=['GET', 'POST'])
def ai():
    # 1. Save the uploaded recording
    audio = request.files.get('record')
    filename = f'{uuid4()}.wav'
    audio.save(filename)
    # 2. Convert the recording to PCM and send it to Baidu AI for recognition
    q_text = audio2text(filename)
    # 3. Hand the recognized question to the Tuling robot and get an answer
    a_text = to_tuling(q_text)
    # 4. Send the answer to Baidu AI for speech synthesis
    a_file = text2audio(a_text)
    # 5. Tell the front end which audio file to play
    print(a_file)
    return jsonify({'filename': a_file})

# Serve audio files to the front end
@app.route('/get_audio/<filename>')
def get_audio(filename):
    return send_file(filename)

if __name__ == '__main__':
    app.run('0.0.0.0', 9527, debug=True)
templates/index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<audio src="" controls autoplay id="player"></audio>
<p>
    <button onclick="start_reco()" style="background-color: yellow">Record voice command</button>
</p>
<p>
    <button onclick="stop_reco_audio()" style="background-color: blue">Send voice command</button>
</p>
</body>
<script type="text/javascript" src="/static/jQuery3.1.1.js"></script>
<script type="text/javascript" src="/static/Recorder.js"></script>
<script type="text/javascript">
    var reco = null;
    // Audio context for the browser's audio processing
    var audio_context = new AudioContext();
    // The user's media devices (legacy API; modern browsers expose navigator.mediaDevices.getUserMedia)
    navigator.getUserMedia = (navigator.getUserMedia ||
        navigator.webkitGetUserMedia ||
        navigator.mozGetUserMedia ||
        navigator.msGetUserMedia);

    navigator.getUserMedia({audio: true}, create_stream, function (err) {
        console.log(err)
    });

    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media); // create the media stream
        reco = new Recorder(stream_input); // wrap the stream; Recorder holds the recorded audio
    }

    function start_reco() { // start recording
        reco.record();
    }

    function stop_reco_audio() { // stop recording, send it, then clear the buffer
        reco.stop();
        send_audio();
        reco.clear();
    }

    function send_audio() {
        reco.exportWAV(function (wav_file) {
            // upload the WAV blob to the Flask server
            var formdata = new FormData();
            formdata.append('record', wav_file);
            console.log(formdata);
            $.ajax({
                url: "http://192.168.13.142:9527/ai",
                type: 'post',
                processData: false,
                contentType: false,
                data: formdata,
                dataType: 'json',
                success: function (data) {
                    // point the player at the synthesized reply
                    document.getElementById('player').src = 'http://192.168.13.142:9527/get_audio/' + data.filename;
                }
            });
        })
    }
</script>
</html>
static/Recorder.js
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Recorder = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ "use strict"; module.exports = require("./recorder").Recorder; },{"./recorder":2}],2:[function(require,module,exports){ 'use strict'; var _createClass = (function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i];descriptor.enumerable = descriptor.enumerable || false;descriptor.configurable = true;if ("value" in descriptor) descriptor.writable = true;Object.defineProperty(target, descriptor.key, descriptor); } }return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps);if (staticProps) defineProperties(Constructor, staticProps);return Constructor; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.Recorder = undefined; var _inlineWorker = require('inline-worker'); var _inlineWorker2 = _interopRequireDefault(_inlineWorker); function _interopRequireDefault(obj) { return obj && obj.__esModule ? 
obj : { default: obj }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } var Recorder = exports.Recorder = (function () { function Recorder(source, cfg) { var _this = this; _classCallCheck(this, Recorder); this.config = { bufferLen: 4096, numChannels: 2, mimeType: 'audio_pcm/wav' }; this.recording = false; this.callbacks = { getBuffer: [], exportWAV: [] }; Object.assign(this.config, cfg); this.context = source.context; this.node = (this.context.createScriptProcessor || this.context.createJavaScriptNode).call(this.context, this.config.bufferLen, this.config.numChannels, this.config.numChannels); this.node.onaudioprocess = function (e) { if (!_this.recording) return; var buffer = []; for (var channel = 0; channel < _this.config.numChannels; channel++) { buffer.push(e.inputBuffer.getChannelData(channel)); } _this.worker.postMessage({ command: 'record', buffer: buffer }); }; source.connect(this.node); this.node.connect(this.context.destination); //this should not be necessary var self = {}; this.worker = new _inlineWorker2.default(function () { var recLength = 0, recBuffers = [], sampleRate = undefined, numChannels = undefined; self.onmessage = function (e) { switch (e.data.command) { case 'init': init(e.data.config); break; case 'record': record(e.data.buffer); break; case 'exportWAV': exportWAV(e.data.type); break; case 'getBuffer': getBuffer(); break; case 'clear': clear(); break; } }; function init(config) { sampleRate = config.sampleRate; numChannels = config.numChannels; initBuffers(); } function record(inputBuffer) { for (var channel = 0; channel < numChannels; channel++) { recBuffers[channel].push(inputBuffer[channel]); } recLength += inputBuffer[0].length; } function exportWAV(type) { var buffers = []; for (var channel = 0; channel < numChannels; channel++) { buffers.push(mergeBuffers(recBuffers[channel], recLength)); } var interleaved = undefined; if (numChannels === 2) { interleaved = interleave(buffers[0], buffers[1]); } else { interleaved = buffers[0]; } var dataview = encodeWAV(interleaved); var audioBlob = new Blob([dataview], { type: type }); self.postMessage({ command: 'exportWAV', data: audioBlob }); } function getBuffer() { var buffers = []; for (var channel = 0; channel < numChannels; channel++) { buffers.push(mergeBuffers(recBuffers[channel], recLength)); } self.postMessage({ command: 'getBuffer', data: buffers }); } function clear() { recLength = 0; recBuffers = []; initBuffers(); } function initBuffers() { for (var channel = 0; channel < numChannels; channel++) { recBuffers[channel] = []; } } function mergeBuffers(recBuffers, recLength) { var result = new Float32Array(recLength); var offset = 0; for (var i = 0; i < recBuffers.length; i++) { result.set(recBuffers[i], offset); offset += recBuffers[i].length; } return result; } function interleave(inputL, inputR) { var length = inputL.length + inputR.length; var result = new Float32Array(length); var index = 0, inputIndex = 0; while (index < length) { result[index++] = inputL[inputIndex]; result[index++] = inputR[inputIndex]; inputIndex++; } return result; } function floatTo16BitPCM(output, offset, input) { for (var i = 0; i < input.length; i++, offset += 2) { var s = Math.max(-1, Math.min(1, input[i])); output.setInt16(offset, s < 0 ? 
s * 0x8000 : s * 0x7FFF, true); } } function writeString(view, offset, string) { for (var i = 0; i < string.length; i++) { view.setUint8(offset + i, string.charCodeAt(i)); } } function encodeWAV(samples) { var buffer = new ArrayBuffer(44 + samples.length * 2); var view = new DataView(buffer); /* RIFF identifier */ writeString(view, 0, 'RIFF'); /* RIFF chunk length */ view.setUint32(4, 36 + samples.length * 2, true); /* RIFF type */ writeString(view, 8, 'WAVE'); /* format chunk identifier */ writeString(view, 12, 'fmt '); /* format chunk length */ view.setUint32(16, 16, true); /* sample format (raw) */ view.setUint16(20, 1, true); /* channel count */ view.setUint16(22, numChannels, true); /* sample rate */ view.setUint32(24, sampleRate, true); /* byte rate (sample rate * block align) */ view.setUint32(28, sampleRate * 4, true); /* block align (channel count * bytes per sample) */ view.setUint16(32, numChannels * 2, true); /* bits per sample */ view.setUint16(34, 16, true); /* data chunk identifier */ writeString(view, 36, 'data'); /* data chunk length */ view.setUint32(40, samples.length * 2, true); floatTo16BitPCM(view, 44, samples); return view; } }, self); this.worker.postMessage({ command: 'init', config: { sampleRate: this.context.sampleRate, numChannels: this.config.numChannels } }); this.worker.onmessage = function (e) { var cb = _this.callbacks[e.data.command].pop(); if (typeof cb == 'function') { cb(e.data.data); } }; } _createClass(Recorder, [{ key: 'record', value: function record() { this.recording = true; } }, { key: 'stop', value: function stop() { this.recording = false; } }, { key: 'clear', value: function clear() { this.worker.postMessage({ command: 'clear' }); } }, { key: 'getBuffer', value: function getBuffer(cb) { cb = cb || this.config.callback; if (!cb) throw new Error('Callback not set'); this.callbacks.getBuffer.push(cb); this.worker.postMessage({ command: 'getBuffer' }); } }, { key: 'exportWAV', value: function exportWAV(cb, mimeType) { mimeType = mimeType || this.config.mimeType; cb = cb || this.config.callback; if (!cb) throw new Error('Callback not set'); this.callbacks.exportWAV.push(cb); this.worker.postMessage({ command: 'exportWAV', type: mimeType }); } }], [{ key: 'forceDownload', value: function forceDownload(blob, filename) { var url = (window.URL || window.webkitURL).createObjectURL(blob); var link = window.document.createElement('a'); link.href = url; link.download = filename || 'output.wav'; var click = document.createEvent("Event"); click.initEvent("click", true, true); link.dispatchEvent(click); } }]); return Recorder; })(); exports.default = Recorder; },{"inline-worker":3}],3:[function(require,module,exports){ "use strict"; module.exports = require("./inline-worker"); },{"./inline-worker":4}],4:[function(require,module,exports){ (function (global){ "use strict"; var _createClass = (function () { function defineProperties(target, props) { for (var key in props) { var prop = props[key]; prop.configurable = true; if (prop.value) prop.writable = true; } Object.defineProperties(target, props); } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })(); var _classCallCheck = function (instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }; var WORKER_ENABLED = !!(global === global.window && global.URL && global.Blob && 
global.Worker); var InlineWorker = (function () { function InlineWorker(func, self) { var _this = this; _classCallCheck(this, InlineWorker); if (WORKER_ENABLED) { var functionBody = func.toString().trim().match(/^function\s*\w*\s*\([\w\s,]*\)\s*{([\w\W]*?)}$/)[1]; var url = global.URL.createObjectURL(new global.Blob([functionBody], { type: "text/javascript" })); return new global.Worker(url); } this.self = self; this.self.postMessage = function (data) { setTimeout(function () { _this.onmessage({ data: data }); }, 0); }; setTimeout(function () { func.call(self); }, 0); } _createClass(InlineWorker, { postMessage: { value: function postMessage(data) { var _this = this; setTimeout(function () { _this.self.onmessage({ data: data }); }, 0); } } }); return InlineWorker; })(); module.exports = InlineWorker; }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) },{}]},{},[1])(1) });
Also download the file jQuery3.1.1.js:
https://blog-static.cnblogs.com/files/lilz/jQuery3.1.1.js
Test:
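Besides clicking through the page, you can smoke-test the /ai endpoint directly. A sketch (it assumes the server is running locally and that my_record.wav exists; swap in your own host and file):

import requests

HOST = 'http://127.0.0.1:9527'

# Upload a recording exactly like the browser form does
with open('my_record.wav', 'rb') as f:
    res = requests.post(f'{HOST}/ai', files={'record': f})

# Fetch the synthesized reply and save it locally
reply = res.json()['filename']
audio = requests.get(f'{HOST}/get_audio/{reply}')
with open('reply.mp3', 'wb') as f:
    f.write(audio.content)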
Please learn the basics of MongoDB first:
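For reference, this is the minimal pymongo usage the code below relies on (a sketch; it assumes a local mongod listening on the default port 27017):

import pymongo

client = pymongo.MongoClient(host='127.0.0.1', port=27017)
db = client['locals']                    # databases and collections are created lazily
db.users.insert_one({'chat_list': []})   # insert one document into the users collection
print(db.users.find_one())               # read a document back, _id included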
from flask import Flask, render_template, request, jsonify, send_file
from uuid import uuid4
import requests
import os
import time
from aip import AipSpeech
from aip import AipNlp
import pymongo

""" Your APPID / API Key / Secret Key """
APP_ID = '15421077'
API_KEY = 'D8qPp3uMDcjvEfkFzSKWUlji'
SECRET_KEY = 'gvHXZFpOaZrkhDSxPiiyQvN3ZUjb4ECU'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

################## Connect to the local database ##############
mongoclient = pymongo.MongoClient(host='127.0.0.1', port=27017)
MongoDB = mongoclient['locals']  # select the database

###############################################
# Speech recognition

# Read a file, transcoding it to 16k mono PCM first
def get_file_content(filePath):
    os.system(f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()

def audio2text(filePath):
    # Recognize a local file
    res = client.asr(get_file_content(filePath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    return res.get("result")[0]

###############################################
# Speech synthesis
def text2audio(text):
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'spd': 3,
        'pit': 9,
        'per': 5
    })
    # On success the audio comes back as binary; on failure a dict (see the error codes)
    if not isinstance(result, dict):
        with open(filename, 'wb') as f:
            f.write(result)
    return filename

#################################
def to_tuling(text):
    args = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            },
        },
        "userInfo": {
            "apiKey": "8940b5fa71984863b349d66894988538",
            "userId": "111"
        }
    }
    url = 'http://openapi.tuling123.com/openapi/api/v2'
    res = requests.post(url, json=args)
    text = res.json().get('results')[0].get('values').get('text')
    return text

app = Flask(__name__)

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/ai', methods=['GET', 'POST'])
def ai():
    # 1. Save the uploaded recording
    audio = request.files.get('record')
    filename = f'{uuid4()}.wav'
    audio.save(filename)
    # 2. Convert the recording to PCM and send it to Baidu AI for recognition
    q_text = audio2text(filename)
    # 3. Hand the recognized question to the Tuling robot and get an answer
    a_text = to_tuling(q_text)
    # 4. Send the answer to Baidu AI for speech synthesis
    a_file = text2audio(a_text)
    # Persist the question/answer pair
    MongoDB.users.insert_one({'chat_list': [{'u': q_text}, {'a': a_text}]})
    # 5. Tell the front end which audio file to play
    print(a_file)
    return jsonify({'filename': a_file})

# Serve audio files to the front end
@app.route('/get_audio/<filename>')
def get_audio(filename):
    return send_file(filename)

if __name__ == '__main__':
    app.run('0.0.0.0', 9527, debug=True)
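Each request stores one question/answer pair, so the chat history can be reviewed later. A small sketch against the same collection:

import pymongo

db = pymongo.MongoClient(host='127.0.0.1', port=27017)['locals']

# Print every stored exchange; each document holds one Q/A pair
for doc in db.users.find():
    print(doc['chat_list'])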
Test