實時語音轉文字app簡易demo(這裏使用了阿里的接口,也可以替換成科大訊飛)

這裏只是功能實現(我不是android開發),可能有很多更好的實現方式。開發這個功能的起因是之前看到過阿里的實時語音轉文字接口,當時就想把這個功能做到手機上;自己又是java開發,就百度了點基礎的android知識,做了個簡單的實現。

主方法(手機端):主要任務是採集聲音,以流的形式發送到後臺。

package com.hht.myapplication;

import android.Manifest;
import android.text.ClipboardManager;
import android.content.Context;
import android.content.pm.PackageManager;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Bundle;
import android.os.Handler;
import android.support.v4.app.ActivityCompat;
import android.support.v4.content.ContextCompat;
import android.support.v7.app.AppCompatActivity;
import android.text.method.ScrollingMovementMethod;
import android.view.View;
import android.widget.TextView;
import android.widget.Toast;

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.Socket;

/**
 * 語音小助手
 * hht
 */
public class MainActivity extends AppCompatActivity {
    //接收轉換的文字流
    DataInputStream dis;
    //音頻流上傳通道
    OutputStream ous;

    String serverIp = "101.201.XXX.XXX";
    int serverPort = 5555;

    private TextView realText;
    private TextView finalText;

    //音頻相關
    AudioRecord audioRecord=null;
    int bufferSize=0;//最小緩衝區大小
    int sampleRateInHz = 16000;//採樣率
    int channelConfig = AudioFormat.CHANNEL_IN_DEFAULT; //單聲道
    int audioFormat = AudioFormat.ENCODING_PCM_16BIT; //量化位數
    private boolean isRecording = true;
    private Handler handler=null;


    private  String content ;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        realText = (TextView) findViewById(R.id.textView2);
        finalText = (TextView) findViewById(R.id.textView);
        finalText.setMovementMethod(ScrollingMovementMethod.getInstance()) ;
        handler = new Handler();

        if (ContextCompat.checkSelfPermission(this, Manifest.permission.CALL_PHONE)  != PackageManager.PERMISSION_GRANTED)  {
            ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO},1);
        } else {
            init();
        }
    }

    public void onClickCopy(View v) {
        // 從API11開始android推薦使用android.content.ClipboardManager
        // 爲了兼容低版本咱們這裏使用舊版的android.text.ClipboardManager,雖然提示deprecated,但不影響使用。
        ClipboardManager cm = (ClipboardManager) getSystemService(Context.CLIPBOARD_SERVICE);
        // 將文本內容放到系統剪貼板裏。
        cm.setText(finalText.getText());
        Toast.makeText(this, "複製成功,能夠發給朋友們了。", Toast.LENGTH_LONG).show();
    }


    public void init(){
        System.out.println("初始化錄音");
        bufferSize = AudioRecord.getMinBufferSize(sampleRateInHz,channelConfig, audioFormat)+1000;//計算最小緩衝區
        try{
            audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC,sampleRateInHz,channelConfig, audioFormat, bufferSize);//建立AudioRecorder對象

            Runnable startRunnable = new Runnable(){
                @Override
                public void run() {
                    connect();
                    new sendDataThread().start();
                    new getDataThread().start();
                }
            };
            new Thread(startRunnable).start();
        }catch (Exception e){
            e.printStackTrace();
        }

        System.out.println("初始化錄音成功");
    }


    public void connect(){
        try{
            Socket socket = new Socket(serverIp,serverPort);
            InputStream is=socket.getInputStream();
            ous = socket.getOutputStream();
            dis=new DataInputStream(is);
        }catch(IOException e){
            e.printStackTrace();
        }
    }




    //發送數據
    class sendDataThread extends Thread {
        public void run() {
            byte[] buffer = new byte[bufferSize];
            audioRecord.startRecording();//開始錄音
            int r = 0;

            try {
                while (isRecording&&audioRecord.read(buffer,0,bufferSize)>0) {
                    ous.write(buffer);
                    ous.flush();
                }
                audioRecord.stop();//中止錄音
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }

    //接收數據
    class getDataThread extends Thread {
        public void run() {
            while (true) {
                String msg;
                try {
                    msg = dis.readUTF();
                    if (!"status".equals(msg)&&!"".equals(msg)) {//status 爲心跳檢測
                        content=msg;
                        handler.post(runnableUi);

                    }
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }

    // 構建Runnable對象,在runnable中更新界面
    Runnable   runnableUi=new  Runnable(){
        @Override
        public void run() {
            //更新界面
            realText.setText(content);
            finalText.append(content);
            //跳轉到底部
            int offset=finalText.getLineCount()*finalText.getLineHeight();
            if(finalText.getLineCount()>15 && offset>finalText.getHeight()){
                finalText.scrollTo(0,offset-finalText.getHeight());
            }
        }
    };

    @Override
    public void onRequestPermissionsResult(int requestCode, String permissions[], int[] grantResults) {
        switch (requestCode) {
            case 1: {
                if (grantResults.length > 0  && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
                    // 權限被用戶贊成,能夠去放肆了。
                    init();
                } else {
                    // 權限被用戶拒絕了,洗洗睡吧。
                }
                return;
            }
        }
    }
}

服務器端:接收流信息並上傳到阿里,獲取實時轉換結果。這裏應該有自己的控制策略,我這裏只做了實現,沒有加策略。

這部分代碼用於接收和處理客戶端請求:

package com.hmkx.freezingapi.rest.jkjweb;

import java.io.Closeable;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hmkx.freezingapi.util.ali.RealtimeAsrDemo;

/**
 * Listener thread of the speech-to-text service.
 *
 * <p>Accepts client connections on port 5555 and starts one {@link RealtimeAsrDemo}
 * worker thread per client. Before each accept, every registered client is sent the
 * heartbeat message {@code "status"}; clients whose heartbeat fails are closed and
 * removed.
 *
 * @author hht
 * @since 2018-06-12
 */
public class AudioChangeServerThread extends Thread {

	private static Logger log = LoggerFactory.getLogger(AudioChangeServerThread.class);
	/** Pause between capacity checks while the connection limit is reached. */
	private static final long FULL_RETRY_DELAY_MS = 1000L;
	static int cCount = 0;// current number of connections
	static int maxCount = 10;// maximum number of connections
	static ServerSocket server = null;
	// sockets and socketsOut are parallel lists: index i describes the same client.
	static List<DataOutputStream>socketsOut = new ArrayList<DataOutputStream>();
	static List<Socket>sockets = new ArrayList<Socket>();
	// Set to false to stop accepting; volatile because worker threads read it.
	public static volatile boolean runStatus = false;


	/**
	 * Opens the server socket and accepts clients until {@code runStatus} becomes
	 * false or the server socket is closed. Only sets {@code runStatus} when a server
	 * socket already exists.
	 */
	@Override
	public void run(){
		runStatus = true;
		if (server != null) {
			return;
		}
		try {
			server = new ServerSocket(5555);
		} catch (IOException e) {
			// Without a server socket there is nothing to accept on.
			log.error("cannot open server socket on port 5555", e);
			runStatus = false;
			return;
		}
		try {
			while (runStatus && !server.isClosed()) {
				// Drop dead connections before deciding whether there is room for more.
				validateSocket();
				if (cCount >= maxCount) {
					// At capacity: wait instead of spinning, then re-check the clients.
					try {
						Thread.sleep(FULL_RETRY_DELAY_MS);
					} catch (InterruptedException e) {
						Thread.currentThread().interrupt();
						break;
					}
					continue;
				}
				acceptClient();
				log.info("has connected {}", cCount);
			}
		} finally {
			// Release the port so the service can be started again later.
			Aclose(server);
			server = null;
		}
	}

	/** Accepts one client, registers its socket and stream, and starts its worker. */
	private void acceptClient(){
		Socket client = null;
		try {
			client = server.accept();
			OutputStream os = client.getOutputStream();
			DataOutputStream dos = new DataOutputStream(os);
			InputStream ins = client.getInputStream();
			RealtimeAsrDemo worker = new RealtimeAsrDemo(ins, dos);
			// Register both entries together so the two lists stay index-aligned.
			sockets.add(client);
			socketsOut.add(dos);
			cCount++;
			log.info("audio change start ....");
			new Thread(worker).start();
		} catch (IOException e) {
			log.error("has connected io exception", e);
			Aclose(client);
		}
	}

	/**
	 * Closes and unregisters a client.
	 *
	 * @param i index of the client in {@code sockets} / {@code socketsOut}
	 */
	public static void removeSocket(int i){
		Aclose(sockets.get(i));
		Aclose(socketsOut.get(i));
		sockets.remove(i);
		socketsOut.remove(i);
		cCount--;
	}

	/**
	 * Sends the heartbeat message {@code "status"} to every registered client and
	 * removes those for which the write fails.
	 */
	public void validateSocket(){
		// NOTE(review): the worker writes recognition results to the same stream from
		// another thread; confirm that these writes cannot interleave with the heartbeat.
		// Iterate backwards so removing entry i does not shift entries still to be visited.
		for (int i = socketsOut.size() - 1; i >= 0; i--) {
			try {
				DataOutputStream out = socketsOut.get(i);
				out.writeUTF("status");
				out.flush();
			} catch (IOException e) {
				removeSocket(i);
			}
		}
	}


	/**
	 * Closes every argument that is {@link Closeable}; other arguments and nulls are
	 * ignored. Close failures are logged, not thrown.
	 *
	 * @param o resources to close
	 */
	public static  void Aclose(Object...o){
		if (o == null) {
			return;
		}
		for (Object resource : o) {
			// Socket and ServerSocket implement Closeable (Java 7+), so one branch suffices.
			if (resource instanceof Closeable) {
				try {
					((Closeable) resource).close();
				} catch (IOException e) {
					log.warn("error while closing resource", e);
				}
			}
		}
	}

}

下面這部分代碼處理音頻流和阿里雲服務接口的交互,用到的阿里的包請自行引入。(最近科大訊飛也提供了類似的接口,貌似效果更好,只要替換這部分邏輯就可以。)

package com.hmkx.freezingapi.util.ali;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.fastjson.JSON;
import com.alibaba.idst.nls.realtime.NlsClient;
import com.alibaba.idst.nls.realtime.NlsFuture;
import com.alibaba.idst.nls.realtime.event.NlsEvent;
import com.alibaba.idst.nls.realtime.event.NlsListener;
import com.alibaba.idst.nls.realtime.protocol.NlsRequest;
import com.alibaba.idst.nls.realtime.protocol.NlsResponse;
import com.hmkx.freezingapi.rest.jkjweb.AudioChangeServerThread;

/**
 * Real-time speech-to-text worker for one client.
 *
 * <p>Reads audio from the client's input stream, forwards it to the NLS service and
 * writes every non-empty recognised text back to the client with
 * {@link DataOutputStream#writeUTF(String)}.
 *
 * @author hht
 *
 */
public class RealtimeAsrDemo implements NlsListener,Runnable{
	protected NlsClient client = new NlsClient();
	protected static final String asrSC = "pcm";

	static Logger logger = LoggerFactory.getLogger(RealtimeAsrDemo.class);
	// Placeholders: replace with the real application key and access key pair.
	public String appKey = "XXX";
	protected String ak_id = "XXX";
	protected String ak_secret = "XXX";
	private InputStream fis = null;
	private DataOutputStream dos = null;

	/**
	 * @param fis audio stream received from the client
	 * @param dos stream on which recognised text is sent back to the client
	 */
	public RealtimeAsrDemo(InputStream fis, DataOutputStream dos) {
		this.fis = fis;
		this.dos = dos;
	}

	/** Closes the NLS client. */
	public void shutDown() {
		logger.info("close NLS client manually!");
		client.close();
		logger.info("demo done");
	}

	/** Initialises the NLS client. */
	public void init() {
		logger.info("init Nls client...");
		client.init();
	}

	/** Processes the stream passed to the constructor; does nothing when it is null. */
	public void process() {
		logger.info("open audio file...");
		if (fis != null) {
			logger.info("create NLS future");
			process(fis);
			logger.info("calling NLS service end");
		}
	}

	/**
	 * Sends everything readable from {@code ins} to the NLS service, then signals the
	 * end of the audio and waits for the service to finish. Reading stops early when
	 * {@code AudioChangeServerThread.runStatus} becomes false. Failures are logged,
	 * not thrown.
	 *
	 * @param ins audio stream to transcribe
	 */
	public void process(InputStream ins) {
		try {
			NlsRequest req = buildRequest();
			NlsFuture future = client.createNlsFuture(req, this);
			logger.info("call NLS service");
			byte[] chunk = new byte[8000];
			int len;
			while (AudioChangeServerThread.runStatus && (len = ins.read(chunk)) > 0) {
				future.sendVoice(chunk, 0, len);
			}
			logger.info("send finish signal!");
			future.sendFinishSignal();

			logger.info("main thread enter waiting .");
			future.await(100000);

		} catch (Exception e) {
			// Let the logger render the stack trace instead of formatting it by hand.
			logger.error("speech recognition failed", e);
		}
	}

	/** Builds the request: 16 kHz PCM audio, streaming response mode, with credentials. */
	protected NlsRequest buildRequest() {
		NlsRequest req = new NlsRequest();
		req.setAppkey(appKey);
		req.setFormat(asrSC);
		req.setResponseMode("streaming");
		req.setSampleRate(16000);
		// Custom hot words can be enabled by setting a vocabulary id (see the
		// service's hot-word documentation):
		// req.setVocabularyId("");
		// A keyword list can be enabled by setting its id:
		// req.setKeyWordListId("c1391f1c1f1b4002936893c6d97592f3");
		// the id and the id secret
		req.authorize(ak_id, ak_secret);
		return req;

	}

	/** Forwards a successfully recognised, non-empty text to the client. */
	@Override
	public void onMessageReceived(NlsEvent e) {
		NlsResponse response = e.getResponse();
		// NOTE(review): return value unused; presumably a plain getter, confirm before removing.
		response.getFinish();
		if (response.result == null) {
			logger.error(JSON.toJSONString(response));
			return;
		}
		logger.info(response.getResult().toString());
		String content = response.getText();
		// writeUTF(null) would throw, so skip missing text as well as empty text.
		if (response.getResult().getStatus_code() != 0 || content == null || content.isEmpty()) {
			return;
		}
		logger.info(content);
		try {
			dos.writeUTF(content);
			dos.flush();
		} catch (IOException e1) {
			logger.error("failed to send recognised text to client", e1);
		}
	}

	@Override
	public void onOperationFailed(NlsEvent e) {
		// Pass the values as arguments so the {} placeholders are actually filled in.
		logger.error("status code is {}, on operation failed: {}",
				e.getResponse().getStatusCode(), e.getErrorMessage());

	}

	@Override
	public void onChannelClosed(NlsEvent e) {
		logger.info("on websocket closed.");
	}

	/** Runs one complete session and always releases the NLS client and client streams. */
	@Override
	public void run() {
		try {
			init();
			process();
		} finally {
			try {
				shutDown();
			} finally {
				closeClientStreams();
			}
		}
	}

	/** Closes the client's streams so the connection does not stay open after the session. */
	private void closeClientStreams() {
		try {
			if (fis != null) {
				fis.close();
			}
		} catch (IOException e) {
			logger.warn("error while closing client input stream", e);
		}
		try {
			if (dos != null) {
				dos.close();
			}
		} catch (IOException e) {
			logger.warn("error while closing client output stream", e);
		}
	}


}

這樣就可以了:先啓動服務端,然後啓動app,就可以實現語音實時轉文字了。

補充代碼(app的佈局文件):

<?xml version="1.0" encoding="utf-8"?>
<!-- Layout of MainActivity: a live-result line at the top, the scrollable transcript in
     the middle and a copy button at the bottom. -->
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    android:id="@+id/activity_main"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:paddingBottom="@dimen/activity_vertical_margin"
    android:paddingLeft="@dimen/activity_horizontal_margin"
    android:paddingRight="@dimen/activity_horizontal_margin"
    android:paddingTop="@dimen/activity_vertical_margin"
    tools:context="com.hht.myapplication.MainActivity">

    <!-- Most recently received text (realText in MainActivity). -->
    <TextView
        android:text=""
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:id="@+id/textView2"
        android:maxLines="5"
        android:layout_alignParentTop="true"
        android:layout_alignParentStart="true" />

    <!-- Accumulated transcript (finalText in MainActivity); selectable and scrollable. -->
    <TextView
        android:text=""
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:textIsSelectable="true"
        android:focusable="true"
        android:layout_centerVertical="true"
        android:layout_alignParentStart="true"
        android:scrollbars="vertical"
        android:maxLines="15"
        android:fadeScrollbars="false"
        android:id="@+id/textView" />

    <!-- Copies the transcript to the clipboard via MainActivity.onClickCopy.
         match_parent replaces the deprecated fill_parent (same meaning). -->
    <Button
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:onClick="onClickCopy"
        android:text="複製上面的文本內容"
        android:id="@+id/button"
        android:layout_marginBottom="12dp"
        android:layout_alignParentBottom="true"
        android:layout_alignParentEnd="true"
        android:layout_marginEnd="7dp" />

</RelativeLayout>
相關文章
相關標籤/搜索