I've spent the past year at a small company that specializes in AI audio, and I'm currently on my way out the door. I recently put together an Android audio demo for a client; it took me a solid day and touched most of the bases, so I decided to push a little further and write up a summary. The company is small, but it collaborates with the Institute of Acoustics of the Chinese Academy of Sciences and, like iFLYTEK, has its own suite of audio engines for speech recognition, speech transcription, and so on. Small as a sparrow, but with all the organs, as the saying goes. Android audio isn't actually that mysterious; the genuinely mysterious parts are handled for us by dedicated C++/algorithm engineers. You know how it is: I just lay the bricks.
- SpeechToText (STT): record audio with AudioRecord and upload it in two ways, as a local file and over a WebSocket.
- TextToSpeech (TTS): fetch the audio stream from an API and play it with AudioTrack.
- Speex encoding (compression)
I won't cover the underlying theory of TTS/STT here. Even after all this time I understand only a little, just a little, of how it's implemented; it involves psychoacoustics, sound waves, Fourier analysis, and a pile of heavy math, so I won't pretend to be an expert. If you're interested, Google is your friend.
AudioRecord involves an IPC round trip: the Java layer calls through JNI into the native AudioRecord, which talks across processes via the IAudioRecord interface to AudioFlinger. AudioFlinger starts the recording thread and fills a shared-memory buffer with audio data captured from the input source, and the application side then copies that data out into its own buffer.
public AudioRecord(int audioSource,   // audio source, here MediaRecorder.AudioSource.MIC
        int sampleRateInHz,           // sample rate, here 8000
        int channelConfig,            // channel configuration, mono here
        int audioFormat,              // 8- or 16-bit PCM; 16-bit here (the quantization width when digitizing the analog signal)
        int bufferSizeInBytes)        // buffer size, derived from sample rate, channels, and sample format
Step two will compare this against the WebSocket upload. First, parameter initialization (audio input = microphone):
public final static int AUDIO_INPUT = MediaRecorder.AudioSource.MIC;
public final static int AUDIO_SAMPLE_RATE = 8000; // 8 kHz; 44.1 kHz is the usual rate for music, but 8 kHz suffices for speech
public final static int CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO;
public final static int AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT;
private int bufferSizeInBytes = 0; // buffer size in bytes
private AudioRecord audioRecord;
private volatile boolean isRecord = false; // volatile for visibility; tracks whether we are recording
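With these parameters the data rate is easy to reason about; a quick back-of-the-envelope check (a sketch, not part of the demo):

// Sizing sketch for 8 kHz / 16-bit / mono PCM:
// 8000 samples/s * 2 bytes/sample * 1 channel = 16,000 bytes per second,
// so a one-minute utterance is just under 1 MB of raw audio.
int bytesPerSecond = AUDIO_SAMPLE_RATE * 2 * 1; // 16,000
int bytesPerMinute = bytesPerSecond * 60;       // 960,000 (~0.92 MiB)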
// Create the AudioRecord
private void createAudioRecord() {
    // Query the minimum buffer size for these parameters
    bufferSizeInBytes = AudioRecord.getMinBufferSize(AudioFileUtils.AUDIO_SAMPLE_RATE,
            AudioFileUtils.CHANNEL_CONFIG, AudioFileUtils.AUDIO_FORMAT);
    // Mono
    audioRecord = new AudioRecord(AudioFileUtils.AUDIO_INPUT, AudioFileUtils.AUDIO_SAMPLE_RATE,
            AudioFileUtils.CHANNEL_CONFIG, AudioFileUtils.AUDIO_FORMAT, bufferSizeInBytes);
}
// Press to talk: start recording on ACTION_DOWN, stop and transcribe on ACTION_UP
@Override
public boolean onTouch(View v, MotionEvent event) {
    AudioRecordUtils utils = AudioRecordUtils.getInstance();
    switch (event.getAction()) {
        case MotionEvent.ACTION_DOWN:
            utils.startRecordAndFile();
            break;
        case MotionEvent.ACTION_UP:
            utils.stopRecordAndFile();
            Log.d(TAG, "stopRecordAndFile");
            stt();
            break;
    }
    return false;
}
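One thing the snippet above takes for granted: the RECORD_AUDIO permission. On Android 6.0+ it must be requested at runtime, otherwise read() returns silence or errors. A minimal pre-flight check might look like this (the request-code constant and method name are mine, not the demo's):

// Hypothetical check to run before utils.startRecordAndFile():
private static final int REQ_RECORD_AUDIO = 1; // arbitrary request code

private void ensureAudioPermission() {
    if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
            != PackageManager.PERMISSION_GRANTED) {
        ActivityCompat.requestPermissions(this,
                new String[]{Manifest.permission.RECORD_AUDIO}, REQ_RECORD_AUDIO);
    }
}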
// Start recording
public int startRecordAndFile() {
    Log.d("NLPService", "startRecordAndFile");
    // Check that external storage (the sdcard) is available
    if (AudioFileUtils.isSdcardExit()) {
        if (isRecord) {
            return ErrorCode.E_STATE_RECODING;
        } else {
            if (audioRecord == null) {
                createAudioRecord();
            }
            audioRecord.startRecording();
            // Mark that we are recording
            isRecord = true;
            // Spin up the thread that writes the audio to file
            new Thread(new AudioRecordThread()).start();
            return ErrorCode.SUCCESS;
        }
    } else {
        return ErrorCode.E_NOSDCARD;
    }
}
// Recording thread
class AudioRecordThread implements Runnable {
    @Override
    public void run() {
        writeDataToFile(); // write the raw PCM to file
        AudioFileUtils.raw2Wav(mAudioRaw, mAudioWav, bufferSizeInBytes); // prepend a WAV header to the raw data
    }
}
// Write raw PCM data to file
private void writeDataToFile() {
    Log.d("NLPService", "writeDataToFile");
    // Byte array holding one buffer's worth of audio
    byte[] audiodata = new byte[bufferSizeInBytes];
    FileOutputStream fos = null;
    int readsize = 0;
    try {
        File file = new File(mAudioRaw);
        if (file.exists()) {
            file.delete();
        }
        fos = new FileOutputStream(file); // open a byte output stream to the file
    } catch (Exception e) {
        e.printStackTrace();
    }
    while (isRecord) {
        readsize = audioRecord.read(audiodata, 0, bufferSizeInBytes);
        if (AudioRecord.ERROR_INVALID_OPERATION != readsize && fos != null) {
            try {
                // Write only the bytes actually read, not the whole buffer
                fos.write(audiodata, 0, readsize);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    try {
        if (fos != null)
            fos.close(); // close the output stream
    } catch (IOException e) {
        e.printStackTrace();
    }
}
// Add a WAV header to the raw PCM
public static void raw2Wav(String inFilename, String outFilename, int bufferSizeInBytes) {
    Log.d("NLPService", "raw2Wav");
    FileInputStream in = null;
    RandomAccessFile out = null;
    byte[] data = new byte[bufferSizeInBytes];
    try {
        in = new FileInputStream(inFilename);
        out = new RandomAccessFile(outFilename, "rw");
        out.setLength(0); // truncate any previous recording ("rw" does not truncate)
        // Reserve the 44-byte header up front so the PCM lands at offset 44
        fixWavHeader(out, AUDIO_SAMPLE_RATE, 1, AudioFormat.ENCODING_PCM_16BIT);
        int n;
        while ((n = in.read(data)) != -1) {
            out.write(data, 0, n); // write only the bytes actually read
        }
        // Patch the header again now that the final file length is known
        fixWavHeader(out, AUDIO_SAMPLE_RATE, 1, AudioFormat.ENCODING_PCM_16BIT);
        in.close();
        out.close();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
private static void fixWavHeader(RandomAccessFile file, int rate, int channels, int format) {
    try {
        int blockAlign;
        if (format == AudioFormat.ENCODING_PCM_16BIT)
            blockAlign = channels * 2;
        else
            blockAlign = channels;
        int bitsPerSample;
        if (format == AudioFormat.ENCODING_PCM_16BIT)
            bitsPerSample = 16;
        else
            bitsPerSample = 8;
        long dataLen = file.length() - 44;
        // Hard-coded canonical 44-byte WAV header
        byte[] header = new byte[44];
        header[0] = 'R'; // RIFF/WAVE header
        header[1] = 'I';
        header[2] = 'F';
        header[3] = 'F';
        header[4] = (byte) ((dataLen + 36) & 0xff); // RIFF chunk size = data + 36
        header[5] = (byte) (((dataLen + 36) >> 8) & 0xff);
        header[6] = (byte) (((dataLen + 36) >> 16) & 0xff);
        header[7] = (byte) (((dataLen + 36) >> 24) & 0xff);
        header[8] = 'W';
        header[9] = 'A';
        header[10] = 'V';
        header[11] = 'E';
        header[12] = 'f'; // 'fmt ' chunk
        header[13] = 'm';
        header[14] = 't';
        header[15] = ' ';
        header[16] = 16; // 4 bytes: size of the 'fmt ' chunk
        header[17] = 0;
        header[18] = 0;
        header[19] = 0;
        header[20] = 1; // audio format = 1 (PCM)
        header[21] = 0;
        header[22] = (byte) channels;
        header[23] = 0;
        header[24] = (byte) (rate & 0xff);
        header[25] = (byte) ((rate >> 8) & 0xff);
        header[26] = (byte) ((rate >> 16) & 0xff);
        header[27] = (byte) ((rate >> 24) & 0xff);
        header[28] = (byte) ((rate * blockAlign) & 0xff); // byte rate
        header[29] = (byte) (((rate * blockAlign) >> 8) & 0xff);
        header[30] = (byte) (((rate * blockAlign) >> 16) & 0xff);
        header[31] = (byte) (((rate * blockAlign) >> 24) & 0xff);
        header[32] = (byte) (blockAlign); // block align
        header[33] = 0;
        header[34] = (byte) bitsPerSample; // bits per sample
        header[35] = 0;
        header[36] = 'd';
        header[37] = 'a';
        header[38] = 't';
        header[39] = 'a';
        header[40] = (byte) (dataLen & 0xff); // data chunk size
        header[41] = (byte) ((dataLen >> 8) & 0xff);
        header[42] = (byte) ((dataLen >> 16) & 0xff);
        header[43] = (byte) ((dataLen >> 24) & 0xff);
        file.seek(0);
        file.write(header, 0, 44);
    } catch (Exception e) {
        e.printStackTrace(); // don't swallow I/O errors silently
    }
}
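To sanity-check the header the code just wrote, it helps to read the 44 bytes back and decode the little-endian fields. This throwaway helper (the name is mine, not part of the demo) prints the three that matter; for this demo it should log channels=1, rate=8000, byteRate=16000:

// Hypothetical helper: dump the key fields of a freshly written WAV header.
static void dumpWavHeader(String path) throws IOException {
    byte[] h = new byte[44];
    FileInputStream in = new FileInputStream(path);
    try {
        if (in.read(h) != 44) throw new IOException("short header");
    } finally {
        in.close();
    }
    int channels = (h[22] & 0xff) | ((h[23] & 0xff) << 8);
    int rate     = (h[24] & 0xff) | ((h[25] & 0xff) << 8)
                 | ((h[26] & 0xff) << 16) | ((h[27] & 0xff) << 24);
    int byteRate = (h[28] & 0xff) | ((h[29] & 0xff) << 8)
                 | ((h[30] & 0xff) << 16) | ((h[31] & 0xff) << 24);
    // byteRate should equal rate * blockAlign, i.e. 8000 * 2 = 16000 here.
    Log.d("NLPService", "channels=" + channels + " rate=" + rate + " byteRate=" + byteRate);
}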
// Upload the file; the transcription result arrives in the callback
public void stt() {
    File voiceFile = new File(AudioFileUtils.getWavFilePath());
    if (!voiceFile.exists()) {
        return;
    }
    RequestBody requestBody = RequestBody.create(MediaType.parse("multipart/form-data"), voiceFile);
    MultipartBody.Part file =
            MultipartBody.Part.createFormData("file", voiceFile.getName(), requestBody);
    NetRequest.sAPIClient.stt(RequestBodyUtil.getParams(), file)
            .observeOn(AndroidSchedulers.mainThread())
            .subscribe(new Action1<STT>() {
                @Override
                public void call(STT result) {
                    if (result != null && result.getCount() > 0) {
                        sttTv.setText("Result: " + result.getSegments().get(0).getContent());
                    }
                }
            });
}
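For context, the NetRequest.sAPIClient.stt call implies a Retrofit service along these lines. The annotations and parameter types below are my guess from the call site (RxJava 1 adapter, judging by the Action1 subscriber), not the demo's actual definition:

// Hypothetical Retrofit interface matching the stt() call above.
public interface SttApi {
    @Multipart
    @POST("api/stt")
    Observable<STT> stt(@PartMap Map<String, RequestBody> params,
                        @Part MultipartBody.Part file);
}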
// Remember to shut down the AudioRecord
private void stopRecordAndFile() {
    if (audioRecord != null) {
        isRecord = false; // stop the file-writing loop
        audioRecord.stop();
        audioRecord.release(); // release the native resources
        audioRecord = null;
    }
}
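One caveat with this teardown: the writer thread may still be inside audioRecord.read() when release() runs. A slightly safer variant (the recordThread field and method name are my additions) joins the thread before releasing:

// Safer teardown sketch: let the writer loop observe isRecord == false and
// finish its current read() before the AudioRecord is released.
private Thread recordThread; // assumption: assigned where the thread is started

private void stopRecordAndFileSafely() {
    isRecord = false; // the writer loop exits after its current read()
    if (recordThread != null) {
        try {
            recordThread.join(); // wait until the file is fully written
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        recordThread = null;
    }
    if (audioRecord != null) {
        audioRecord.stop();
        audioRecord.release();
        audioRecord = null;
    }
}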
WebSocket介紹: 我只記住一點點:它是應用層協議 ,就像http 也是,不過它是一種全雙工通訊, socket 只是TCP/IP 的封裝,不算協議。websocket 第一次須要以http 接口創建長鏈接,就這麼點了。後端
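Since the handshake itself is OkHttp's job, the one client-side knob worth mentioning here is a ping interval, which keeps the long-lived connection alive and surfaces dead peers; a sketch (the interval value is an arbitrary choice of mine):

// Keep-alive sketch: OkHttp performs the HTTP Upgrade handshake internally;
// pingInterval() adds periodic WebSocket pings on top of the connection.
OkHttpClient client = new OkHttpClient.Builder()
        .pingInterval(15, TimeUnit.SECONDS) // assumption: tune for your network
        .build();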
// MyWebSocketListener: the WebSocket callback API
class MyWebSocketListener extends WebSocketListener {
    @Override
    public void onOpen(WebSocket webSocket, Response response) {
        output("onOpen: " + "webSocket connect success");
        STTWebSocketActivity.this.webSocket = webSocket;
        startRecordAndFile();
        // Note: recording is deliberately started here, inside the callback.
        // Splitting the two apart makes the control flow messy, and on a second
        // recording the server-side WebSocket may already be closed, so the
        // audio would not get through and the connection must be re-established.
    }

    @Override
    public void onMessage(WebSocket webSocket, final String text) {
        runOnUiThread(new Runnable() {
            @Override
            public void run() {
                sttTv.setText("Stt result:" + text);
            }
        });
        output("onMessage1: " + text);
    }

    @Override
    public void onMessage(WebSocket webSocket, ByteString bytes) {
        output("onMessage2 byteString: " + bytes);
    }

    @Override
    public void onClosing(WebSocket webSocket, int code, String reason) {
        output("onClosing: " + code + "/" + reason);
    }

    @Override
    public void onClosed(WebSocket webSocket, int code, String reason) {
        output("onClosed: " + code + "/" + reason);
    }

    @Override
    public void onFailure(WebSocket webSocket, Throwable t, Response response) {
        output("onFailure: " + t.getMessage());
    }

    private void output(String s) {
        Log.d("NLPService", s);
    }
}
Note: the AudioRecord setup is the same as before.
// Create the WebSocket with OkHttp and attach the listener
private void createWebSocket() {
    Request request = new Request.Builder().url(sttApi).build();
    NetRequest.getOkHttpClient().newWebSocket(request, socketListener);
}
class AudioRecordThread implements Runnable {
    @Override
    public void run() {
        // Direct ByteBuffer: a contiguous block of memory laid out like a
        // primitive array; little-endian to match the PCM byte order
        ByteBuffer audioBuffer = ByteBuffer.allocateDirect(bufferSizeInBytes).order(ByteOrder.LITTLE_ENDIAN);
        int readSize = 0;
        Log.d(TAG, "isRecord=" + isRecord);
        while (isRecord) {
            readSize = audioRecord.read(audioBuffer, audioBuffer.capacity());
            if (readSize == AudioRecord.ERROR_INVALID_OPERATION || readSize == AudioRecord.ERROR_BAD_VALUE) {
                Log.d("NLPService", "Could not read audio data.");
                break;
            }
            // Only readSize bytes are valid; cap the limit so stale bytes aren't sent
            audioBuffer.limit(readSize);
            boolean send = webSocket.send(ByteString.of(audioBuffer)); // that simple
            Log.d("NLPService", "send=" + send);
            audioBuffer.clear(); // reset position/limit for the next read
        }
        webSocket.send("close"); // agreed-upon sentinel telling the server to close
    }
}
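Worth knowing: webSocket.send() only enqueues the frame and returns immediately (false once the socket is closing), so on a slow link the outgoing queue can grow unbounded. A crude backpressure sketch using OkHttp's queueSize() (the threshold is my own number):

// Cap the bytes buffered inside OkHttp before reading more PCM from the mic.
private static final long MAX_QUEUED_BYTES = 64 * 1024; // assumption: tune as needed

private void awaitDrain(WebSocket ws) throws InterruptedException {
    while (ws.queueSize() > MAX_QUEUED_BYTES) {
        Thread.sleep(10); // a real implementation might drop or batch frames instead
    }
}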
...And now the old hands will pipe up: there's no encryption here, and it's inefficient! Let me state one thing: this is a transcription engine, each request is a single utterance, and the amount of data transferred is small, so the backend gurus said encryption wasn't necessary, and I did as I was told... Of course, you could also encode and transmit at the same time.
TTS is comparatively simple: call the API with OkHttp, pass the text, take the response bytes, and play them with AudioTrack. The response here is a raw audio stream; MediaPlayer would be overkill (I haven't tried it), and note that MediaPlayer playback is also an IPC affair that ultimately drives AudioTrack under the hood anyway. Straight to the code:
public boolean request() {
    OkHttpClient client = NetRequest.getOkHttpClient();
    Request request = new Request.Builder().url(NetRequest.BASE_URL + "api/tts?text=今天是星期三").build();
    client.newCall(request).enqueue(new Callback() {
        @Override
        public void onFailure(Call call, IOException e) {
            Log.e(TAG, "tts request failed", e); // don't swallow the failure
        }

        @Override
        public void onResponse(Call call, Response response) throws IOException {
            play(response.body().bytes());
        }
    });
    return true;
}

public void play(byte[] data) {
    try {
        Log.d(TAG, "audioTrack start ");
        AudioTrack audioTrack = new AudioTrack(mOutput, mSamplingRate,
                AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT,
                data.length, AudioTrack.MODE_STATIC);
        audioTrack.write(data, 0, data.length);
        audioTrack.play();
        // data.length / 2 converts bytes to frames for 16-bit mono PCM
        while (audioTrack.getPlaybackHeadPosition() < (data.length / 2)) {
            Thread.yield(); // crude wait for playback to finish
        }
        audioTrack.stop();
        audioTrack.release();
    } catch (IllegalArgumentException e) {
        e.printStackTrace();
    } catch (IllegalStateException e) {
        e.printStackTrace();
    }
}
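MODE_STATIC needs the whole clip in memory before play(), which is fine for one short sentence. For longer responses, a MODE_STREAM variant writes chunks after playback starts; here's a minimal sketch under the same 8 kHz / mono / 16-bit assumptions (the constants are mine, whereas the demo uses mOutput/mSamplingRate):

// Streaming playback sketch: in MODE_STREAM, write() blocks until AudioTrack
// accepts the data, so the loop naturally paces itself.
public void playStreaming(InputStream pcm) throws IOException {
    int minBuf = AudioTrack.getMinBufferSize(8000,
            AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT);
    AudioTrack track = new AudioTrack(AudioManager.STREAM_MUSIC, 8000,
            AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT,
            minBuf, AudioTrack.MODE_STREAM);
    track.play(); // in MODE_STREAM, play() may precede the first write()
    byte[] chunk = new byte[minBuf];
    int n;
    while ((n = pcm.read(chunk)) != -1) {
        track.write(chunk, 0, n);
    }
    track.stop();
    track.release();
}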
Speex is a free, open-source audio codec written in C (what this post calls "encryption" is really compression/encoding). The demo ships a prebuilt .so; I personally spent ages trying to compile it myself, hit every pothole, and never succeeded, so I borrowed one. -_-|| The speexDemo project below is included in the repo; encoding and decoding both work, personally tested. I picked this part up from CSDN back when I was learning it, and it's carried over here to round things out.
public static void raw2spx(String inFileName, String outFileName) {
    FileInputStream rawFileInputStream = null;
    FileOutputStream fileOutputStream = null;
    try {
        rawFileInputStream = new FileInputStream(inFileName);
        fileOutputStream = new FileOutputStream(outFileName);
        byte[] rawbyte = new byte[320];
        byte[] encoded = new byte[160];
        // Compress the raw data to spx. Speex encodes one 160-sample frame
        // (320 bytes of 16-bit PCM) at a time, hence the loop.
        int readedtotal = 0;
        int size = 0;
        int encodedtotal = 0;
        while ((size = rawFileInputStream.read(rawbyte, 0, 320)) != -1) {
            readedtotal = readedtotal + size;
            short[] rawdata = ShortByteUtil.byteArray2ShortArray(rawbyte);
            int encodesize = SpeexUtil.getInstance().encode(rawdata, 0, encoded, rawdata.length);
            fileOutputStream.write(encoded, 0, encodesize);
            encodedtotal = encodedtotal + encodesize;
        }
        fileOutputStream.close();
        rawFileInputStream.close();
    } catch (Exception e) {
        e.printStackTrace(); // log instead of swallowing
    }
}
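Each 320-byte frame is 20 ms of 8 kHz audio, and Speex narrowband squeezes it to a few tens of bytes depending on the quality setting. Comparing the input and output sizes after a run shows the ratio; a trivial check (the file-path variables are placeholders, not demo names):

// Rough compression check: compare the raw PCM size with the .spx output.
long rawBytes = new File(rawPath).length();
long spxBytes = new File(spxPath).length();
Log.d("NLPService", "speex ratio = " + String.format("%.1f : 1", (double) rawBytes / spxBytes));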
Date:2018/10/17 Author:weimin