1小時學會：最簡單的iOS直播推流（八）h264 aac 軟編碼

時間 2019-11-24

標籤小時學會最簡單 ios 直播 h264 aac 編碼欄目 iOS 简体版

原文原文鏈接

最簡單的iOS 推流代碼，視頻捕獲，軟編碼(faac，x264)，硬編碼（aac，h264），美顏，flv編碼，rtmp協議，陸續更新代碼解析，你想學的知識這裏都有，想弄懂直播技術的同學快來看！！

源代碼：https://github.com/hardman/AWLive

軟編碼包含3部份內容：

將pcm/yuv數據編碼成aac/h264格式
將aac/h264數據封裝成flv格式
另外不管軟編碼還是硬編碼，最後得到的flv格式數據，都需要通過rtmp協議發送至服務器。

本篇將介紹第1部份內容。另外兩部份內容將在後續文章中介紹。

根據上文介紹，軟編碼實現，對應音頻／視頻編碼分別爲：AWSWFaacEncoder 和 AWSWX264Encoder。

這兩個類只是用OC封裝的一個殼，實際上使用的是 libfaac 和 libx264 進行處理。

音頻軟編碼

aw_faac.h和aw_faac.c這兩個文件是對libfaac這個庫使用方法的簡單封裝。這兩個文件預期功能是，封裝出一個函數，將pcm數據轉成aac數據。

faac的使用步驟：

使用 faacEncOpen 開啓編碼環境。
配置編碼屬性。
使用 faacEncEncode 函數編碼。
使用完畢後，調用 faacEncClose 關閉編碼環境。

根據這個步驟，來看aw_faac.c文件。

faac封裝第一步：開啓編碼環境

/*
	Opens a libfaac encoder for faac_ctx and caches the state later encode calls rely on.

	aw_faac_context is the project's own helper struct for AAC encoding. This function
	fills in its faac_handler, max_input_sample_count, max_output_byte_count,
	max_input_byte_count, aac_buffer and, when libfaac reports one,
	audio_specific_config_data.

	If faacEncOpen fails, an error is logged and nothing after max_input_byte_count is set.
*/
static void aw_open_faac_enc_handler(aw_faac_context *faac_ctx){
    // faacEncOpen receives the sample rate and channel count and reports, through the two
    // out-parameters, the maximum number of input samples per encode call and the maximum
    // number of bytes a single encode call can produce.
    faac_ctx->faac_handler = faacEncOpen(faac_ctx->config.sample_rate,
                                         faac_ctx->config.channel_count,
                                         &faac_ctx->max_input_sample_count,
                                         &faac_ctx->max_output_byte_count);

    // Convert the per-call sample limit into a byte limit (sample_size is in bits).
    faac_ctx->max_input_byte_count = faac_ctx->max_input_sample_count * faac_ctx->config.sample_size / 8;

    if (faac_ctx->faac_handler == NULL) {
        aw_log("[E] aac handler open failed");
        return;
    }

    // Scratch buffer that receives the output of each encode call.
    faac_ctx->aac_buffer = aw_alloc(faac_ctx->max_output_byte_count);

    // Start from the encoder's current configuration and adjust it.
    faacEncConfigurationPtr enc_cfg = faacEncGetCurrentConfiguration(faac_ctx->faac_handler);

    // Pick the libfaac input format matching the PCM sample width; any width other than
    // 16/24/32 bits is treated as float input.
    switch (faac_ctx->config.sample_size) {
        case 16:
            enc_cfg->inputFormat = FAAC_INPUT_16BIT;
            break;
        case 24:
            enc_cfg->inputFormat = FAAC_INPUT_24BIT;
            break;
        case 32:
            enc_cfg->inputFormat = FAAC_INPUT_32BIT;
            break;
        default:
            enc_cfg->inputFormat = FAAC_INPUT_FLOAT;
            break;
    }

    enc_cfg->aacObjectType = LOW;  // AAC object type; the alternatives are Main and LTP
    enc_cfg->mpegVersion = MPEG4;  // MPEG version: MPEG2 or MPEG4
    enc_cfg->useTns = 1;           // enable TNS
    enc_cfg->allowMidside = 0;     // do not use mid/side coding
    if (faac_ctx->config.bitrate != 0) {
        // libfaac expects the bitrate per channel, so split the configured total.
        enc_cfg->bitRate = faac_ctx->config.bitrate / faac_ctx->config.channel_count;
    }

    faacEncSetConfiguration(faac_ctx->faac_handler, enc_cfg);

    // Fetch the audio specific config. It holds key parameters of the AAC stream and,
    // for rtmp, has to be sent before any audio frame.
    // NOTE(review): the buffer handed back by faacEncGetDecoderSpecificInfo is not released
    // in this function - confirm who owns it.
    uint8_t *asc_bytes = NULL;
    unsigned long asc_len = 0;
    faacEncGetDecoderSpecificInfo(faac_ctx->faac_handler, &asc_bytes, &asc_len);

    // Keep a copy of the audio specific config inside the context.
    if (asc_len > 0) {
        faac_ctx->audio_specific_config_data = alloc_aw_data(0);
        memcpy_aw_data(&faac_ctx->audio_specific_config_data, asc_bytes, (uint32_t)asc_len);
    }

}
//函數內具體參數配置，請參考：
//http://wenku.baidu.com/link?url=0E9GnSo7hZ-3WmB_eXz8EfnG8NqJJJtvjrVNW7hW-VEYWW-gYBMVM-CnFSicDE-veDl2tzfL-nu2FQ8msGcCOALuT8VW1l_NjQL9Gvw5V6_

複製代碼

faac封裝第二步：開始編碼

/*
	Encodes a buffer of PCM audio to AAC with libfaac.

	ctx      : faac context whose encoder has already been opened.
	pcm_data : PCM samples to encode.
	len      : number of bytes available in pcm_data.

	The input is cut into chunks of at most ctx->max_input_byte_count bytes and each chunk
	is passed to faacEncEncode. Everything the encoder emits is appended to
	ctx->encoded_aac_data, which is cleared first, so after the call it only holds the
	output for this input.

	Nothing is encoded when an argument is invalid or the context has no usable chunk size
	or sample size; an error is logged instead.
*/
extern void aw_encode_pcm_frame_2_aac(aw_faac_context *ctx, int8_t *pcm_data, long len){
    // Validate arguments.
    if (!ctx || !pcm_data || len <= 0) {
        aw_log("[E] aw_encode_pcm_frame_2_aac params error");
        return;
    }

    // Each call starts from an empty output buffer.
    reset_aw_data(&ctx->encoded_aac_data);

    long max_input_count = ctx->max_input_byte_count;
    // A non-positive chunk size would never advance the loop below, and a zero sample size
    // would divide by zero when converting bytes to samples.
    if (max_input_count <= 0 || ctx->config.sample_size <= 0) {
        aw_log("[E] aw_encode_pcm_frame_2_aac encoder context is not ready");
        return;
    }

    // Consume the whole input; the last chunk may be shorter than max_input_count.
    long curr_read_count = 0;
    while (curr_read_count < len) {
        long remain_count = len - curr_read_count;
        long read_count = remain_count > max_input_count ? max_input_count : remain_count;

        // faacEncEncode takes a sample count, not a byte count.
        long input_samples = read_count * 8 / ctx->config.sample_size;
        int write_count = faacEncEncode(ctx->faac_handler, (int32_t * )(pcm_data + curr_read_count), (uint32_t)input_samples, (uint8_t *)ctx->aac_buffer, (uint32_t)ctx->max_output_byte_count);

        // A call may legitimately produce no bytes; only append real output.
        if (write_count > 0) {
            data_writer.write_bytes(&ctx->encoded_aac_data, (const uint8_t *)ctx->aac_buffer, write_count);
        }

        curr_read_count += read_count;
    }
}
複製代碼

faac封裝第三步：關閉編碼器：

/*
	Tears down an aw_faac_context (excerpt: the original article elides the surrounding
	code with "...", including the declaration of `context`).
	The visible part closes the libfaac encoder handle stored in the context.
*/
extern void free_aw_faac_context(aw_faac_context **context_p){
    ...
    //Close the faac encoder
    faacEncClose(context->faac_handler);
    ...
}
複製代碼

上述代碼僅僅是對faac編碼器的封裝，實現了打開編碼器、編碼、關閉編碼器這三個步驟。

真正實現編碼過程的文件是：aw_sw_faac_encoder.h/aw_sw_faac_encoder.c文件

此文件的功能是：將傳入的pcm數據通過aw_faac.c提供的功能轉成aac數據格式，然後將aac數據格式轉成flv格式。如何轉成flv格式，會在後續文章介紹。

來看一下 aw_sw_faac_encoder.c文件的實現。此文件邏輯也很清晰，它實現的功能有：

開啓編碼器，建立一些過程變量。
將audio specific config data 轉成flv幀數據。
將接收到的pcm數據，轉成aac數據，而後將aac數據轉成flv音頻數據。
關閉編碼器。

可以看出，這種類似的功能性代碼，一般都是三部曲：打開－使用－關閉。

下面來看代碼。音頻軟編碼器第一步：開啓編碼器

/*
	Opens the software AAC encoder.

	faac_config : encoder settings supplied by the caller. They are copied into the
	              module-level s_faac_config and also used to create s_faac_ctx.

	Does nothing (apart from logging) when the encoder is already open, when faac_config
	is NULL, or when the config copy cannot be allocated.
*/
extern void aw_sw_encoder_open_faac_encoder(aw_faac_config *faac_config){
    // Refuse to open twice.
    if (aw_sw_faac_encoder_is_valid()) {
        aw_log("[E] aw_sw_encoder_open_faac_encoder when encoder is already inited");
        return;
    }

    // The config is dereferenced below, so a NULL pointer must be rejected here.
    if (!faac_config) {
        aw_log("[E] aw_sw_encoder_open_faac_encoder faac_config is NULL");
        return;
    }

    // Keep a private copy of the configuration.
    int32_t faac_cfg_len = sizeof(aw_faac_config);
    if (!s_faac_config) {
        s_faac_config = aw_alloc(faac_cfg_len);
    }
    if (!s_faac_config) {
        aw_log("[E] aw_sw_encoder_open_faac_encoder alloc config failed");
        return;
    }
    memcpy(s_faac_config, faac_config, faac_cfg_len);

    // Create the faac context, which opens the underlying encoder.
    s_faac_ctx = alloc_aw_faac_context(*faac_config);
}
複製代碼

音頻軟編碼第二步：將audio specific config data 轉成flv幀數據。

/*
	Builds the FLV audio tag that carries the AAC audio specific config.

	Returns a newly created tag, or NULL when the encoder is not open, when the context
	holds no audio specific config data, or when the tag cannot be created.
*/
extern aw_flv_audio_tag *aw_sw_encoder_create_faac_specific_config_tag(){
    // The encoder must be open.
    if(!aw_sw_faac_encoder_is_valid()){
        aw_log("[E] aw_sw_encoder_create_faac_specific_config_tag when audio encoder is not inited");
        return NULL;
    }

    // The config data is dereferenced below; bail out if the context never received one.
    if (!s_faac_ctx->audio_specific_config_data) {
        aw_log("[E] aw_sw_encoder_create_faac_specific_config_tag audio specific config data is NULL");
        return NULL;
    }

    // Create the audio specific config record.
    aw_flv_audio_tag *aac_tag = aw_sw_encoder_create_flv_audio_tag(&s_faac_ctx->config);
    if (!aac_tag) {
        aw_log("[E] aw_sw_encoder_create_faac_specific_config_tag create audio tag failed");
        return NULL;
    }

    // Per the FLV format, the audio specific config uses the "AAC sequence header" packet
    // type (value 0); ordinary audio frames use 1.
    aac_tag->aac_packet_type = aw_flv_a_aac_package_type_aac_sequence_header;

    aac_tag->config_record_data = copy_aw_data(s_faac_ctx->audio_specific_config_data);
    aac_tag->common_tag.timestamp = 0;
    // Tag size = payload + 11 + the tag's own header_size.
    aac_tag->common_tag.data_size = s_faac_ctx->audio_specific_config_data->size + 11 + aac_tag->common_tag.header_size;

    return aac_tag;
}
複製代碼

音頻軟編碼器第三步：將接收到的pcm數據轉成aac數據，而後將aac數據轉成flv音頻數據

/*
	Encodes PCM audio to AAC and wraps the result in an FLV audio tag.

	pcm_data  : PCM input.
	len       : length of pcm_data in bytes.
	timestamp : FLV timestamp for the tag. rtmp expects timestamps of successive tags to
	            increase; a later tag must not carry a smaller timestamp than an earlier one.

	Returns the FLV audio tag (FLV calls each frame a "tag"), or NULL when the encoder is
	not open or the encoder produced no payload beyond the ADTS header.
*/
extern aw_flv_audio_tag *aw_sw_encoder_encode_faac_data(int8_t *pcm_data, long len, uint32_t timestamp){
    if (aw_sw_faac_encoder_is_valid()) {
        // PCM -> AAC; the output lands in s_faac_ctx->encoded_aac_data.
        aw_encode_pcm_frame_2_aac(s_faac_ctx, pcm_data, len);

        // Data produced by faac starts with a 7-byte ADTS header, which rtmp does not
        // accept, so the first 7 bytes are skipped.
        int adts_len = 7;

        // Nothing useful was produced if there is no data past the header.
        if (s_faac_ctx->encoded_aac_data->size <= adts_len) {
            return NULL;
        }

        // Wrapping into an FLV tag only adds fixed framing around the AAC bytes; the AAC
        // data itself is not re-encoded.
        aw_flv_audio_tag *flv_tag = aw_encoder_create_audio_tag(
            (int8_t *)s_faac_ctx->encoded_aac_data->data + adts_len,
            s_faac_ctx->encoded_aac_data->size - adts_len,
            timestamp,
            &s_faac_ctx->config);

        audio_count++;

        return flv_tag;
    }

    aw_log("[E] aw_sw_encoder_encode_faac_data when encoder is not inited");
    return NULL;
}
複製代碼

音頻軟編碼器第四步：關閉編碼器

/*
	Closes the software AAC encoder: releases the faac context and then the stored
	configuration copy. Logs an error and does nothing when the encoder is not open.
*/
extern void aw_sw_encoder_close_faac_encoder(){
    if (aw_sw_faac_encoder_is_valid()) {
        // Releasing the aw_faac_context also shuts down the faac encoding environment.
        free_aw_faac_context(&s_faac_ctx);

        // Drop the configuration copy.
        if (s_faac_config != NULL) {
            aw_free(s_faac_config);
            s_faac_config = NULL;
        }
        return;
    }

    // Guard against closing twice.
    aw_log("[E] aw_sw_encoder_close_faac_encoder when encoder is not inited");
}
複製代碼

到此爲止，音頻軟編碼器就介紹完了。已經成功實現了將pcm數據轉成flv音頻幀。

下面介紹視頻軟編碼。套路同音頻編碼一致，對應的視頻軟編碼是對x264這個庫的封裝。文件在aw_x264.h/aw_x264.c中。

它實現的功能以下：

初始化x264參數，打開編碼環境
進行編碼
關閉編碼環境。

x264封裝第一步：初始化x264參數，打開編碼環境

/*
	Creates and initialises an aw_x264_context.

	config : encoder configuration (taken by value; a copy is stored in the context).

	aw_x264_context is the project's own struct holding the x264 handle together with the
	input/output pictures and the buffers used while encoding. The function opens the
	encoder, prepares pic_in/pic_out, allocates the output buffers and finally generates
	the sps/pps header.
*/
extern aw_x264_context *alloc_aw_x264_context(aw_x264_config config){
    aw_x264_context *ctx = aw_alloc(sizeof(aw_x264_context));
    memset(ctx, 0, sizeof(aw_x264_context));

    // Input defaults to I420 when no format was given.
    if (!config.input_data_format) {
        config.input_data_format = X264_CSP_I420;
    }

    // Store the (possibly adjusted) configuration, then build the x264 parameters and
    // open the encoder with them. The parameter block is released once the encoder is open.
    memcpy(&ctx->config, &config, sizeof(aw_x264_config));
    x264_param_t *enc_param = NULL;
    aw_create_x264_param(ctx, &enc_param);
    aw_open_x264_handler(ctx, enc_param);
    aw_free(enc_param);

    // pic_in describes the raw image handed to x264.
    x264_picture_t *in_pic = aw_alloc(sizeof(x264_picture_t));
    x264_picture_init(in_pic);

    // [Pitfall noted by the original author] aw_stride is a macro that turns the width
    // into a multiple of 16; with other widths encoding sometimes fails (e.g. missing colour).
    int padded_width = aw_stride(config.width);

    x264_picture_alloc(in_pic, config.input_data_format, padded_width, config.height);

    in_pic->img.i_csp = config.input_data_format;

    // i_stride is the per-plane line stride; it depends on the pixel format and tells x264
    // how many bytes make up one row.
    switch (config.input_data_format) {
        case X264_CSP_NV12:
            // NV12: two planes. Plane 0 holds Y (width * height); plane 1 holds interleaved
            // U and V (width * height / 2).
            in_pic->img.i_stride[0] = padded_width;
            in_pic->img.i_stride[1] = padded_width;
            in_pic->img.i_plane = 2;
            break;
        case X264_CSP_BGR:
        case X264_CSP_RGB:
            // Packed RGB: one plane of width * 3 * height bytes.
            in_pic->img.i_stride[0] = padded_width * 3;
            in_pic->img.i_plane = 1;
            break;
        case X264_CSP_BGRA:
            // BGRA: like RGB but four bytes per pixel.
            in_pic->img.i_stride[0] = padded_width * 4;
            in_pic->img.i_plane = 1;
            break;
        default:
            // YUV420 (I420): three planes. Y is width * height, U and V are each
            // width * height / 4.
            in_pic->img.i_stride[0] = padded_width;
            in_pic->img.i_stride[1] = padded_width / 2;
            in_pic->img.i_stride[2] = padded_width / 2;
            in_pic->img.i_plane = 3;
            break;
    }

    // pic_in carries the input (yuv/rgb...), pic_out receives x264's output picture info.
    ctx->pic_in = in_pic;

    x264_picture_t *out_pic = aw_alloc(sizeof(x264_picture_t));
    ctx->pic_out = out_pic;
    x264_picture_init(ctx->pic_out);

    // Buffers for the encoded frame and for the sps/pps header.
    ctx->encoded_h264_data = alloc_aw_data(0);
    ctx->sps_pps_data = alloc_aw_data(0);

    // sps/pps describe key properties of the h264 stream; rtmp requires them to be sent
    // before any flv video frame, so they are generated right away.
    aw_encode_x264_header(ctx);

    return ctx;
}
複製代碼

x264封裝第二步：開始編碼

/*
	Encodes one raw frame with x264.

	aw_ctx    : x264 context created by alloc_aw_x264_context.
	yuv_frame : raw frame in the format configured in aw_ctx->config.input_data_format.
	len       : size of yuv_frame in bytes.

	On return aw_ctx->encoded_h264_data holds the NAL units produced for this frame. It is
	left empty when the input is invalid, when encoding fails, or when x264 emitted nothing
	for this frame.
*/
extern void aw_encode_yuv_frame_2_x264(aw_x264_context *aw_ctx, int8_t *yuv_frame, int len){
    if (!aw_ctx) {
        return;
    }

    //Clear the previous frame's output first so every early exit leaves the buffer empty
    //instead of re-emitting stale NAL units.
    reset_aw_data(&aw_ctx->encoded_h264_data);

    if (len <= 0 || !yuv_frame) {
        return;
    }

    x264_picture_t *pic_in = aw_ctx->pic_in;

    //pic_in owns plane buffers obtained from x264_picture_alloc. Remember them so they can
    //be put back after encoding: the planes are only borrowed from the caller's frame for
    //the duration of the encode call, and x264_picture_clean must later see the original
    //pointers rather than caller-owned memory.
    uint8_t *owned_planes[sizeof(pic_in->img.plane) / sizeof(pic_in->img.plane[0])];
    int plane_total = (int)(sizeof(owned_planes) / sizeof(owned_planes[0]));
    int p = 0;
    for (p = 0; p < plane_total; p++) {
        owned_planes[p] = pic_in->img.plane[p];
    }

    //Point pic_in at the caller's frame. Plane offsets use the padded width.
    int actual_width = aw_stride(aw_ctx->config.width);
    if (aw_ctx->config.input_data_format == X264_CSP_NV12) {
        pic_in->img.plane[0] = (uint8_t *)yuv_frame;
        pic_in->img.plane[1] = (uint8_t *)yuv_frame + actual_width * aw_ctx->config.height;
    }else if(aw_ctx->config.input_data_format == X264_CSP_BGR || aw_ctx->config.input_data_format == X264_CSP_RGB){
        pic_in->img.plane[0] = (uint8_t *)yuv_frame;
    }else if(aw_ctx->config.input_data_format == X264_CSP_BGRA){
        pic_in->img.plane[0] = (uint8_t *)yuv_frame;
    }else{//YUV420
        pic_in->img.plane[0] = (uint8_t *)yuv_frame;
        pic_in->img.plane[1] = (uint8_t *)yuv_frame + actual_width * aw_ctx->config.height;
        pic_in->img.plane[2] = (uint8_t *)yuv_frame + actual_width * aw_ctx->config.height * 5 / 4;
    }

    //Encode; the resulting NAL units are exposed through aw_ctx->nal / aw_ctx->nal_count.
    int encode_result = x264_encoder_encode(aw_ctx->x264_handler, &aw_ctx->nal, &aw_ctx->nal_count, pic_in, aw_ctx->pic_out);
    pic_in->i_pts++;

    //Give pic_in its own plane buffers back.
    for (p = 0; p < plane_total; p++) {
        pic_in->img.plane[p] = owned_planes[p];
    }

    if (encode_result < 0) {
        aw_log("[E] aw_encode_yuv_frame_2_x264 x264_encoder_encode failed");
        aw_ctx->nal_count = 0;
        return;
    }

    //Copy every NAL unit of this frame into encoded_h264_data.
    int i = 0;
    for (; i < aw_ctx->nal_count; i++) {
        data_writer.write_bytes(&aw_ctx->encoded_h264_data, aw_ctx->nal[i].p_payload, aw_ctx->nal[i].i_payload);
    }
}
複製代碼

x264封裝第三步：關閉編碼環境。

/*
	Releases an aw_x264_context (excerpt: parts of the body are elided with "..." in the
	original article).
	The visible part releases pic_in, pic_out and the x264 handle, clearing each field
	after it has been released.
*/
extern void free_aw_x264_context(aw_x264_context **ctx_p){
    aw_x264_context *ctx = *ctx_p;
    if (ctx) {
        //Release pic_in: first the picture's own buffers, then the struct itself.
        //NOTE(review): x264_picture_clean releases what pic_in->img.plane refers to - make
        //sure the planes still reference the memory from x264_picture_alloc here; confirm.
        if (ctx->pic_in) {
            x264_picture_clean(ctx->pic_in);
            aw_free(ctx->pic_in);
            ctx->pic_in = NULL;
        }
        
        //Release pic_out
        if (ctx->pic_out) {
            aw_free(ctx->pic_out);
            ctx->pic_out = NULL;
        }

        ...
        
        //Close the encoder handle
        if (ctx->x264_handler) {
            x264_encoder_close(ctx->x264_handler);
            ctx->x264_handler = NULL;
        }
        ...
    }
}
複製代碼

上面的代碼只是對x264編碼流程進行簡單封裝。真正實現完整轉碼邏輯的是在 aw_sw_x264_encoder.h/aw_sw_x264_encoder.c 中。

它實現了以下功能：

將收到的yuv數據編碼成 h264格式。
生成包含sps/pps數據的flv視頻幀。
將h264格式的數據轉成flv視頻數據。
關閉編碼器。

視頻軟編碼器第一步：收到yuv數據，並編碼成h264格式。

/*
	Opens the software h264 encoder; a thin layer on top of aw_x264.

	x264_config : encoder settings supplied by the caller. They are copied into the
	              module-level s_x264_config and also used to create s_x264_ctx.

	Does nothing (apart from logging) when the encoder is already open, when x264_config
	is NULL, or when the config copy cannot be allocated.
*/
extern void aw_sw_encoder_open_x264_encoder(aw_x264_config *x264_config){
    // Refuse to open twice.
    if (aw_sw_x264_encoder_is_valid()) {
        aw_log("[E] aw_sw_encoder_open_x264_encoder when video encoder is already inited");
        return;
    }

    // The config is dereferenced below, so a NULL pointer must be rejected here.
    if (!x264_config) {
        aw_log("[E] aw_sw_encoder_open_x264_encoder x264_config is NULL");
        return;
    }

    // Keep a private copy of the configuration.
    int32_t x264_cfg_len = sizeof(aw_x264_config);
    if (!s_x264_config) {
        s_x264_config = aw_alloc(x264_cfg_len);
    }
    if (!s_x264_config) {
        aw_log("[E] aw_sw_encoder_open_x264_encoder alloc config failed");
        return;
    }
    memcpy(s_x264_config, x264_config, x264_cfg_len);

    // Create the x264 context, which opens the underlying encoder.
    s_x264_ctx = alloc_aw_x264_context(*x264_config);
}
複製代碼

視頻軟編碼器第二步：生成包含sps/pps數據的flv視頻幀

/*
	Builds the first FLV video tag, the one carrying the sps/pps data.
	(The FLV-format helpers live in aw_encode_flv.h / aw_encode_flv.c.)

	Returns the new tag, or NULL (after logging) when the video encoder is not open.
*/
extern aw_flv_video_tag *aw_sw_encoder_create_x264_sps_pps_tag(){
    if (aw_sw_x264_encoder_is_valid()) {
        // Fresh flv video tag that will hold the sps/pps record.
        aw_flv_video_tag *header_tag = aw_sw_encoder_create_flv_video_tag();

        // Sent at timestamp 0.
        header_tag->common_tag.timestamp = 0;
        // Flagged as a key frame.
        header_tag->frame_type = aw_flv_v_frame_type_key;
        // The packet type is always "sequence header" for this tag.
        header_tag->h264_package_type = aw_flv_v_h264_packet_type_seq_header;
        // Composition time is 0 for every video frame in this project.
        header_tag->h264_composition_time = 0;
        // Copy the sps/pps bytes generated by aw_x264 into the tag.
        header_tag->config_record_data = copy_aw_data(s_x264_ctx->sps_pps_data);
        // Tag size = header size + data header (11 bytes) + payload length.
        header_tag->common_tag.data_size = s_x264_ctx->sps_pps_data->size + 11 + header_tag->common_tag.header_size;

        return header_tag;
    }

    aw_log("[E] aw_sw_encoder_create_video_sps_pps_tag when video encoder is not inited");
    return NULL;
}
複製代碼

視頻軟編碼器第三步：將h264格式的數據轉成flv視頻數據。

/*
	Encodes one captured raw video frame and wraps the result in an FLV video tag
	(excerpt: part of the body is elided with "..." in the original article).

	yuv_data  : raw frame data.
	len       : size of yuv_data in bytes (narrowed to int32_t for the encoder call).
	timeStamp : timestamp passed on to the tag constructor.

	Returns the FLV video tag, or NULL when the encoder is not open or the encoder
	produced no data for this frame.
*/
extern aw_flv_video_tag * aw_sw_encoder_encode_x264_data(int8_t *yuv_data, long len, uint32_t timeStamp){
	//The encoder must be open
    if (!aw_sw_x264_encoder_is_valid()) {
        aw_log("[E] aw_sw_encoder_encode_video_data when video encoder is not inited");
        return NULL;
    }
    
    //Run the encoder; output lands in s_x264_ctx->encoded_h264_data
    aw_encode_yuv_frame_2_x264(s_x264_ctx, yuv_data, (int32_t)len);
    
    //Nothing to wrap if the encoder produced no data
    if (s_x264_ctx->encoded_h264_data->size <= 0) {
        return NULL;
    }
    
    //Turn the h264 data into an flv tag
    x264_picture_t *pic_out = s_x264_ctx->pic_out;
    
    //The fourth argument is (pts - dts) of the output picture scaled by 1000 / fps, and the
    //last one is the output picture's key-frame flag.
    //NOTE(review): the (uint32_t) cast assumes i_pts >= i_dts - confirm for this encoder setup.
    aw_flv_video_tag *video_tag = aw_encoder_create_video_tag((int8_t *)s_x264_ctx->encoded_h264_data->data, s_x264_ctx->encoded_h264_data->size, timeStamp, (uint32_t)((pic_out->i_pts - pic_out->i_dts) * 1000.0 / s_x264_ctx->config.fps), pic_out->b_keyframe);

    ...
    
    return video_tag;
}

複製代碼

視頻軟編碼器第四步：關閉編碼器

/*
	Closes the software h264 encoder: releases the stored configuration copy and then the
	x264 context. Logs an error and does nothing when the encoder is not open.
*/
extern void aw_sw_encoder_close_x264_encoder(){
    // Guard against closing twice.
    if (!aw_sw_x264_encoder_is_valid()) {
        aw_log("[E] aw_sw_encoder_close_x264_encoder when video encoder is not inited");
        return;
    }

    // Release the configuration copy.
    if (s_x264_config) {
        aw_free(s_x264_config);
        s_x264_config = NULL;
    }

    // Release the context.
    free_aw_x264_context(&s_x264_ctx);
}
複製代碼

至此，軟編碼代碼介紹完畢。能夠經過 AWSWFaacEncoder/AWSWX264Encoder 類調用上面的軟編碼器，給上層提供一致的接口。

總結，軟編碼器涉及的內容：

第三方編碼器：libfaac/libx264
第三方編碼器封裝：aw_faac.h/aw_faac.c，aw_x264.h/aw_x264.c
編碼器(將原始數據轉成最終數據)封裝：aw_sw_faac_encoder.h/aw_sw_faac_encoder.c，aw_sw_x264_encoder.h/aw_sw_x264_encoder.c
頂層抽象：AWSWFaacEncoder/AWSWX264Encoder

編碼過程當中須要注意的地方：