FFmpeg編解碼處理4-音頻編碼

時間 2020-05-20

原文鏈接

本文爲作者原創，轉載請註明出處：http://www.javashuo.com/article/p-ayfagnqn-hw.html

FFmpeg 編解碼處理系列筆記：
[0]. FFmpeg時間戳詳解
[1]. FFmpeg編解碼處理1-轉碼全流程簡介
[2]. FFmpeg編解碼處理2-編解碼API詳解
[3]. FFmpeg編解碼處理3-視頻編碼
[4]. FFmpeg編解碼處理4-音頻編碼

基於 FFmpeg 4.1 版本。

6. 音頻編碼

編碼使用 avcodec_send_frame() 和 avcodec_receive_packet() 兩個函數。

音頻編碼的步驟：
[1] 初始化打開輸出文件時構建編碼器上下文
[2] 音頻幀編碼
[2.1] 將濾鏡輸出的音頻幀寫入音頻 FIFO
[2.2] 按音頻編碼器中要求的音頻幀尺寸從音頻 FIFO 中取出音頻幀
[2.3] 爲音頻幀生成 pts
[2.4] 將音頻幀送入編碼器，從編碼器取出編碼幀
[2.5] 更新編碼幀流索引
[2.6] 將幀中時間參數按輸出封裝格式的時間基進行轉換

6.1 打開音頻編碼器

完整源碼在 open_output_file() 函數中，下面摘出關鍵部分：

// 3. Build the encoder AVCodecContext for this output stream
    if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
        dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO)          // audio or video stream
    {
        // 3.1 Find the encoder (AVCodec). Use the encoder requested by name; when the
        //     requested name is "copy", fall back to the encoder matching the decoder's codec id
        AVCodec *encoder = NULL;
        if ((dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) && (strcmp(v_enc_name, "copy") != 0))
        {
            encoder = avcodec_find_encoder_by_name(v_enc_name);
        }
        else if ((dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) && (strcmp(a_enc_name, "copy") != 0))
        {
            encoder = avcodec_find_encoder_by_name(a_enc_name);
        }
        else 
        {
            encoder = avcodec_find_encoder(dec_ctx->codec_id);
        }

        if (!encoder)
        {
            av_log(NULL, AV_LOG_FATAL, "Necessary encoder not found\n");
            return AVERROR_INVALIDDATA;
        }
        // 3.2 Allocate the AVCodecContext; its members are set to the defaults of the chosen AVCodec
        AVCodecContext *enc_ctx = avcodec_alloc_context3(encoder);
        if (!enc_ctx)
        {
            av_log(NULL, AV_LOG_FATAL, "Failed to allocate the encoder context\n");
            return AVERROR(ENOMEM);
        }

        // 3.3 Configure the picture / sound properties of the AVCodecContext
        /* In this example, we transcode to same properties (picture size,
         * sample rate etc.). These properties can be changed for output
         * streams easily using filters */
        if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            enc_ctx->height = dec_ctx->height;              // picture height
            enc_ctx->width = dec_ctx->width;                // picture width
            enc_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; // sample aspect ratio: pixel width / pixel height
            /* take first format from list of supported formats */
            if (encoder->pix_fmts)  // list of pixel formats supported by the encoder
            {
                enc_ctx->pix_fmt = encoder->pix_fmts[0];    // use the first supported pixel format
            }
            else
            {
                enc_ctx->pix_fmt = dec_ctx->pix_fmt;        // fall back to the decoder's pixel format
            }
            /* video time_base can be set to whatever is handy and supported by encoder */
            enc_ctx->time_base = av_inv_q(dec_ctx->framerate);  // time base: inverse of the decoder frame rate
            enc_ctx->framerate = dec_ctx->framerate;
            //enc_ctx->bit_rate = dec_ctx->bit_rate;

            /* emit one intra frame every ten frames
            * check frame pict_type before passing frame
            * to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
            * then gop_size is ignored and the output of encoder
            * will always be I frame irrespective to gop_size
            */
            //enc_ctx->gop_size = 10;
            //enc_ctx->max_b_frames = 1;
        }
        else
        {
            enc_ctx->sample_rate = dec_ctx->sample_rate;    // sample rate
            enc_ctx->channel_layout = dec_ctx->channel_layout; // channel layout
            enc_ctx->channels = av_get_channel_layout_nb_channels(enc_ctx->channel_layout); // channel count
            /* take first format from list of supported formats */
            if (encoder->sample_fmts)   // list of sample formats supported by the encoder
            {
                enc_ctx->sample_fmt = encoder->sample_fmts[0];  // use the first supported sample format
            }
            else
            {
                // The list may be absent; mirror the video branch and reuse the decoder's format
                enc_ctx->sample_fmt = dec_ctx->sample_fmt;
            }
            enc_ctx->time_base = (AVRational){1, enc_ctx->sample_rate}; // time base: inverse of the encoder sample rate
            // Note: enc_ctx->codec->capabilities is a read-only flag set and cannot be
            // modified here (e.g. to force AV_CODEC_CAP_VARIABLE_FRAME_SIZE).

            // Allocate a FIFO that buffers audio samples waiting to be encoded; its initial
            // capacity is 1 sample. The 2nd argument of av_audio_fifo_alloc() is the channel
            // count and the 3rd is the number of samples per channel. Since sample format and
            // channel count are fixed at allocation time, every FIFO size used elsewhere is
            // expressed in samples per channel.
            pp_audio_fifo[i] = av_audio_fifo_alloc(enc_ctx->sample_fmt, enc_ctx->channels, 1);
            if (pp_audio_fifo[i] == NULL)   // check the element just allocated, not the array itself
            {
                av_log(NULL, AV_LOG_ERROR, "Could not allocate FIFO\n");
                avcodec_free_context(&enc_ctx);     // not yet stored anywhere: free it here
                return AVERROR(ENOMEM);
            }
        }

        if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
        {
            enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
        }

        // 3.4 Open the encoder; after this call the AVCodecContext is fully initialized
        /* Third parameter can be used to pass settings to encoder */
        ret = avcodec_open2(enc_ctx, encoder, NULL);
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "Cannot open video encoder for stream #%u\n", i);
            avcodec_free_context(&enc_ctx);         // avoid leaking the context on failure
            return ret;
        }
        // 3.5 Fill the output stream's codecpar from the encoder context
        ret = avcodec_parameters_from_context(out_stream->codecpar, enc_ctx);
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "Failed to copy encoder parameters to output stream #%u\n", i);
            avcodec_free_context(&enc_ctx);         // avoid leaking the context on failure
            return ret;
        }

        // 3.6 Save the encoder context of this output stream; ownership moves to pp_enc_ctx[i]
        pp_enc_ctx[i] = enc_ctx;
    }

6.2 判斷是否需要音頻 FIFO

完整源碼在 main() 函數中，下面摘出關鍵部分：

if (codec_type == AVMEDIA_TYPE_AUDIO) {
        // Non-zero when the output encoder does not advertise variable frame size support.
        const int encoder_frame_size_fixed =
            (stream.o_codec_ctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) == 0;

        if (!encoder_frame_size_fixed ||
            (stream.i_codec_ctx->frame_size == stream.o_codec_ctx->frame_size))
        {
            // Decoder and encoder frame sizes are compatible: transcode directly.
            ret = transcode_audio(&stream, &ipacket);
        }
        else
        {
            // Fixed-size encoder fed with a different frame size: go through the audio FIFO.
            stream.aud_fifo = oafifo[stream_index];
            ret = transcode_audio_with_afifo(&stream, &ipacket);
        }
    }

解碼過程中的音頻幀尺寸：
AVCodecContext.frame_size 表示音頻幀中每個聲道包含的採樣點數。當編碼器 AV_CODEC_CAP_VARIABLE_FRAME_SIZE 標誌有效時，音頻幀尺寸是可變的，AVCodecContext.frame_size 值可能爲 0；否則，解碼器的 AVCodecContext.frame_size 等於解碼幀中的 AVFrame.nb_samples。

編碼過程中的音頻幀尺寸：
上述代碼中第一個判斷條件是 "((stream.o_codec_ctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) == 0)"，第二個判斷條件是 "(stream.i_codec_ctx->frame_size != stream.o_codec_ctx->frame_size)"。如果編碼器不支持可變尺寸音頻幀(第一個判斷條件生效)，而原始音頻幀的尺寸又和編碼器幀尺寸不一樣(第二個判斷條件生效)，則需要引入音頻幀 FIFO，以保證每次從 FIFO 中取出的音頻幀尺寸和編碼器幀尺寸一樣。音頻 FIFO 輸出的音頻幀不含時間戳信息，因此需要重新生成時間戳。

引入音頻 FIFO 的原因：
如果編碼器不支持可變長度幀，而編碼器輸入音頻幀尺寸和編碼器要求的音頻幀尺寸不一樣，就會編碼失敗。比如，AAC 音頻格式轉 MP2 音頻格式，AAC 格式音頻幀尺寸爲 1024，而 MP2 音頻編碼器要求音頻幀尺寸爲 1152，編碼會失敗；再比如 AAC 格式轉碼 AAC 格式，某些 AAC 音頻幀爲 2048，而此時若 AAC 音頻編碼器要求音頻幀尺寸爲 1024，編碼就會失敗。解決這個問題的方法有兩個，一是進行音頻重採樣，使音頻幀轉換爲編碼器支持的格式；另一個是引入音頻 FIFO，一端寫一端讀，每次從讀端取出編碼器要求的幀尺寸即可。

AAC 音頻幀尺寸可能是 1024，也可能是 2048，參考「FFmpeg關於nb_smples,frame_size以及profile的解釋」

6.3 音頻 FIFO 接口函數

本節代碼參考 "https://github.com/FFmpeg/FFmpeg/blob/n4.1/doc/examples/transcode_aac.c" 實現

/**
 * Allocate an audio frame able to hold frame_size samples per channel.
 * Channel layout, sample format and sample rate are copied from the
 * encoder context.
 * @param[out] frame      Receives the new frame; set to NULL on failure
 * @param      occtx      Codec context of the output (encoder) side
 * @param      frame_size Number of samples per channel the frame must hold
 * @return 0 on success, AVERROR_EXIT if the frame itself cannot be
 *         allocated, otherwise the error returned by av_frame_get_buffer()
 */
static int init_audio_output_frame(AVFrame **frame,
                                   AVCodecContext *occtx,
                                   int frame_size)
{
    /* Create the (still empty) frame that will carry the audio samples. */
    AVFrame *new_frame = av_frame_alloc();

    *frame = new_frame;
    if (new_frame == NULL)
    {
        fprintf(stderr, "Could not allocate output frame\n");
        return AVERROR_EXIT;
    }

    /* Describe the audio the frame will hold; av_frame_get_buffer()
     * relies on these fields to size the sample buffers. */
    new_frame->sample_rate    = occtx->sample_rate;
    new_frame->format         = occtx->sample_fmt;
    new_frame->channel_layout = occtx->channel_layout;
    new_frame->nb_samples     = frame_size;

    /* Allocate the sample buffers. This fills AVFrame.data / AVFrame.buf
     * (and the extended_* variants when needed); planar formats get one
     * buffer per plane. */
    int status = av_frame_get_buffer(new_frame, 0);
    if (status < 0)
    {
        fprintf(stderr, "Could not allocate output frame samples (error '%s')\n",
                av_err2str(status));
        av_frame_free(frame);
        return status;
    }

    return 0;
}

/**
 * Append new_size samples (per channel) from new_data to the audio FIFO,
 * growing the FIFO first so that the new samples fit.
 * @param fifo     FIFO to write to
 * @param new_data Per-channel sample buffers to copy from
 * @param new_size Number of samples per channel to store
 * @return 0 on success, the av_audio_fifo_realloc() error if the FIFO cannot
 *         be grown, or AVERROR_EXIT if fewer than new_size samples were stored
 */
static int write_frame_to_audio_fifo(AVAudioFifo *fifo,
                                     uint8_t **new_data,
                                     int new_size)
{
    /* Make room for the samples already buffered plus the incoming ones. */
    const int required_size = av_audio_fifo_size(fifo) + new_size;
    const int grow_status = av_audio_fifo_realloc(fifo, required_size);

    if (grow_status < 0)
    {
        fprintf(stderr, "Could not reallocate FIFO\n");
        return grow_status;
    }

    /* Copy the new samples into the FIFO; a short write is treated as failure. */
    if (av_audio_fifo_write(fifo, (void **)new_data, new_size) < new_size)
    {
        fprintf(stderr, "Could not write data to FIFO\n");
        return AVERROR_EXIT;
    }

    return 0;
}

/**
 * Pull one encoder-sized audio frame out of the FIFO.
 * A new AVFrame is allocated and filled with at most occtx->frame_size
 * samples per channel; if the FIFO holds fewer samples, all of them are read.
 * @param      fifo  FIFO to read from
 * @param      occtx Codec context of the output (encoder) side
 * @param[out] frame Receives the newly allocated frame on success; the caller
 *                   must release it with av_frame_free(). Left untouched on failure.
 * @return number of samples per channel read on success, a negative error
 *         code otherwise
 */
static int read_frame_from_audio_fifo(AVAudioFifo *fifo,
                                      AVCodecContext *occtx,
                                      AVFrame **frame)
{
    AVFrame *output_frame;
    // Read a full encoder frame when the FIFO holds at least that many samples,
    // otherwise drain whatever is left. frame_size counts samples per channel.
    const int frame_size = FFMIN(av_audio_fifo_size(fifo), occtx->frame_size);

    /* Initialize temporary storage for one output frame. */
    // Allocates both the AVFrame and its sample buffers
    int ret = init_audio_output_frame(&output_frame, occtx, frame_size);
    if (ret < 0)
    {
        // Propagate the real cause (still negative, so callers testing "< 0" are unaffected)
        return ret;
    }

    // Read from the FIFO into the frame's sample planes. extended_data is used
    // (matching the write side) because AVFrame.data only holds the first
    // AV_NUM_DATA_POINTERS planes, which is not enough for planar audio with
    // many channels.
    ret = av_audio_fifo_read(fifo, (void **)output_frame->extended_data, frame_size);
    if (ret < frame_size)
    {
        fprintf(stderr, "Could not read data from FIFO\n");
        av_frame_free(&output_frame);
        return AVERROR_EXIT;
    }

    *frame = output_frame;

    return ret;
}

6.4 編碼音頻幀

完整源碼在 transcode_audio_with_afifo() 函數中，下面摘出關鍵部分：

// 2. Run the decoded frame through the filter graph
    ret = filtering_frame(sctx->flt_ctx, frame_dec, frame_flt);
    if (ret == AVERROR_EOF)         // the filter has been flushed
    {
        flt_finished = true;
        av_log(NULL, AV_LOG_INFO, "filtering aframe EOF\n");
        frame_flt = NULL;
    }
    else if (ret < 0)
    {
        av_log(NULL, AV_LOG_INFO, "filtering aframe error %d\n", ret);
        goto end;
    }

    // 3. Use the audio FIFO so every frame handed to the encoder has the size it requires
    // 3.1 Write the filtered frame into the FIFO; its size is the decoder-side frame size.
    //     frame_flt is NULL once the filter reported EOF above, so there is nothing to write then.
    if (!dec_finished && frame_flt != NULL)
    {
        uint8_t** new_data = frame_flt->extended_data;  // sample buffers of all channels in this frame
        int new_size = frame_flt->nb_samples;           // samples per channel in this frame
        
        // Keep feeding the FIFO until it holds at least one encoder frame
        ret = write_frame_to_audio_fifo(p_fifo, new_data, new_size);
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_INFO, "write aframe to fifo error\n");
            goto end;
        }
    }

    // 3.2 Take frames out of the FIFO; their size is the encoder-side frame size.
    //     Loop while the FIFO holds at least one encoder frame, or, once decoding has
    //     finished, until the remaining samples and the encoder have been drained.
    while ((av_audio_fifo_size(p_fifo) >= enc_frame_size) || dec_finished)
    {
        bool flushing = dec_finished && (av_audio_fifo_size(p_fifo) == 0);  // FIFO empty: flush the encoder
        
        if (frame_enc != NULL)
        {
            av_frame_free(&frame_enc);
        }

        if (!flushing)
        {
            // Read one frame from the FIFO; it is encoded and written to the output below
            ret = read_frame_from_audio_fifo(p_fifo, sctx->o_codec_ctx, &frame_enc);
            if (ret < 0)
            {
                av_log(NULL, AV_LOG_INFO, "read aframe from fifo error\n");
                goto end;
            }

            // 4. Frames read from the FIFO carry no timestamp: regenerate pts.
            //    ret is the number of samples per channel just read.
            frame_enc->pts = s_pts;
            s_pts += ret;
        }

flush_encoder:
        // 5. Encode (frame_enc is NULL while flushing)
        ret = av_encode_frame(sctx->o_codec_ctx, frame_enc, &opacket);
        if (ret == AVERROR(EAGAIN))     // the encoder needs another input frame
        {
            //av_log(NULL, AV_LOG_INFO, "encode aframe need more packet\n");
            if (frame_enc != NULL)
            {
                av_frame_free(&frame_enc);
            }
            continue;
        }
        else if (ret == AVERROR_EOF)
        {
            av_log(NULL, AV_LOG_INFO, "encode aframe EOF\n");
            enc_finished = true;
            goto end;
        }
        else if (ret < 0)
        {
            // Any other negative value is an encoding failure: no valid packet was
            // produced, so it must not be muxed.
            av_log(NULL, AV_LOG_ERROR, "encode aframe error %d\n", ret);
            goto end;
        }

        // 5.1 Set the packet's stream index and convert its timestamps.
        //     AVPacket.pts and AVPacket.dts are expressed in AVStream.time_base, which
        //     differs between container formats, so every packet's pts/dts is rescaled
        //     from the encoder time base to the output stream time base.
        opacket.stream_index = sctx->stream_idx;
        av_packet_rescale_ts(&opacket, sctx->o_codec_ctx->time_base, sctx->o_stream->time_base);
        
        av_log(NULL, AV_LOG_DEBUG, "Muxing frame\n");

        // 6. Write the encoded packet to the output media file
        ret = av_interleaved_write_frame(sctx->o_fmt_ctx, &opacket);
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_INFO, "write aframe error %d\n", ret);
            goto end;
        }

        if (flushing)
        {
            // Keep draining the encoder until it reports EOF
            goto flush_encoder;
        }
    }

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。