ffplay源碼分析6-音頻重採樣

本文爲作者原創,轉載請註明出處:https://www.cnblogs.com/leisure_chn/p/10312713.html

ffplay是FFmpeg工程自帶的簡單播放器,使用FFmpeg提供的解碼器和SDL庫進行視頻播放。本文基於FFmpeg工程4.1版本進行分析,其中ffplay源碼清單如下:
https://github.com/FFmpeg/FFmpeg/blob/n4.1/fftools/ffplay.c

在嘗試分析源碼前,可先閱讀以下參考文章作爲鋪墊:
[1]. 雷霄驊,視音頻編解碼技術零基礎學習方法
[2]. 視頻編解碼基礎概念
[3]. 色彩空間與像素格式
[4]. 音頻參數解析
[5]. FFmpeg基礎概念

「ffplay源碼分析」系列文章如下:
[1]. ffplay源碼分析1-概述
[2]. ffplay源碼分析2-數據結構
[3]. ffplay源碼分析3-代碼框架
[4]. ffplay源碼分析4-音視頻同步
[5]. ffplay源碼分析5-圖像格式轉換
[6]. ffplay源碼分析6-音頻重採樣
[7]. ffplay源碼分析7-播放控制

6. 音頻重採樣

FFmpeg解碼得到的音頻幀的格式未必能被SDL支持,在這種情況下,需要進行音頻重採樣,即將音頻幀格式轉換爲SDL支持的音頻格式,否則是無法正常播放的。
音頻重採樣涉及兩個步驟:
1) 打開音頻設備時進行的準備工作:確定SDL支持的音頻格式,作爲後期音頻重採樣的目標格式
2) 音頻播放線程中,取出音頻幀後,若有需要(音頻幀格式與SDL支持音頻格式不匹配)則進行重採樣,否則直接輸出

6.1 打開音頻設備

音頻設備的打開實際是在解複用線程中實現的。解複用線程中先打開音頻設備(設定音頻回調函數供SDL音頻播放線程回調),然後再創建音頻解碼線程。調用鏈如下:

main() -->
stream_open() -->
read_thread() -->
stream_component_open() -->
    audio_open(is, channel_layout, nb_channels, sample_rate, &is->audio_tgt);
    decoder_start(&is->auddec, audio_thread, is);

audio_open()函數填入期望的音頻參數,打開音頻設備後,將實際的音頻參數存入輸出參數is->audio_tgt中,後面音頻播放線程會用到此參數,使用此參數將原始音頻數據重採樣,轉換爲音頻設備支持的格式。

/**
 * Open the SDL audio device and report the parameters that were actually obtained.
 *
 * The requested channel count / sample rate are only a starting point: if the
 * device cannot be opened, other channel counts and lower sample rates are
 * tried in turn until one works or the fallback tables are exhausted.
 *
 * @param opaque                 stored in wanted_spec.userdata, i.e. the argument
 *                               SDL hands to sdl_audio_callback
 * @param wanted_channel_layout  requested channel layout; replaced by the default
 *                               layout when 0 or inconsistent with wanted_nb_channels
 * @param wanted_nb_channels     requested channel count
 * @param wanted_sample_rate     requested sample rate in Hz
 * @param audio_hw_params        [out] format, rate, layout and byte sizes
 *                               derived from the opened device
 * @return spec.size (SDL's audio buffer size in bytes) on success, -1 on failure
 */
static int audio_open(void *opaque, int64_t wanted_channel_layout, int wanted_nb_channels, int wanted_sample_rate, struct AudioParams *audio_hw_params)
{
    SDL_AudioSpec wanted_spec, spec;
    const char *env;
    // Indexed by the channel count that just failed to open: the next count to try (0 = none left)
    static const int next_nb_channels[] = {0, 0, 1, 6, 2, 6, 4, 6};
    // Fallback sample rates in ascending order; the leading 0 marks "nothing left to try"
    static const int next_sample_rates[] = {0, 44100, 48000, 96000, 192000};
    int next_sample_rate_idx = FF_ARRAY_ELEMS(next_sample_rates) - 1;

    env = SDL_getenv("SDL_AUDIO_CHANNELS");
    if (env) {  // if the environment variable is set, it overrides the requested channel count and layout
        wanted_nb_channels = atoi(env);
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
    }
    if (!wanted_channel_layout || wanted_nb_channels != av_get_channel_layout_nb_channels(wanted_channel_layout)) {
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
        wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
    }
    // Derive nb_channels from channel_layout; this corrects wanted_nb_channels if the two did not match
    wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout);
    wanted_spec.channels = wanted_nb_channels;  // channel count
    wanted_spec.freq = wanted_sample_rate;      // sample rate
    if (wanted_spec.freq <= 0 || wanted_spec.channels <= 0) {
        av_log(NULL, AV_LOG_ERROR, "Invalid sample rate or channel count!\n");
        return -1;
    }
    while (next_sample_rate_idx && next_sample_rates[next_sample_rate_idx] >= wanted_spec.freq)
        next_sample_rate_idx--;     // stop at the highest table entry strictly below the requested rate: the first fallback rate
    // Audio sample formats come in two families, planar and packed. For a stereo stream,
    // writing L for a left-channel sample and R for a right-channel sample:
    // planar layout: (plane1)LLLLLLLL...LLLL (plane2)RRRRRRRR...RRRR
    // packed layout: (plane1)LRLRLRLR...........................LRLR
    // Each family has several sample formats, e.g. AV_SAMPLE_FMT_S16 (packed) and AV_SAMPLE_FMT_S16P (planar);
    // note that SDL2.0 does not currently support planar formats
    // channel_layout is an int64_t bit mask describing the channel layout, one bit per channel; see channel_layout.h
    // data rate (bits/second) = sample rate (Hz) * sample depth (bits) * channel count
    wanted_spec.format = AUDIO_S16SYS;          // sample format: S = signed, 16 = sample depth in bits, SYS = system byte order; macro defined by SDL
    wanted_spec.silence = 0;                    // silence value
    wanted_spec.samples = FFMAX(SDL_AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(wanted_spec.freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC));   // SDL audio buffer size, counted in sample frames (one sample for every channel)
    wanted_spec.callback = sdl_audio_callback;  // callback; if NULL, the SDL_QueueAudio() mechanism is used instead
    wanted_spec.userdata = opaque;              // argument passed to the callback
    // Open the audio device. wanted_spec holds the requested parameters, spec receives what the hardware actually provides.
    // 1) SDL offers two ways for the device to obtain audio data:
    //    a. pull: SDL invokes the callback at its own pace and the callback supplies the data (used here)
    //    b. push: the application calls SDL_QueueAudio() to hand data to the device; in that case wanted_spec.callback is NULL
    // 2) After opening, the device is paused: it plays silence and does not invoke the callback until it is
    //    unpaused (for a device opened with SDL_OpenAudioDevice() that is SDL_PauseAudioDevice(audio_dev, 0))
    // A NULL device name (first argument) requests the default device, the same choice SDL_OpenAudio() makes
    while (!(audio_dev = SDL_OpenAudioDevice(NULL, 0, &wanted_spec, &spec, SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | SDL_AUDIO_ALLOW_CHANNELS_CHANGE))) {
        av_log(NULL, AV_LOG_WARNING, "SDL_OpenAudio (%d channels, %d Hz): %s\n",
               wanted_spec.channels, wanted_spec.freq, SDL_GetError());
        // Opening failed: first walk through the alternative channel counts for the current rate;
        // once those are exhausted (0), step down to the next lower sample rate and start again
        // from the originally wanted channel count. A rate of 0 means every combination was tried.
        wanted_spec.channels = next_nb_channels[FFMIN(7, wanted_spec.channels)];
        if (!wanted_spec.channels) {
            wanted_spec.freq = next_sample_rates[next_sample_rate_idx--];
            wanted_spec.channels = wanted_nb_channels;
            if (!wanted_spec.freq) {
                av_log(NULL, AV_LOG_ERROR,
                       "No more combinations to try, audio open failed\n");
                return -1;
            }
        }
        wanted_channel_layout = av_get_default_channel_layout(wanted_spec.channels);
    }
    // Check the parameters actually obtained: sample format
    if (spec.format != AUDIO_S16SYS) {
        av_log(NULL, AV_LOG_ERROR,
               "SDL advised audio format %d is not supported!\n", spec.format);
        return -1;
    }
    // Check the parameters actually obtained: channel count (SDL was allowed to change it)
    if (spec.channels != wanted_spec.channels) {
        wanted_channel_layout = av_get_default_channel_layout(spec.channels);
        if (!wanted_channel_layout) {
            av_log(NULL, AV_LOG_ERROR,
                   "SDL advised channel count %d is not supported!\n", spec.channels);
            return -1;
        }
    }

    // wanted_spec (requested) and spec (obtained) are SDL structures.
    // audio_hw_params is the FFmpeg-side description, returned to the caller as an output parameter.
    audio_hw_params->fmt = AV_SAMPLE_FMT_S16;
    audio_hw_params->freq = spec.freq;
    audio_hw_params->channel_layout = wanted_channel_layout;
    audio_hw_params->channels =  spec.channels;
    // bytes occupied by one sample of every channel
    audio_hw_params->frame_size = av_samples_get_buffer_size(NULL, audio_hw_params->channels, 1, audio_hw_params->fmt, 1);
    // bytes occupied by one second of audio (freq samples of every channel)
    audio_hw_params->bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params->channels, audio_hw_params->freq, audio_hw_params->fmt, 1);
    if (audio_hw_params->bytes_per_sec <= 0 || audio_hw_params->frame_size <= 0) {
        av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
        return -1;
    }
    return spec.size;
}

打開音頻設備,涉及到FFmpeg中音頻存儲的基礎概念,爲稍顯清晰,將相關註釋摘抄如下:

6.1.1 音頻格式相關

**planar&packed**  
 音頻採樣格式有兩大類型:planar和packed,假設一個雙聲道音頻文件,一個左聲道採樣點記作L,一個右聲道採樣點記作R,則:  
 planar存儲格式:(plane1)LLLLLLLL...LLLL (plane2)RRRRRRRR...RRRR  
 packed存儲格式:(plane1)LRLRLRLR...........................LRLR  
 在這兩種採樣類型下,又細分多種採樣格式,如AV_SAMPLE_FMT_S16、AV_SAMPLE_FMT_S16P等,注意SDL2.0目前不支持planar格式  

 SDL中音頻參數的數據結構定義如下:
/**
 *  The calculated values in this structure are calculated by SDL_OpenAudio().
 *
 *  For multi-channel audio, the default SDL channel mapping is:
 *  2:  FL FR                       (stereo)
 *  3:  FL FR LFE                   (2.1 surround)
 *  4:  FL FR BL BR                 (quad)
 *  5:  FL FR FC BL BR              (quad + center)
 *  6:  FL FR FC LFE SL SR          (5.1 surround - last two can also be BL BR)
 *  7:  FL FR FC LFE BC SL SR       (6.1 surround)
 *  8:  FL FR FC LFE BL BR SL SR    (7.1 surround)
 *
 *  Usage in ffplay's audio_open(): the caller fills freq, format, channels,
 *  silence, samples, callback and userdata of the requested spec, and reads
 *  freq, format, channels and size from the obtained spec.
 */
typedef struct SDL_AudioSpec
{
    int freq;                   /**< DSP frequency -- samples per second */
    SDL_AudioFormat format;     /**< Audio data format */
    Uint8 channels;             /**< Number of channels: 1 mono, 2 stereo */
    Uint8 silence;              /**< Audio buffer silence value (calculated) */
    Uint16 samples;             /**< Audio buffer size in sample FRAMES (total samples divided by channel count) */
    Uint16 padding;             /**< Necessary for some compile environments */
    Uint32 size;                /**< Audio buffer size in bytes (calculated) */
    SDL_AudioCallback callback; /**< Callback that feeds the audio device (NULL to use SDL_QueueAudio()). */
    void *userdata;             /**< Userdata passed to callback (ignored for NULL callbacks). */
} SDL_AudioSpec;
SDL音頻格式定義如下:
/**
 *  \brief Audio format flags.
 *
 *  These are what the 16 bits in SDL_AudioFormat currently mean...
 *  (Unspecified bits are always zero).
 *
 *  \verbatim
    ++-----------------------sample is signed if set
    ||
    ||       ++-----------sample is bigendian if set
    ||       ||
    ||       ||          ++---sample is float if set
    ||       ||          ||
    ||       ||          || +---sample bit size---+
    ||       ||          || |                     |
    15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
    \endverbatim
 *
 *  There are macros in SDL 2.0 and later to query these bits.
 */
typedef Uint16 SDL_AudioFormat;

/**
 *  \name Audio format flags
 *
 *  Defaults to LSB byte order.
 *
 *  Reading the values against the bit diagram above: 0x8000 is the "signed"
 *  bit (15), 0x1000 the "bigendian" bit (12), and the low byte is the sample
 *  size in bits (0x08 = 8, 0x10 = 16). E.g. AUDIO_S16MSB = 0x8000|0x1000|0x10.
 *
 *  Only the 8/16-bit formats are listed in this excerpt; the system-byte-order
 *  alias AUDIO_S16SYS used by audio_open() is not shown here.
 */
/* @{ */
#define AUDIO_U8        0x0008  /**< Unsigned 8-bit samples */
#define AUDIO_S8        0x8008  /**< Signed 8-bit samples */
#define AUDIO_U16LSB    0x0010  /**< Unsigned 16-bit samples */
#define AUDIO_S16LSB    0x8010  /**< Signed 16-bit samples */
#define AUDIO_U16MSB    0x1010  /**< As above, but big-endian byte order */
#define AUDIO_S16MSB    0x9010  /**< As above, but big-endian byte order */
#define AUDIO_U16       AUDIO_U16LSB
#define AUDIO_S16       AUDIO_S16LSB
/* @} */
FFmpeg中定義音頻參數的相關數據結構爲:
// This structure is defined in ffplay.c. It describes one audio configuration;
// audio_open() fills it with the parameters of the opened device.
typedef struct AudioParams {
    int freq;                   // sample rate in Hz
    int channels;               // number of channels
    int64_t channel_layout;     // channel layout bit mask (see channel_layout.h)
    enum AVSampleFormat fmt;    // sample format
    int frame_size;             // bytes for one sample of every channel (set via av_samples_get_buffer_size(..., 1, fmt, 1))
    int bytes_per_sec;          // bytes for freq samples of every channel, i.e. one second of audio
} AudioParams;

/**
 * Audio sample formats
 *
 * - The data described by the sample format is always in native-endian order.
 *   Sample values can be expressed by native C types, hence the lack of a signed
 *   24-bit sample format even though it is a common raw audio data format.
 *
 * - The floating-point formats are based on full volume being in the range
 *   [-1.0, 1.0]. Any values outside this range are beyond full volume level.
 *
 * - The data layout as used in av_samples_fill_arrays() and elsewhere in FFmpeg
 *   (such as AVFrame in libavcodec) is as follows:
 *
 * @par
 * For planar sample formats, each audio channel is in a separate data plane,
 * and linesize is the buffer size, in bytes, for a single plane. All data
 * planes must be the same size. For packed sample formats, only the first data
 * plane is used, and samples for each channel are interleaved. In this case,
 * linesize is the buffer size, in bytes, for the 1 plane.
 *
 */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    /* packed (interleaved) formats */
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double

    /* planar counterparts of the formats above */
    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
    /* 64-bit formats: packed first, then planar */
    AV_SAMPLE_FMT_S64,         ///< signed 64 bits
    AV_SAMPLE_FMT_S64P,        ///< signed 64 bits, planar

    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};
**channel_layout**  
 channel_layout是int64_t類型,表示音頻聲道佈局,每bit代表一個特定的聲道,參考channel_layout.h中的定義:
/**
 * @defgroup channel_masks Audio channel masks
 *
 * A channel layout is a 64-bits integer with a bit set for every channel.
 * The number of bits set must be equal to the number of channels.
 * The value 0 means that the channel layout is not known.
 * @note this data structure is not powerful enough to handle channels
 * combinations that have the same channel multiple times, such as
 * dual-mono.
 *
 * @{
 */
#define AV_CH_FRONT_LEFT             0x00000001
#define AV_CH_FRONT_RIGHT            0x00000002
#define AV_CH_FRONT_CENTER           0x00000004
#define AV_CH_LOW_FREQUENCY          0x00000008
#define AV_CH_BACK_LEFT              0x00000010
#define AV_CH_BACK_RIGHT             0x00000020
#define AV_CH_FRONT_LEFT_OF_CENTER   0x00000040
#define AV_CH_FRONT_RIGHT_OF_CENTER  0x00000080
#define AV_CH_BACK_CENTER            0x00000100
#define AV_CH_SIDE_LEFT              0x00000200
#define AV_CH_SIDE_RIGHT             0x00000400
#define AV_CH_TOP_CENTER             0x00000800
#define AV_CH_TOP_FRONT_LEFT         0x00001000
#define AV_CH_TOP_FRONT_CENTER       0x00002000
#define AV_CH_TOP_FRONT_RIGHT        0x00004000
#define AV_CH_TOP_BACK_LEFT          0x00008000
#define AV_CH_TOP_BACK_CENTER        0x00010000
#define AV_CH_TOP_BACK_RIGHT         0x00020000
#define AV_CH_STEREO_LEFT            0x20000000  ///< Stereo downmix.
#define AV_CH_STEREO_RIGHT           0x40000000  ///< See AV_CH_STEREO_LEFT.
/* The masks below use bit 31 and above and are written as 64-bit (ULL) constants. */
#define AV_CH_WIDE_LEFT              0x0000000080000000ULL
#define AV_CH_WIDE_RIGHT             0x0000000100000000ULL
#define AV_CH_SURROUND_DIRECT_LEFT   0x0000000200000000ULL
#define AV_CH_SURROUND_DIRECT_RIGHT  0x0000000400000000ULL
#define AV_CH_LOW_FREQUENCY_2        0x0000000800000000ULL

/** Channel mask value used for AVCodecContext.request_channel_layout
    to indicate that the user requests the channel order of the decoder output
    to be the native codec channel order. */
#define AV_CH_LAYOUT_NATIVE          0x8000000000000000ULL

/**
 * @}
 * @defgroup channel_mask_c Audio channel layouts
 *
 * Each layout is the bitwise OR of the single-channel masks above (or of a
 * smaller layout plus additional channels).
 * @{
 * */
#define AV_CH_LAYOUT_MONO              (AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_STEREO            (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
#define AV_CH_LAYOUT_2POINT1           (AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_1               (AV_CH_LAYOUT_STEREO|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_SURROUND          (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_3POINT1           (AV_CH_LAYOUT_SURROUND|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_4POINT0           (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_4POINT1           (AV_CH_LAYOUT_4POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_2               (AV_CH_LAYOUT_STEREO|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_QUAD              (AV_CH_LAYOUT_STEREO|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT0           (AV_CH_LAYOUT_SURROUND|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_5POINT1           (AV_CH_LAYOUT_5POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_5POINT0_BACK      (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT1_BACK      (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_6POINT0           (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT0_FRONT     (AV_CH_LAYOUT_2_2|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_HEXAGONAL         (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1           (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_BACK      (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_FRONT     (AV_CH_LAYOUT_6POINT0_FRONT|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_7POINT0           (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT0_FRONT     (AV_CH_LAYOUT_5POINT0|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1           (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT1_WIDE      (AV_CH_LAYOUT_5POINT1|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1_WIDE_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_OCTAGONAL         (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_HEXADECAGONAL     (AV_CH_LAYOUT_OCTAGONAL|AV_CH_WIDE_LEFT|AV_CH_WIDE_RIGHT|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)
/* audio_open() clears these two bits from the default layout it computes. */
#define AV_CH_LAYOUT_STEREO_DOWNMIX    (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)

6.1.2 打開音頻設備

打開音頻設備並創建音頻處理線程,通過調用SDL_OpenAudio()或SDL_OpenAudioDevice()實現。輸入參數是預期的參數,輸出參數是實際參數  
 1) SDL提供兩種使音頻設備取得音頻數據方法:  
    a. pull,SDL以特定的頻率調用回調函數,在回調函數中取得音頻數據  
    b. push,用戶程序以特定的頻率調用SDL_QueueAudio(),向音頻設備提供數據。此種情況wanted_spec.callback=NULL  
 2) 音頻設備打開後播放靜音,不啓動回調,調用SDL_PauseAudio(0)(對SDL_OpenAudioDevice()打開的設備則是SDL_PauseAudioDevice(dev, 0))後啓動回調,開始正常播放音頻  
    SDL_OpenAudioDevice()第一個參數爲NULL時,選用默認音頻設備,與SDL_OpenAudio()選擇設備的方式相同

6.2 音頻重採樣

音頻重採樣在audio_decode_frame()中實現,audio_decode_frame()就是從音頻frame隊列中取出一個frame,按指定格式經過重採樣後輸出。
audio_decode_frame()函數名起得不太好,它只是進行重採樣,並不進行解碼,叫audio_resample_frame()可能更貼切。
重採樣的細節很瑣碎,直接看註釋:

/**
 * Decode one audio frame and return its uncompressed size.
 *
 * The processed audio frame is decoded, converted if required, and
 * stored in is->audio_buf, with size in bytes given by the return
 * value.
 *
 * Note: no decoder is called from this function itself; it takes an
 * already decoded frame from is->sampq and, when the frame's parameters
 * differ from the target, resamples it.
 *
 * @param is  player state; reads the sample queue and audio_src/audio_tgt,
 *            updates swr_ctx, audio_src, audio_buf, audio_buf1, audio_clock
 *            and audio_clock_serial
 * @return size in bytes of the data at is->audio_buf; -1 when paused, when
 *         no frame is available or on a resampling error; AVERROR(ENOMEM)
 *         when the output buffer cannot be allocated
 */
static int audio_decode_frame(VideoState *is)
{
    int data_size, resampled_data_size;
    int64_t dec_channel_layout;
    av_unused double audio_clock0;
    int wanted_nb_samples;
    Frame *af;

    if (is->paused)
        return -1;

    // Fetch the next frame, discarding frames whose serial no longer matches the packet queue's serial
    do {
#if defined(_WIN32)
        // Poll for a frame in 1 ms steps, but give up once the time since audio_callback_time
        // exceeds half the playing time of the hardware buffer
        while (frame_queue_nb_remaining(&is->sampq) == 0) {
            if ((av_gettime_relative() - audio_callback_time) > 1000000LL * is->audio_hw_buf_size / is->audio_tgt.bytes_per_sec / 2)
                return -1;
            av_usleep (1000);
        }
#endif
        // If the head of the queue is readable, af points to that frame
        if (!(af = frame_queue_peek_readable(&is->sampq)))
            return -1;
        frame_queue_next(&is->sampq);
    } while (af->serial != is->audioq.serial);

    // Size in bytes of the frame's audio data, computed from the frame's own parameters
    data_size = av_samples_get_buffer_size(NULL, af->frame->channels,   // two arguments on this line: linesize (unused), channel count
                                           af->frame->nb_samples,       // one argument on this line: number of samples per channel in this frame
                                           af->frame->format, 1);       // two arguments on this line: sample format, no alignment

    // Channel layout of the frame: use the frame's own layout if it is set and consistent
    // with the channel count, otherwise the default layout for that channel count
    dec_channel_layout =
        (af->frame->channel_layout && af->frame->channels == av_get_channel_layout_nb_channels(af->frame->channel_layout)) ?
        af->frame->channel_layout : av_get_default_channel_layout(af->frame->channels);
    // Corrected sample count for A/V sync. NOTE(review): synchronize_audio() is not shown here; per the
    // article it returns nb_samples unchanged when audio is the master clock and an adjusted count otherwise
    wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);

    // is->audio_tgt holds the audio parameters accepted by SDL, as obtained by audio_open().
    // is->audio_src is expected to start out equal to is->audio_tgt (the assignment is made by the
    // caller of audio_open(), outside this excerpt -- TODO confirm against stream_component_open()).
    // So: if the frame's parameters == is->audio_src == is->audio_tgt, no resampling is needed
    //      (is->swr_ctx stays NULL in that case);
    //      otherwise is->swr_ctx is set up from the frame (source) and is->audio_tgt (target),
    //      and is->audio_src is updated to the frame's parameters.
    // The last condition also creates a resampler when only the sample count must be adjusted and none exists yet.
    if (af->frame->format        != is->audio_src.fmt            ||
        dec_channel_layout       != is->audio_src.channel_layout ||
        af->frame->sample_rate   != is->audio_src.freq           ||
        (wanted_nb_samples       != af->frame->nb_samples && !is->swr_ctx)) {
        swr_free(&is->swr_ctx);
        // Configure is->swr_ctx: output = is->audio_tgt, input = parameters of this frame
        is->swr_ctx = swr_alloc_set_opts(NULL,
                                         is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
                                         dec_channel_layout,           af->frame->format, af->frame->sample_rate,
                                         0, NULL);
        if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                    af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
                    is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
            swr_free(&is->swr_ctx);
            return -1;
        }
        // Remember the frame's parameters in is->audio_src; as long as later frames keep the same
        // parameters this branch is not entered again
        is->audio_src.channel_layout = dec_channel_layout;
        is->audio_src.channels       = af->frame->channels;
        is->audio_src.freq = af->frame->sample_rate;
        is->audio_src.fmt = af->frame->format;
    }

    if (is->swr_ctx) {
        // Resampler input: the frame's data planes (the input sample count is af->frame->nb_samples)
        const uint8_t **in = (const uint8_t **)af->frame->extended_data;
        // Resampler output: is->audio_buf1, (re)allocated below
        uint8_t **out = &is->audio_buf1;
        // Output capacity in samples per channel: wanted count scaled to the target rate, plus 256 samples of headroom
        int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
        // Output buffer size in bytes for out_count samples
        int out_size  = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
        int len2;
        if (out_size < 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
            return -1;
        }
        // True when the sample count of this frame was corrected for synchronisation
        if (wanted_nb_samples != af->frame->nb_samples) {
            // Resampling compensation. Both arguments are expressed at the output rate:
            //   1st: number of samples to add (or drop, if negative) = (wanted - actual) scaled to the target rate
            //   2nd: distance over which to spread that change = wanted count scaled to the target rate,
            //        i.e. roughly the output length of this frame
            if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate, 
                                     wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                return -1;
            }
        }
        av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
        if (!is->audio_buf1)
            return AVERROR(ENOMEM);
        // Resample; the return value is the number of samples per channel written to the output
        len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
        if (len2 < 0) {
            av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
            return -1;
        }
        // The output buffer was filled completely, so data may have been left inside the resampler:
        // warn and re-initialise the context (dropping it if that fails)
        if (len2 == out_count) {
            av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
            if (swr_init(is->swr_ctx) < 0)
                swr_free(&is->swr_ctx);
        }
        is->audio_buf = is->audio_buf1;
        // Size in bytes of the resampled audio: samples * channels * bytes per sample
        resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
    } else {
        // No resampling: point directly at the audio data inside the frame
        is->audio_buf = af->frame->data[0];
        resampled_data_size = data_size;
    }

    // Previous clock value, kept only for the debug output below
    audio_clock0 = is->audio_clock;
    /* update the audio clock with the pts */
    // The clock is set to the frame's pts plus the frame's duration (nb_samples / sample_rate)
    if (!isnan(af->pts))
        is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
    else
        is->audio_clock = NAN;
    is->audio_clock_serial = af->serial;
#ifdef DEBUG
    {
        static double last_clock;
        printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
               is->audio_clock - last_clock,
               is->audio_clock, audio_clock0);
        last_clock = is->audio_clock;
    }
#endif
    return resampled_data_size;
}
相關文章
相關標籤/搜索