FFmpeg中文例子—指導2：加入音頻

時間 2019-11-13

標籤 ffmepg 中文例子指導加入音頻简体版

原文原文鏈接

#include "libavformat/avformat.h"  
#include "libswscale/swscale.h"  
#include <windows.h>  
  
#include <stdlib.h>  
#include <stdio.h>  
#include <string.h>  
#include <math.h>  
#include <SDL/SDL.h>  
#include <SDL/SDL_thread.h>  
  
/* NOTE(review): presumably undoes a macro redefinition of `main` coming from
 * the SDL headers, so that the main() defined below is the real entry point --
 * confirm against the SDL_main.h in use. */
#ifdef main  
#undef main  
#endif  
  
/* Value assigned to wanted_spec.samples in main() when opening the audio device. */
#define SDL_AUDIO_BUFFER_SIZE 1024  
/* Flags argument passed to sws_getContext() in main(). */
static int sws_flags = SWS_BICUBIC;  
  
/*
 * Singly linked FIFO of AVPackets shared between main() (which appends with
 * packet_queue_put) and the audio path (which removes with packet_queue_get).
 * Every access to the list fields happens with `mutex` held.
 */
typedef struct PacketQueue
{
    AVPacketList *first_pkt;  /* oldest queued node; NULL when the queue is empty */
    AVPacketList *last_pkt;   /* newest queued node; NULL when the queue is empty */
    int nb_packets;           /* number of nodes currently linked */
    int size;                 /* running sum of pkt.size over the linked nodes */
    SDL_mutex *mutex;         /* guards all of the fields above */
    SDL_cond *cond;           /* signalled by packet_queue_put after each append */
} PacketQueue;
/* Queue of audio packets: filled by main(), drained by audio_decode_frame(). */
PacketQueue audioq;  
/* Shutdown flag set to 1 by main(); once non-zero, packet_queue_get() and
 * audio_decode_frame() return -1 instead of waiting for more packets. */
int quit = 0;  
/*
 * Put *q into the empty state (all fields zero / NULL) and then create the
 * SDL mutex and condition variable the other queue functions rely on.
 * The results of SDL_CreateMutex/SDL_CreateCond are stored unchecked.
 */
void packet_queue_init(PacketQueue *q)
{
    memset(q, 0, sizeof *q);

    q->mutex = SDL_CreateMutex();
    q->cond = SDL_CreateCond();
}
/*
 * Append a copy of *pkt (the AVPacket struct itself, copied by value) to the
 * tail of q and wake one thread waiting in packet_queue_get().
 *
 * Returns 0 on success, -1 if av_dup_packet() fails or the list node cannot
 * be allocated. On failure nothing is queued and *pkt is not freed here.
 */
int packet_queue_put(PacketQueue *q, AVPacket *pkt)
{
    AVPacketList *node;

    /* NOTE(review): av_dup_packet presumably makes the packet own its data so
     * it stays valid after the demuxer reads the next packet -- confirm. */
    if (av_dup_packet(pkt) < 0)
        return -1;

    node = (AVPacketList *)av_malloc(sizeof(AVPacketList));
    if (node == NULL)
        return -1;
    node->next = NULL;
    node->pkt = *pkt;

    SDL_LockMutex(q->mutex);

    /* Link after the current tail, or become the head of an empty queue. */
    if (q->last_pkt != NULL)
        q->last_pkt->next = node;
    else
        q->first_pkt = node;
    q->last_pkt = node;

    q->size += node->pkt.size;
    q->nb_packets += 1;

    SDL_CondSignal(q->cond);
    SDL_UnlockMutex(q->mutex);

    return 0;
}
/*
 * Remove the oldest packet from q and copy it into *pkt.
 *
 * block != 0: wait on q->cond until a packet is available.
 * block == 0: return immediately when the queue is empty.
 *
 * Returns 1 when a packet was stored in *pkt, 0 when the queue was empty and
 * block was 0, and -1 when the global `quit` flag is set (checked before each
 * attempt, including after every wake-up).
 */
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
    int result = -1;   /* value returned if the loop ends because quit is set */

    SDL_LockMutex(q->mutex);

    while (!quit)
    {
        AVPacketList *head = q->first_pkt;

        if (head != NULL)
        {
            /* Unlink the head node; the queue may become empty. */
            q->first_pkt = head->next;
            if (q->first_pkt == NULL)
                q->last_pkt = NULL;
            q->nb_packets -= 1;
            q->size -= head->pkt.size;

            *pkt = head->pkt;
            av_free(head);
            result = 1;
            break;
        }

        if (!block)
        {
            result = 0;
            break;
        }

        /* Releases the mutex while sleeping and re-acquires it on wake-up. */
        SDL_CondWait(q->cond, q->mutex);
    }

    SDL_UnlockMutex(q->mutex);
    return result;
}
  
/*
 * Decode the next chunk of audio into audio_buf.
 *
 * aCodecCtx  decoder context of the audio stream
 * audio_buf  destination for the decoded samples
 * buf_size   capacity of audio_buf in bytes (passed to the decoder as the
 *            initial value of its in/out size argument)
 *
 * Returns the number of bytes written to audio_buf (always > 0), or -1 when
 * `quit` is set or packet_queue_get() fails.
 *
 * The current packet and the read position inside it are kept in function
 * statics, so one packet can be decoded across several calls. Because of
 * those statics the function is not reentrant.
 */
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)
{
    static AVPacket pkt;
    static uint8_t *audio_pkt_data = NULL;
    static int audio_pkt_size = 0;
    int len1, data_size;

    for (;;)
    {
        /* First use up whatever is left of the packet fetched earlier. */
        while (audio_pkt_size > 0)
        {
            data_size = buf_size;
            len1 = avcodec_decode_audio2(aCodecCtx, (int16_t *)audio_buf, &data_size,
                                         audio_pkt_data, audio_pkt_size);
            if (len1 < 0)
            {
                /* Decode error: skip the rest of this packet. */
                audio_pkt_size = 0;
                break;
            }
            if (len1 == 0 && data_size <= 0)
            {
                /* Nothing consumed and nothing produced: retrying with the
                 * same input would loop forever, so drop this packet too. */
                audio_pkt_size = 0;
                break;
            }
            audio_pkt_data += len1;
            audio_pkt_size -= len1;
            if (data_size <= 0)
            {
                /* Input was consumed but no samples came out yet. */
                continue;
            }
            return data_size;
        }

        /* Packet exhausted: release it and fetch the next one. */
        if (pkt.data)
            av_free_packet(&pkt);
        if (quit)
        {
            return -1;
        }
        if (packet_queue_get(&audioq, &pkt, 1) < 0)
        {
            return -1;
        }
        audio_pkt_data = pkt.data;
        audio_pkt_size = pkt.size;
    }
}
  
/*
 * Audio callback registered with SDL through wanted_spec.callback: fills
 * `stream` with exactly `len` bytes.
 *
 * userdata  the audio AVCodecContext (set as wanted_spec.userdata in main)
 * stream    output buffer to fill
 * len       number of bytes to write
 *
 * Decoded audio is staged in a static buffer; bytes that do not fit into the
 * current request stay there and are delivered first on the next call. When
 * audio_decode_frame() fails, 1024 bytes of zeros are emitted instead.
 */
void audio_callback(void *userdata, Uint8 *stream, int len)
{
    static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
    static unsigned int audio_buf_size = 0;    /* valid bytes in audio_buf */
    static unsigned int audio_buf_index = 0;   /* bytes already handed to SDL */

    AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
    int chunk;
    int decoded;

    for (; len > 0; len -= chunk, stream += chunk, audio_buf_index += chunk)
    {
        /* Staging buffer fully consumed: decode more data. */
        if (audio_buf_index >= audio_buf_size)
        {
            decoded = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
            if (decoded >= 0)
            {
                audio_buf_size = decoded;
            }
            else
            {
                /* Decoding failed: substitute a short block of silence. */
                audio_buf_size = 1024; // arbitrary?
                memset(audio_buf, 0, audio_buf_size);
            }
            audio_buf_index = 0;
        }

        /* Copy as much as is both available and still requested. */
        chunk = audio_buf_size - audio_buf_index;
        if (chunk > len)
            chunk = len;
        memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, chunk);
    }
}
  
int main(int argc, char *argv[])  
{  
    AVFormatContext *pFormatCtx;  
    int i, videoStream(-1), audioStream(-1);  
    AVCodecContext *pCodecCtx;  
    AVCodec *pCodec;  
    AVFrame *pFrame;  
    AVPacket packet;  
    int frameFinished;  
    float aspect_ratio;  
    AVCodecContext *aCodecCtx;  
    AVCodec *aCodec;  
    SDL_Overlay *bmp;  
    SDL_Surface *screen;  
    SDL_Rect rect;  
    SDL_Event event;  
    SDL_AudioSpec wanted_spec, spec;  
    if(argc < 2)  
    {  
        fprintf(stderr, "Usage: test \n");  
        exit(1);  
    }  
  
    av_register_all();  
    pFormatCtx = av_alloc_format_context();  
    if (!pFormatCtx) {  
        fprintf(stderr, "Memory error\n");  
        exit(1);  
    }  
    if(av_open_input_file(&pFormatCtx, argv[1], NULL, 0, NULL)!=0)  
        return -1; // Couldn't open file  
    if(av_find_stream_info(pFormatCtx)<0)  
        return -1; // Couldn't find stream information  
    // Dump information about file onto standard error  
    dump_format(pFormatCtx, 0, argv[1], 0);  
  
    // Find the first video stream  
    for(i=0; i<pFormatCtx->nb_streams; i++)  
    {  
        if(pFormatCtx->streams[i]->codec->codec_type==CODEC_TYPE_VIDEO && videoStream<0)  
        {  
            videoStream=i;  
        }  
        if(pFormatCtx->streams[i]->codec->codec_type==CODEC_TYPE_AUDIO && audioStream<0)  
        {  
            audioStream=i;  
        }  
    }  
    if(videoStream==-1||audioStream==-1)  
      return -1; // Didn't find a video stream  
  
    // Get a pointer to the codec context for the video stream  
  
    aCodecCtx=pFormatCtx->streams[audioStream]->codec;  
    wanted_spec.freq = aCodecCtx->sample_rate;  
    wanted_spec.format = AUDIO_S16SYS;  
    wanted_spec.channels = aCodecCtx->channels;  
    wanted_spec.silence = 0;  
    wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;  
    wanted_spec.callback = audio_callback;  
    wanted_spec.userdata = aCodecCtx;  
    if(SDL_OpenAudio(&wanted_spec, &spec) < 0)  
    {  
        fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());  
        return -1;  
    }  
    aCodec = avcodec_find_decoder(aCodecCtx->codec_id);  
    if(!aCodec)  
    {  
        fprintf(stderr, "Unsupported codec!\n"); return -1;  
    }  
    avcodec_open(aCodecCtx, aCodec); // audio_st = pFormatCtx->streams[index]  
    packet_queue_init(&audioq);  
    SDL_PauseAudio(0);  
  
    pCodecCtx=pFormatCtx->streams[videoStream]->codec;  
    pCodec=avcodec_find_decoder(pCodecCtx->codec_id);  
    if(pCodec==NULL)  
    {  
        fprintf(stderr, "Unsupported codec!\n");  
        return -1; // Codec not found  
    }  
    // Open codec  
    if(avcodec_open(pCodecCtx, pCodec)<0)  
        return -1; // Could not open codec  
  
    // Allocate video frame  
    pFrame=avcodec_alloc_frame();  
    // Allocate an AVFrame structure  
    uint8_t *buffer;  
    int numBytes;  
    // Determine required buffer size and allocate buffer  
    numBytes=avpicture_get_size(PIX_FMT_RGB24, pCodecCtx->width,  
        pCodecCtx->height);  
    buffer=(uint8_t *)av_malloc(numBytes*sizeof(uint8_t));  
  
    // Assign appropriate parts of buffer to image planes in pFrameRGB  
    if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))  
    {  
        fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());  
        exit(1);  
    }  
  
#ifndef __DARWIN__  
    screen = SDL_SetVideoMode(pCodecCtx->width, pCodecCtx->height, 0, 0);  
#else  
    screen = SDL_SetVideoMode(pCodecCtx->width, pCodecCtx->height, 24, 0);  
#endif  
    if(!screen)  
    {  
        fprintf(stderr, "SDL: could not set video mode - exiting\n");  
        exit(1);  
    }  
  
    bmp = SDL_CreateYUVOverlay(pCodecCtx->width, pCodecCtx->height,  
        SDL_YV12_OVERLAY, screen);  
  
    static struct SwsContext *img_convert_ctx;  
    if (img_convert_ctx == NULL)  
    {  
        img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,  
                                         pCodecCtx->pix_fmt,  
                                         pCodecCtx->width, pCodecCtx->height,  
                                         PIX_FMT_YUV420P,  
                                         sws_flags, NULL, NULL, NULL);  
        if (img_convert_ctx == NULL)  
        {  
            fprintf(stderr, "Cannot initialize the conversion context\n");  
            exit(1);  
        }  
    }  
    i=0;  
    while(av_read_frame(pFormatCtx, &packet)>=0)  
    {  
        // Is this a packet from the video stream?  
        if(packet.stream_index==videoStream)  
        {  
            // Decode video frame  
            avcodec_decode_video3(pCodecCtx, pFrame, &frameFinished,  
                packet);  
            // Did we get a video frame?  
            if(frameFinished)  
            {  
                SDL_LockYUVOverlay(bmp);  
                AVPicture pict;  
                pict.data[0] = bmp->pixels[0];  
                pict.data[1] = bmp->pixels[2];  
                pict.data[2] = bmp->pixels[1];  
  
                pict.linesize[0] = bmp->pitches[0];  
                pict.linesize[1] = bmp->pitches[2];  
                pict.linesize[2] = bmp->pitches[1];  
  
                // Convert the image into YUV format that SDL uses  
                  
                sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize,  
                    0, pCodecCtx->height, pict.data, pict.linesize);  
                SDL_UnlockYUVOverlay(bmp);  
                rect.x = 0;  
                rect.y = 0;  
                rect.w = pCodecCtx->width;  
                rect.h = pCodecCtx->height;  
                SDL_DisplayYUVOverlay(bmp, &rect);  
                Sleep(60);  
                av_free_packet(&packet);  
            }  
        }  
        else if(packet.stream_index==audioStream)  
        {  
            packet_queue_put(&audioq, &packet);  
        }  
        else  
        {  
            av_free_packet(&packet);  
        }  
        // Free the packet that was allocated by av_read_frame  
        SDL_PollEvent(&event);  
        switch(event.type)  
        {  
        case SDL_QUIT:  
            quit = 1;  
            SDL_Quit();  
            exit(0);  
            break;  
        default: break;  
        }  
    }  
    // Free the RGB image  
    av_free(buffer);  
    //av_free(pFrameRGB);  
    // Free the YUV frame  
    av_free(pFrame);  
    // Close the codec  
    avcodec_close(pCodecCtx);  
    // Close the video file  
    av_close_input_file(pFormatCtx);  
    return 0;  
}

這個教程第一次用到了SDL的線程，涉及到了兩個線程間的同步協調，有幾個地方須要特別留意。SDL_OpenAudio庫函數會打開音頻設備；SDL_PauseAudio庫函數能夠暫停或者恢復audio_callback函數的執行（參數爲0是恢復，其餘的是暫停）。程序中的這行代碼「SDL_PauseAudio(0);」執行後，audio_callback函數開始反覆地被調用；在這以前audio_callback回調函數尚未被調用。

audio_callback函數

原型爲void callback(void *userdata, Uint8 *stream, int len)，userdata是輸入，stream是輸出，len是輸入，len的值通常爲4096（調試中發現的）。audio_callback函數的功能是調用audio_decode_frame函數，把解碼後的數據塊audio_buf追加在stream的後面。經過SDL庫對audio_callback的不斷調用，不斷解碼數據，而後放到stream的末尾，SDL庫認爲stream中數據夠播放一幀音頻了，就播放它。第三個參數len是向stream中寫數據的內存分配尺度，即分配給audio_callback函數寫入的緩存大小。

假設len=4096，解碼後數據塊audio_buf的大小爲4608，那麼一次audio_callback調用不能把audio_buf中所有數據寫入stream末尾，就分兩次：第一次先把audio_buf的前4096個字節寫入stream末尾；第二次調用audio_callback函數時，因爲寫緩存用光了，又分配4096個字節的緩存，先寫剩餘的512個字節到stream末尾，寫緩存還剩餘3584個字節，由同一次調用中的while循環繼續調用audio_decode_frame解碼下一幀來填充，直到len減爲0爲止。

audio_decode_frame函數

原型：int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)

返回值：解碼完一幀音頻到緩存後，緩存佔用的實際大小，以字節爲單位；爲負數表示失敗

aCodecCtx：輸入，解碼上下文

audio_buf：輸出，解碼成功後，輸出到的緩存的首地址

buf_size：輸入，audio_buf的預留空間

該函數其實是從尾部開始執行的：先取得main線程放入隊列的包，再用庫函數avcodec_decode_audio2處理。若是一次調用沒有處理完一個包的數據，就記錄下處理到包的哪個位置了，下次接着處理（這種狀況多是由於一個音頻包包含多個音頻幀的數據引發）。

庫函數avcodec_decode_audio2

    原型:int avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
                         int *frame_size_ptr,
                         const uint8_t *buf, int buf_size);
    avctx ：解碼器上下文
    samples：輸出參數輸出數據的緩存首地址.
    frame_size_ptr：既是輸入又是輸出。輸入時是samples緩存的可用大小；輸出時，無幀可解返回0，解碼失敗返回負數，解碼成功返回解碼後一幀音頻所佔空間，以字節爲單位
    buf：輸入參數，輸入數據的緩存
    buf_size：輸入參數，buf的大小
    返回值：無幀可解返回0，解碼失敗返回負數，解碼成功返回本次解碼消耗的輸入字節數（即解碼前一幀音頻所佔空間）

SDL_CondWait庫函數

等待消息時解鎖，等到消息後加鎖，該函數能夠阻塞代碼的執行，通常和SDL_CondSignal庫函數（或SDL_CondBroadcast庫函數）配對使用