The FFMPEG project was founded by the Frenchman Fabrice Bellard, who also started the famous CPU emulator project QEMU and has held the record for computing digits of pi. "FF" stands for "Fast Forward". The FFMPEG logo depicts a zigzag scan, the scheme used to serialize a picture's two-dimensional frequency-domain data into one dimension while preserving good statistical properties in the serialized data, which in turn improves the efficiency of the one-dimensional entropy coding applied afterwards.
About the Hall of Shame: most of FFMpeg's code is under the LGPL license, which requires anyone who modifies FFMpeg to publish the modified source code; a smaller part is under the GPL, which additionally requires publishing the source of the software that uses FFMpeg. In practice, apart from the in-house players of a few large system-software vendors (Microsoft, Apple, etc.) and of some well-known audio/video service providers (DivX, Real, etc.), the great majority of third-party players rely on FFMpeg: the open-source players VLC and MPlayer on the Linux desktop, KMPlayer and Baofeng (暴風影音) on Windows, and almost every third-party player on Android are built on FFMpeg. Quite a few players that appear to ship proprietary technology also use FFMpeg quietly without honoring the license; this practice is called "theft", and the names of the offending companies are carved into the Hall of Shame. The Chinese players Baofeng (暴風影音) and QQ Player (QQ影音) made the list in 2009.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>

#include "libavutil/avstring.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libavcodec/opt.h"
#include "libswscale/swscale.h"

#define DECODED_AUDIO_BUFFER_SIZE 192000

struct options {
    int streamId;
    int frames;
    int nodec;
    int bplay;
    int thread_count;
    int64_t lstart;
    char finput[256];
    char foutput1[256];
    char foutput2[256];
};
int parse_options(struct options *opts, int argc, char *argv[])
{
    int optidx;
    char *optstr;

    if (argc < 2)
        return -1;

    opts->streamId = -1;
    opts->lstart = -1;
    opts->frames = -1;
    opts->foutput1[0] = 0;
    opts->foutput2[0] = 0;
    opts->nodec = 0;
    opts->bplay = 0;
    opts->thread_count = 0;
    strcpy(opts->finput, argv[1]);

    optidx = 2;
    while (optidx < argc) {
        optstr = argv[optidx++];
        if (*optstr++ != '-')
            return -1;
        switch (*optstr++) {
        case 's':  /* stream id */
            opts->streamId = atoi(optstr);
            break;
        case 'f':  /* frames */
            opts->frames = atoi(optstr);
            break;
        case 'k':  /* skipped */
            opts->lstart = atoll(optstr);
            break;
        case 'o':  /* output */
            strcpy(opts->foutput1, optstr);
            strcat(opts->foutput1, ".mpg");
            strcpy(opts->foutput2, optstr);
            strcat(opts->foutput2, ".raw");
            break;
        case 'n':  /* decoding and output options */
            if (strcmp("dec", optstr) == 0)
                opts->nodec = 1;
            break;
        case 'p':
            opts->bplay = 1;
            break;
        case 't':
            opts->thread_count = atoi(optstr);
            break;
        default:
            return -1;
        }
    }
    return 0;
}
void show_help(char *program)
{
    printf("Simple FFMPEG test program\n");
    printf("Usage: %s inputfile [-sstreamid [-fframes] [-kskipped] "
           "[-ooutput_filename(without extension)] [-p] [-tthread_count]]\n",
           program);
}

static void log_callback(void *ptr, int level, const char *fmt, va_list vl)
{
    vfprintf(stdout, fmt, vl);
}
/* audio renderer code (OSS) */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/soundcard.h>

#define OSS_DEVICE "/dev/dsp0"

struct audio_dsp {
    int audio_fd;
    int channels;
    int format;
    int speed;
};

int map_formats(enum SampleFormat format)
{
    switch (format) {
    case SAMPLE_FMT_U8:
        return AFMT_U8;
    case SAMPLE_FMT_S16:
        return AFMT_S16_LE;
    default:
        return AFMT_U8;
    }
}

int set_audio(struct audio_dsp *dsp)
{
    if (dsp->audio_fd == -1) {
        printf("Invalid audio dsp id!\n");
        return -1;
    }
    if (-1 == ioctl(dsp->audio_fd, SNDCTL_DSP_SETFMT, &dsp->format)) {
        printf("Failed to set dsp format!\n");
        return -1;
    }
    if (-1 == ioctl(dsp->audio_fd, SNDCTL_DSP_CHANNELS, &dsp->channels)) {
        printf("Failed to set dsp channels!\n");
        return -1;
    }
    if (-1 == ioctl(dsp->audio_fd, SNDCTL_DSP_SPEED, &dsp->speed)) {
        printf("Failed to set dsp speed!\n");
        return -1;
    }
    return 0;
}

int play_pcm(struct audio_dsp *dsp, unsigned char *buf, int size)
{
    if (dsp->audio_fd == -1) {
        printf("Invalid audio dsp id!\n");
        return -1;
    }
    if (-1 == write(dsp->audio_fd, buf, size)) {
        printf("Failed to write audio dsp!\n");
        return -1;
    }
    return 0;
}
/* audio renderer code end */
/* video renderer code */
#include <linux/fb.h>
#include <sys/mman.h>

#define FB_DEVICE "/dev/fb0"

enum pic_format {
    eYUV_420_Planer,
};

struct video_fb {
    int video_fd;
    struct fb_var_screeninfo vinfo;
    struct fb_fix_screeninfo finfo;
    unsigned char *fbp;
    AVFrame *frameRGB;
    struct {
        int x;
        int y;
    } video_pos;
};

int open_video(struct video_fb *fb, int x, int y)
{
    int screensize;

    fb->video_fd = open(FB_DEVICE, O_RDWR); /* O_RDWR: mapped PROT_READ|PROT_WRITE below */
    if (fb->video_fd == -1)
        return -1;
    if (ioctl(fb->video_fd, FBIOGET_FSCREENINFO, &fb->finfo))
        return -2;
    if (ioctl(fb->video_fd, FBIOGET_VSCREENINFO, &fb->vinfo))
        return -2;
    printf("video device: resolution %dx%d, %dbpp\n",
           fb->vinfo.xres, fb->vinfo.yres, fb->vinfo.bits_per_pixel);

    screensize = fb->vinfo.xres * fb->vinfo.yres * fb->vinfo.bits_per_pixel / 8;
    fb->fbp = (unsigned char *)mmap(0, screensize, PROT_READ | PROT_WRITE,
                                    MAP_SHARED, fb->video_fd, 0);
    if (fb->fbp == MAP_FAILED)
        return -3;

    if (x >= fb->vinfo.xres || y >= fb->vinfo.yres) {
        return -4;
    } else {
        fb->video_pos.x = x;
        fb->video_pos.y = y;
    }

    fb->frameRGB = avcodec_alloc_frame();
    if (!fb->frameRGB)
        return -5;
    return 0;
}

#if 0
/* only 420P supported now */
int show_picture(struct video_fb *fb, AVFrame *frame, int width, int height,
                 enum pic_format format)
{
    struct SwsContext *sws;
    int i;
    unsigned char *dest;
    unsigned char *src;

    if (fb->video_fd == -1)
        return -1;
    if ((fb->video_pos.x >= fb->vinfo.xres) || (fb->video_pos.y >= fb->vinfo.yres))
        return -2;

    if (fb->video_pos.x + width > fb->vinfo.xres)
        width = fb->vinfo.xres - fb->video_pos.x;
    if (fb->video_pos.y + height > fb->vinfo.yres)
        height = fb->vinfo.yres - fb->video_pos.y;

    if (format == PIX_FMT_YUV420P) {
        sws = sws_getContext(width, height, format, width, height,
                             PIX_FMT_RGB32, SWS_FAST_BILINEAR, NULL, NULL, NULL);
        if (sws == 0)
            return -3;
        if (sws_scale(sws, frame->data, frame->linesize, 0, height,
                      fb->frameRGB->data, fb->frameRGB->linesize) < 0)
            return -3;

        src = fb->frameRGB->data[0];
        dest = fb->fbp
             + (fb->video_pos.x + fb->vinfo.xoffset) * (fb->vinfo.bits_per_pixel / 8)
             + (fb->video_pos.y + fb->vinfo.yoffset) * fb->finfo.line_length;
        for (i = 0; i < height; i++) {
            memcpy(dest, src, width * 4);
            src += fb->frameRGB->linesize[0];
            dest += fb->finfo.line_length;
        }
    }
    return 0;
}
#endif

void close_video(struct video_fb *fb)
{
    if (fb->video_fd != -1) {
        munmap(fb->fbp, fb->vinfo.xres * fb->vinfo.yres * fb->vinfo.bits_per_pixel / 8);
        close(fb->video_fd);
        fb->video_fd = -1;
    }
}
/* video renderer code end */
int main(int argc, char *argv[])
{
    AVFormatContext *pCtx = 0;
    AVCodecContext *pCodecCtx = 0;
    AVCodec *pCodec = 0;
    AVPacket packet;
    AVFrame *pFrame = 0;
    FILE *fpo1 = NULL;
    FILE *fpo2 = NULL;
    int nframe;
    int err;
    int got_picture;
    int picwidth, picheight, linesize;
    unsigned char *pBuf;
    int i;
    int64_t timestamp;
    struct options opt;
    int usefo = 0;
    struct audio_dsp dsp;
    int dusecs;
    float usecs1 = 0;
    float usecs2 = 0;
    struct timeval elapsed1, elapsed2;
    int decoded = 0;

    dsp.audio_fd = -1; /* so the cleanup path knows whether the device was opened */

    av_register_all();
    av_log_set_callback(log_callback);
    av_log_set_level(50);

    if (parse_options(&opt, argc, argv) < 0 || (strlen(opt.finput) == 0)) {
        show_help(argv[0]);
        return 0;
    }

    err = avformat_open_input(&pCtx, opt.finput, 0, 0);
    if (err < 0) {
        printf("\n->(avformat_open_input)\tERROR:\t%d\n", err);
        goto fail;
    }
    err = avformat_find_stream_info(pCtx, 0);
    if (err < 0) {
        printf("\n->(avformat_find_stream_info)\tERROR:\t%d\n", err);
        goto fail;
    }
    if (opt.streamId < 0) {
        av_dump_format(pCtx, 0, pCtx->filename, 0);
        goto fail;
    }
    if (opt.streamId >= pCtx->nb_streams) { /* validate the index before using it below */
        printf("\n->StreamId\tERROR\n");
        goto fail;
    }
    printf("\n extra data in Stream %d (%dB):", opt.streamId,
           pCtx->streams[opt.streamId]->codec->extradata_size);
    for (i = 0; i < pCtx->streams[opt.streamId]->codec->extradata_size; i++) {
        if (i % 16 == 0)
            printf("\n");
        printf("%2x ", pCtx->streams[opt.streamId]->codec->extradata[i]);
    }

    /* try opening output files */
    if (strlen(opt.foutput1) && strlen(opt.foutput2)) {
        fpo1 = fopen(opt.foutput1, "wb");
        fpo2 = fopen(opt.foutput2, "wb");
        if (!fpo1 || !fpo2) {
            printf("\n->error opening output files\n");
            goto fail;
        }
        usefo = 1;
    } else {
        usefo = 0;
    }

    if (opt.lstart > 0) {
        err = av_seek_frame(pCtx, opt.streamId, opt.lstart, AVSEEK_FLAG_ANY);
        if (err < 0) {
            printf("\n->(av_seek_frame)\tERROR:\t%d\n", err);
            goto fail;
        }
    }

    /* decoder configuration */
    if (!opt.nodec) {
        /* prepare codec */
        pCodecCtx = pCtx->streams[opt.streamId]->codec;
        if (opt.thread_count <= 16 && opt.thread_count > 0) {
            pCodecCtx->thread_count = opt.thread_count;
            pCodecCtx->thread_type = FF_THREAD_FRAME;
        }
        pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
        if (!pCodec) {
            printf("\n->can not find codec!\n");
            goto fail;
        }
        err = avcodec_open2(pCodecCtx, pCodec, 0);
        if (err < 0) {
            printf("\n->(avcodec_open)\tERROR:\t%d\n", err);
            goto fail;
        }
        pFrame = avcodec_alloc_frame();

        /* prepare device */
        if (opt.bplay) {
            /* audio device */
            dsp.audio_fd = open(OSS_DEVICE, O_WRONLY);
            if (dsp.audio_fd == -1) {
                printf("\n-> can not open audio device\n");
                goto fail;
            }
            dsp.channels = pCodecCtx->channels;
            dsp.speed = pCodecCtx->sample_rate;
            dsp.format = map_formats(pCodecCtx->sample_fmt);
            if (set_audio(&dsp) < 0) {
                printf("\n-> can not set audio device\n");
                goto fail;
            }
            /* video device */
        }
    }

    nframe = 0;
    while (nframe < opt.frames || opt.frames == -1) {
        gettimeofday(&elapsed1, NULL);
        err = av_read_frame(pCtx, &packet);
        if (err < 0) {
            printf("\n->(av_read_frame)\tERROR:\t%d\n", err);
            break;
        }
        gettimeofday(&elapsed2, NULL);
        dusecs = (elapsed2.tv_sec - elapsed1.tv_sec) * 1000000
               + (elapsed2.tv_usec - elapsed1.tv_usec);
        usecs2 += dusecs;

        timestamp = av_rescale_q(packet.dts,
                                 pCtx->streams[packet.stream_index]->time_base,
                                 (AVRational){1, AV_TIME_BASE});
        printf("\nFrame No %5d stream#%d\tsize %6dB, timestamp:%6lld, dts:%6lld, pts:%6lld, ",
               nframe++, packet.stream_index, packet.size,
               timestamp, packet.dts, packet.pts);

        if (packet.stream_index == opt.streamId) {
#if 0
            for (i = 0; i < 16 /*packet.size*/; i++) {
                if (i % 16 == 0)
                    printf("\n pktdata: ");
                printf("%2x ", packet.data[i]);
            }
            printf("\n");
#endif
            if (usefo) {
                fwrite(packet.data, packet.size, 1, fpo1);
                fflush(fpo1);
            }

            if (pCtx->streams[opt.streamId]->codec->codec_type == AVMEDIA_TYPE_VIDEO
                && !opt.nodec) {
                picheight = pCtx->streams[opt.streamId]->codec->height;
                picwidth = pCtx->streams[opt.streamId]->codec->width;

                gettimeofday(&elapsed1, NULL);
                avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, &packet);
                decoded++;
                gettimeofday(&elapsed2, NULL);
                dusecs = (elapsed2.tv_sec - elapsed1.tv_sec) * 1000000
                       + (elapsed2.tv_usec - elapsed1.tv_usec);
                usecs1 += dusecs;

                if (got_picture) {
                    printf("[Video: type %d, ref %d, pts %lld, pkt_pts %lld, pkt_dts %lld]",
                           pFrame->pict_type, pFrame->reference,
                           pFrame->pts, pFrame->pkt_pts, pFrame->pkt_dts);

                    if (pCtx->streams[opt.streamId]->codec->pix_fmt == PIX_FMT_YUV420P) {
                        if (usefo) {
                            linesize = pFrame->linesize[0];
                            pBuf = pFrame->data[0];
                            for (i = 0; i < picheight; i++) {
                                fwrite(pBuf, picwidth, 1, fpo2);
                                pBuf += linesize;
                            }
                            linesize = pFrame->linesize[1];
                            pBuf = pFrame->data[1];
                            for (i = 0; i < picheight / 2; i++) {
                                fwrite(pBuf, picwidth / 2, 1, fpo2);
                                pBuf += linesize;
                            }
                            linesize = pFrame->linesize[2];
                            pBuf = pFrame->data[2];
                            for (i = 0; i < picheight / 2; i++) {
                                fwrite(pBuf, picwidth / 2, 1, fpo2);
                                pBuf += linesize;
                            }
                            fflush(fpo2);
                        }
                        if (opt.bplay) {
                            /* show picture */
                        }
                    }
                }
            } else if (pCtx->streams[opt.streamId]->codec->codec_type == AVMEDIA_TYPE_AUDIO
                       && !opt.nodec) {
                int got;
                gettimeofday(&elapsed1, NULL);
                avcodec_decode_audio4(pCodecCtx, pFrame, &got, &packet);
                decoded++;
                gettimeofday(&elapsed2, NULL);
                dusecs = (elapsed2.tv_sec - elapsed1.tv_sec) * 1000000
                       + (elapsed2.tv_usec - elapsed1.tv_usec);
                usecs1 += dusecs;
                if (got) {
                    printf("[Audio: %5dB raw data, decoding time: %d]",
                           pFrame->linesize[0], dusecs);
                    if (usefo) {
                        fwrite(pFrame->data[0], pFrame->linesize[0], 1, fpo2);
                        fflush(fpo2);
                    }
                    if (opt.bplay)
                        play_pcm(&dsp, pFrame->data[0], pFrame->linesize[0]);
                }
            }
        }
        av_free_packet(&packet); /* release every packet, whichever stream it belongs to */
    }

    if (!opt.nodec && pCodecCtx)
        avcodec_close(pCodecCtx);

    printf("\n%d frames parsed, average %.2f us per frame\n", nframe, usecs2 / nframe);
    printf("%d frames decoded, average %.2f us per frame\n", decoded, usecs1 / decoded);

fail:
    if (pCtx)
        avformat_close_input(&pCtx);
    if (fpo1)
        fclose(fpo1);
    if (fpo2)
        fclose(fpo2);
    if (pFrame)
        av_free(pFrame);
    if (dsp.audio_fd != -1)
        close(dsp.audio_fd);
    return 0;
}
This short program demonstrates the following:

Opening a multimedia file and obtaining basic media information.
Obtaining a codec handle.
Seeking to a given timestamp.
Reading data frames.
Decoding audio or video frames.
Closing the multimedia file.

These capabilities are enough to back a full-featured media player, because the most complex parts, demultiplexing, decoding, and bitstream analysis, are already implemented inside FFMpeg; the only remaining concern for the caller is synchronization.
Container/File: a multimedia file in a specific format.
Stream: a continuous stretch of data on the time axis, such as a run of audio, video, or subtitle data. It can be compressed or raw; compressed data must be associated with a particular codec.
Frame/Packet: a media stream normally consists of a large number of frames; for compressed data, a frame is the smallest unit a codec processes. Frames belonging to different streams are usually interleaved inside the container (see: interleaving).
Codec: a codec converts between compressed data and raw data, one frame at a time.
In FFMPEG these basic elements are abstracted by the AVFormatContext, AVStream, AVCodecContext, AVCodec and AVPacket structures, whose relationship is illustrated in the figure below.

3.2 AVCodecContext
This structure describes the context of a codec and contains the numerous parameters codecs need. Some of the more important fields are listed here:
typedef struct AVCodecContext {
    ......
    /**
     * some codecs need / can use extradata like Huffman tables.
     * mjpeg: Huffman tables
     * rv10: additional flags
     * mpeg4: global headers (they can be in the bitstream or here)
     * The allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
     * than extradata_size to avoid problems if it is read with the bitstream reader.
     * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
     * - encoding: Set/allocated/freed by libavcodec.
     * - decoding: Set/allocated/freed by user.
     */
    uint8_t *extradata;
    int extradata_size;
    /**
     * This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented. For fixed-fps content,
     * timebase should be 1/framerate and timestamp increments should be
     * identically 1.
     * - encoding: MUST be set by user.
     * - decoding: Set by libavcodec.
     */
    AVRational time_base;

    /* video only */
    /**
     * picture width / height.
     * - encoding: MUST be set by user.
     * - decoding: Set by libavcodec.
     * Note: For compatibility it is possible to set this instead of
     * coded_width/height before decoding.
     */
    int width, height;
    ......
    /* audio only */
    int sample_rate; ///< samples per second
    int channels;    ///< number of audio channels

    /**
     * audio sample format
     * - encoding: Set by user.
     * - decoding: Set by libavcodec.
     */
    enum SampleFormat sample_fmt; ///< sample format

    /* The following data should not be initialized. */
    /**
     * Samples per packet, initialized when calling 'init'.
     */
    int frame_size;
    int frame_number; ///< audio or video frame number
    ......
    char codec_name[32];
    enum AVMediaType codec_type; /* see AVMEDIA_TYPE_xxx */
    enum CodecID codec_id;       /* see CODEC_ID_xxx */
    /**
     * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
     * This is used to work around some encoder bugs.
     * A demuxer should set this to what is stored in the field used to identify the codec.
     * If there are multiple such fields in a container then the demuxer should choose the one
     * which maximizes the information about the used codec.
     * If the codec tag field in a container is larger then 32 bits then the demuxer should
     * remap the longer ID to 32 bits with a table or other structure. Alternatively a new
     * extra_codec_tag + size could be added but for this a clear advantage must be demonstrated
     * first.
     * - encoding: Set by user, if not then the default based on codec_id will be used.
     * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
     */
    unsigned int codec_tag;
    ......
    /**
     * Size of the frame reordering buffer in the decoder.
     * For MPEG-2 it is 1 IPB or 0 low delay IP.
     * - encoding: Set by libavcodec.
     * - decoding: Set by libavcodec.
     */
    int has_b_frames;

    /**
     * number of bytes per packet if constant and known or 0
     * Used by some WAV based audio codecs.
     */
    int block_align;
    ......
    /**
     * bits per sample/pixel from the demuxer (needed for huffyuv).
     * - encoding: Set by libavcodec.
     * - decoding: Set by user.
     */
    int bits_per_coded_sample;
    ......
} AVCodecContext;
When libavcodec is used on its own, the caller must initialize this information itself; when the whole FFMPEG library is used, it is initialized during avformat_open_input and avformat_find_stream_info from the file header and from headers inside the media streams. The main fields are explained below (a sketch that reads them back follows this list):

extradata/extradata_size: this buffer holds extra information the decoder may need and is filled during av_read_frame. Typically, the demuxer for a given format fills extradata while reading the format header; if the demuxer does not (for instance because the header simply carries no codec information), the corresponding parser keeps looking for it in the already-demultiplexed media stream. If no extra information is ever found, the buffer pointer stays empty.
time_base:
width/height: video width and height.
sample_rate/channels: audio sample rate and channel count.
sample_fmt: the raw audio sample format.
codec_name/codec_type/codec_id/codec_tag: codec information.
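A minimal sketch (using the same era of FFmpeg API as the rest of this article) that prints these fields for every stream once avformat_find_stream_info has run; dump_codec_params is a hypothetical helper name:

#include <stdio.h>
#include "libavformat/avformat.h"

static void dump_codec_params(AVFormatContext *ctx)
{
    unsigned int i;
    for (i = 0; i < ctx->nb_streams; i++) {
        AVCodecContext *cc = ctx->streams[i]->codec;
        printf("stream #%u: codec_id=%d, codec_tag=0x%x, extradata_size=%d\n",
               i, cc->codec_id, cc->codec_tag, cc->extradata_size);
        if (cc->codec_type == AVMEDIA_TYPE_VIDEO)
            printf("  video %dx%d, time_base=%d/%d\n", cc->width, cc->height,
                   cc->time_base.num, cc->time_base.den);
        else if (cc->codec_type == AVMEDIA_TYPE_AUDIO)
            printf("  audio %d Hz, %d channels, sample_fmt=%d\n",
                   cc->sample_rate, cc->channels, cc->sample_fmt);
    }
}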
3.3 AVStream
This structure describes one media stream and is defined as follows:
typedef struct AVStream {
    int index; /**< stream index in AVFormatContext */
    int id;    /**< format-specific stream ID */
    AVCodecContext *codec; /**< codec context */
    /**
     * Real base framerate of the stream.
     * This is the lowest framerate with which all timestamps can be
     * represented accurately (it is the least common multiple of all
     * framerates in the stream). Note, this value is just a guess!
     * For example, if the time base is 1/90000 and all frames have either
     * approximately 3600 or 1800 timer ticks, then r_frame_rate will be 50/1.
     */
    AVRational r_frame_rate;
    ......
    /**
     * This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented. For fixed-fps content,
     * time base should be 1/framerate and timestamp increments should be 1.
     */
    AVRational time_base;
    ......
    /**
     * Decoding: pts of the first frame of the stream, in stream time base.
     * Only set this if you are absolutely 100% sure that the value you set
     * it to really is the pts of the first frame.
     * This may be undefined (AV_NOPTS_VALUE).
     * @note The ASF header does NOT contain a correct start_time the ASF
     * demuxer must NOT set this.
     */
    int64_t start_time;
    /**
     * Decoding: duration of the stream, in stream time base.
     * If a source file does not specify a duration, but does specify
     * a bitrate, this value will be estimated from bitrate and file size.
     */
    int64_t duration;

#if LIBAVFORMAT_VERSION_INT < (53<<16)
    char language[4]; /**< ISO 639-2/B 3-letter language code (empty string if undefined) */
#endif

    /* av_read_frame() support */
    enum AVStreamParseType need_parsing;
    struct AVCodecParserContext *parser;
    ......
    /* av_seek_frame() support */
    AVIndexEntry *index_entries; /**< Only used if the format does not support seeking natively. */
    int nb_index_entries;
    unsigned int index_entries_allocated_size;

    int64_t nb_frames; ///< number of frames in this stream if known or 0
    ......
    /**
     * Average framerate
     */
    AVRational avg_frame_rate;
    ......
} AVStream;
The main fields are explained below. Most of them can be determined by avformat_open_input from the file header; whatever is missing is then obtained by avformat_find_stream_info by reading frames and soft-decoding (a time-base conversion sketch follows this list):

index/id: index is the stream's index, generated automatically; it can be used to look the stream up in the AVFormatContext::streams array. id is the stream's identifier and depends on the container format; for MPEG TS, for example, id is the PID.
time_base: the stream's time base, a rational number; the pts and dts of all media data in this stream are expressed at this granularity. av_rescale/av_rescale_q are typically used to convert between different time bases.
start_time: the stream's start time, in units of the stream's time base, usually the pts of the stream's first frame.
duration: the stream's total duration, in units of the stream's time base.
need_parsing: controls how the stream is parsed.
nb_frames: the number of frames in the stream.
r_frame_rate/framerate/avg_frame_rate: frame-rate related fields.
codec: points to the stream's AVCodecContext structure, created during avformat_open_input.
parser: points to the stream's AVCodecParserContext structure, created during avformat_find_stream_info.
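A small sketch of the conversion just mentioned: rescale a packet timestamp from the owning stream's time base into microseconds (AV_TIME_BASE units) with av_rescale_q, the same call the test program uses; stream_ts_to_usecs is a hypothetical helper name:

#include "libavformat/avformat.h"

static int64_t stream_ts_to_usecs(AVFormatContext *ctx, const AVPacket *pkt)
{
    AVRational us = {1, AV_TIME_BASE}; /* 1/1000000 of a second */
    return av_rescale_q(pkt->dts,
                        ctx->streams[pkt->stream_index]->time_base, us);
}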
3.4 AVFormatContext
typedef struct AVFormatContext {
    const AVClass *av_class; /**< Set by avformat_alloc_context. */
    /* Can only be iformat or oformat, not both at the same time. */
    struct AVInputFormat *iformat;
    struct AVOutputFormat *oformat;
    void *priv_data;
    ByteIOContext *pb;
    unsigned int nb_streams;
    AVStream *streams[MAX_STREAMS];
    char filename[1024]; /**< input or output filename */
    /* stream info */
    int64_t timestamp;
#if LIBAVFORMAT_VERSION_INT < (53<<16)
    char title[512];
    char author[512];
    char copyright[512];
    char comment[512];
    char album[512];
    int year;       /**< ID3 year, 0 if none */
    int track;      /**< track number, 0 if none */
    char genre[32]; /**< ID3 genre */
#endif

    int ctx_flags; /**< Format-specific flags, see AVFMTCTX_xx */
    /* private data for pts handling (do not modify directly). */
    /** This buffer is only needed when packets were already buffered but
        not decoded, for example to get the codec parameters in MPEG
        streams. */
    struct AVPacketList *packet_buffer;

    /** Decoding: position of the first frame of the component, in
        AV_TIME_BASE fractional seconds. NEVER set this value directly:
        It is deduced from the AVStream values. */
    int64_t start_time;
    /** Decoding: duration of the stream, in AV_TIME_BASE fractional
        seconds. Only set this value if you know none of the individual stream
        durations and also dont set any of them. This is deduced from the
        AVStream values if not set. */
    int64_t duration;
    /** decoding: total file size, 0 if unknown */
    int64_t file_size;
    /** Decoding: total stream bitrate in bit/s, 0 if not
        available. Never set it directly if the file_size and the
        duration are known as FFmpeg can compute it automatically. */
    int bit_rate;

    /* av_read_frame() support */
    AVStream *cur_st;
#if LIBAVFORMAT_VERSION_INT < (53<<16)
    const uint8_t *cur_ptr_deprecated;
    int cur_len_deprecated;
    AVPacket cur_pkt_deprecated;
#endif

    /* av_seek_frame() support */
    int64_t data_offset; /** offset of the first packet */
    int index_built;

    int mux_rate;
    unsigned int packet_size;
    int preload;
    int max_delay;

#define AVFMT_NOOUTPUTLOOP -1
#define AVFMT_INFINITEOUTPUTLOOP 0
    /** number of times to loop output in formats that support it */
    int loop_output;

    int flags;
#define AVFMT_FLAG_GENPTS   0x0001 ///< Generate missing pts even if it requires parsing future frames.
#define AVFMT_FLAG_IGNIDX   0x0002 ///< Ignore index.
#define AVFMT_FLAG_NONBLOCK 0x0004 ///< Do not block when reading packets from input.
#define AVFMT_FLAG_IGNDTS   0x0008 ///< Ignore DTS on frames that contain both DTS & PTS
#define AVFMT_FLAG_NOFILLIN 0x0010 ///< Do not infer any values from other values, just return what is stored in the container
#define AVFMT_FLAG_NOPARSE  0x0020 ///< Do not use AVParsers, you also must set AVFMT_FLAG_NOFILLIN as the fillin code works on frames and no parsing -> no frames. Also seeking to frames can not work if parsing to find frame boundaries has been disabled
#define AVFMT_FLAG_RTP_HINT 0x0040 ///< Add RTP hinting to the output file

    int loop_input;
    /** decoding: size of data to probe; encoding: unused. */
    unsigned int probesize;

    /**
     * Maximum time (in AV_TIME_BASE units) during which the input should
     * be analyzed in avformat_find_stream_info().
     */
    int max_analyze_duration;

    const uint8_t *key;
    int keylen;

    unsigned int nb_programs;
    AVProgram **programs;

    /**
     * Forced video codec_id.
     * Demuxing: Set by user.
     */
    enum CodecID video_codec_id;
    /**
     * Forced audio codec_id.
     * Demuxing: Set by user.
     */
    enum CodecID audio_codec_id;
    /**
     * Forced subtitle codec_id.
     * Demuxing: Set by user.
     */
    enum CodecID subtitle_codec_id;

    /**
     * Maximum amount of memory in bytes to use for the index of each stream.
     * If the index exceeds this size, entries will be discarded as
     * needed to maintain a smaller size. This can lead to slower or less
     * accurate seeking (depends on demuxer).
     * Demuxers for which a full in-memory index is mandatory will ignore
     * this.
     * muxing  : unused
     * demuxing: set by user
     */
    unsigned int max_index_size;

    /**
     * Maximum amount of memory in bytes to use for buffering frames
     * obtained from realtime capture devices.
     */
    unsigned int max_picture_buffer;

    unsigned int nb_chapters;
    AVChapter **chapters;

    /**
     * Flags to enable debugging.
     */
    int debug;
#define FF_FDEBUG_TS 0x0001

    /**
     * Raw packets from the demuxer, prior to parsing and decoding.
     * This buffer is used for buffering packets until the codec can
     * be identified, as parsing cannot be done without knowing the
     * codec.
     */
    struct AVPacketList *raw_packet_buffer;
    struct AVPacketList *raw_packet_buffer_end;

    struct AVPacketList *packet_buffer_end;

    AVMetadata *metadata;

    /**
     * Remaining size available for raw_packet_buffer, in bytes.
     * NOT PART OF PUBLIC API
     */
#define RAW_PACKET_BUFFER_SIZE 2500000
    int raw_packet_buffer_remaining_size;

    /**
     * Start time of the stream in real world time, in microseconds
     * since the unix epoch (00:00 1st January 1970). That is, pts=0
     * in the stream was captured at this real world time.
     * - encoding: Set by user.
     * - decoding: Unused.
     */
    int64_t start_time_realtime;
} AVFormatContext;
This is the most fundamental structure in FFMpeg: it is the root of all the other structures and the overall abstraction of a multimedia file or stream. In particular:

nb_streams and streams describe all embedded media streams: streams is an array of AVStream pointers, nb_streams its count;
iformat and oformat point to the corresponding demuxer and muxer;
pb points to a ByteIOContext structure controlling the underlying data I/O;
start_time and duration are the start time and length of the multimedia file, in microseconds, deduced from the AVStream entries in the streams array.

Usually this structure is created inside avformat_open_input, which initializes some of its members to default values. If the caller prefers to create the structure itself, however, it must explicitly give those members default values, otherwise subsequent operations may misbehave. The following members deserve attention (a sketch follows this list):

probesize
mux_rate
packet_size
flags
max_analyze_duration
key
max_index_size
max_picture_buffer
max_delay
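A sketch, assuming the caller builds the context itself rather than letting avformat_open_input allocate it: allocate with avformat_alloc_context and give the members listed above explicit values before use. The numeric values are purely illustrative, not recommendations:

#include "libavformat/avformat.h"

AVFormatContext *make_input_context(void)
{
    AVFormatContext *ctx = avformat_alloc_context();
    if (!ctx)
        return NULL;
    ctx->probesize            = 5 * 1024 * 1024;   /* bytes of input to probe */
    ctx->max_analyze_duration = 5 * AV_TIME_BASE;  /* analyze at most 5 seconds */
    ctx->max_delay            = 700000;            /* demuxer delay, microseconds */
    ctx->flags               |= AVFMT_FLAG_GENPTS; /* generate missing pts values */
    return ctx;
}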
3.5 AVPacket
typedef struct AVPacket {
    /**
     * Presentation timestamp in AVStream->time_base units; the time at which
     * the decompressed packet will be presented to the user.
     * Can be AV_NOPTS_VALUE if it is not stored in the file.
     * pts MUST be larger or equal to dts as presentation cannot happen before
     * decompression, unless one wants to view hex dumps. Some formats misuse
     * the terms dts and pts/cts to mean something different. Such timestamps
     * must be converted to true pts/dts before they are stored in AVPacket.
     */
    int64_t pts;
    /**
     * Decompression timestamp in AVStream->time_base units; the time at which
     * the packet is decompressed.
     * Can be AV_NOPTS_VALUE if it is not stored in the file.
     */
    int64_t dts;
    uint8_t *data;
    int size;
    int stream_index;
    int flags;
    /**
     * Duration of this packet in AVStream->time_base units, 0 if unknown.
     * Equals next_pts - this_pts in presentation order.
     */
    int duration;
    void (*destruct)(struct AVPacket *);
    void *priv;
    int64_t pos; ///< byte position in stream, -1 if unknown

    /**
     * Time difference in AVStream->time_base units from the pts of this
     * packet to the point at which the output from the decoder has converged
     * independent from the availability of previous frames. That is, the
     * frames are virtually identical no matter if decoding started from
     * the very first frame or from this keyframe.
     * Is AV_NOPTS_VALUE if unknown.
     * This field is not the display duration of the current packet.
     *
     * The purpose of this field is to allow seeking in streams that have no
     * keyframes in the conventional sense. It corresponds to the
     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
     * essential for some types of subtitle streams to ensure that all
     * subtitles are correctly displayed after seeking.
     */
    int64_t convergence_duration;
} AVPacket;
FFMPEG uses AVPacket to stage demultiplexed, not-yet-decoded media data (an audio/video frame, a subtitle packet, and so on) together with its attached information (decoding timestamp, presentation timestamp, duration, etc.). In particular:

dts is the decoding timestamp and pts the presentation timestamp, both in units of the owning stream's time base.
stream_index is the index of the owning media stream;
data points to the data buffer, size is its length;
duration is the duration of the data, again in units of the owning stream's time base;
pos is the byte offset of the data within the media stream;
destruct is the function pointer used to release the data buffer;
flags is a flag field; its lowest bit set to 1 marks the data as a keyframe.

The AVPacket structure itself is only a container; its data member references the actual buffer. That buffer is usually created by av_new_packet, but may also be created by other FFMPEG APIs (such as av_read_frame). When a packet's buffer is no longer used, it must be released by calling av_free_packet. av_free_packet invokes the packet's own destruct function, which takes one of two values:

(1) av_destruct_packet_nofree or 0;

(2) av_destruct_packet.

Case (1) merely clears data and size to 0; only case (2) actually frees the buffer.

FFMPEG builds its internal buffers inside AVPacket structures and supplies the destruct function itself. If FFMPEG intends to keep maintaining a buffer, it sets destruct to av_destruct_packet_nofree, so that a user call to av_free_packet cannot actually release it; if FFMPEG hands the buffer over to the caller entirely, it sets destruct to av_destruct_packet, meaning the buffer may be freed. To be safe, a user who wants to hold on to an FFMPEG-created AVPacket should call av_dup_packet to clone the buffer, turning the packet into one whose buffer can be released, so that improper retention of the buffer does not cause errors. For an AVPacket whose destruct pointer is av_destruct_packet_nofree, av_dup_packet allocates a new buffer, copies the data from the original buffer into it, points data at the new buffer, and sets destruct to av_destruct_packet. A sketch follows.
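A minimal sketch of that pattern: read one packet and immediately clone its buffer so it remains valid after the next av_read_frame call; read_and_keep is a hypothetical helper name:

#include "libavformat/avformat.h"

int read_and_keep(AVFormatContext *ctx, AVPacket *out)
{
    int err = av_read_frame(ctx, out);
    if (err < 0)
        return err;
    err = av_dup_packet(out); /* clone the buffer; the caller now owns it */
    if (err < 0)
        av_free_packet(out);
    return err;
    /* the caller uses *out and eventually calls av_free_packet(out) */
}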
For fixed-rate media, such as constant-frame-rate video or constant-bit-rate audio, the timing information (frame rate or bit rate) can be placed in the file header, as in AVI's hdrl LIST or MP4's moov box. A more elaborate scheme embeds the timing information inside the media stream itself, as MPEG TS and RealVideo do; that approach also handles variable-rate media and effectively avoids timing drift during synchronization.

FFMPEG attaches timestamps to every packet so that upper layers can implement synchronization more effectively. There are two kinds: the DTS, or decoding timestamp, and the PTS, or presentation timestamp. For audio the two are identical, but for some video coding formats the use of bidirectional prediction makes DTS and PTS diverge.
Without bidirectionally predicted (B) frames:

Picture type: I P P P P P P ... I P P

DTS: 0 1 2 3 4 5 6 ... 100 101 102

PTS: 0 1 2 3 4 5 6 ... 100 101 102

With B frames:

Picture type: I P B B P B B ... I P B

DTS: 0 1 2 3 4 5 6 ... 100 101 102

PTS: 0 3 1 2 6 4 5 ... 100 104 102

When B frames are present, the decoder is normally expected to reorder the pictures so that its output order is the display order:

Decoder input: I P B B P B B
(DTS) 0 1 2 3 4 5 6
(PTS) 0 3 1 2 6 4 5
Decoder output: X I B B P B B P
(PTS) X 0 1 2 3 4 5 6
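A small sketch for inspecting this behavior in practice: print a packet's dts/pts in seconds by scaling with the owning stream's time base via av_q2d; log_timestamps is a hypothetical helper name:

#include <stdio.h>
#include "libavformat/avformat.h"

void log_timestamps(AVFormatContext *ctx, const AVPacket *pkt)
{
    double tb = av_q2d(ctx->streams[pkt->stream_index]->time_base);
    printf("stream #%d dts=%.3fs pts=%.3fs\n", pkt->stream_index,
           pkt->dts == AV_NOPTS_VALUE ? -1.0 : pkt->dts * tb,
           pkt->pts == AV_NOPTS_VALUE ? -1.0 : pkt->pts * tb);
}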
4.2 Obtaining timing information
By calling avformat_find_stream_info, a multimedia application can obtain a media file's timing information from the AVFormatContext object: mainly the total duration and the start time, plus the related bit rate and file size. The timing information is expressed in AV_TIME_BASE units, i.e. microseconds.
typedef struct AVFormatContext {
    ......
    /** Decoding: position of the first frame of the component, in
        AV_TIME_BASE fractional seconds. NEVER set this value directly:
        It is deduced from the AVStream values. */
    int64_t start_time;
    /** Decoding: duration of the stream, in AV_TIME_BASE fractional
        seconds. Only set this value if you know none of the individual stream
        durations and also dont set any of them. This is deduced from the
        AVStream values if not set. */
    int64_t duration;
    /** decoding: total file size, 0 if unknown */
    int64_t file_size;
    /** Decoding: total stream bitrate in bit/s, 0 if not
        available. Never set it directly if the file_size and the
        duration are known as FFmpeg can compute it automatically. */
    int bit_rate;
    ......
} AVFormatContext;
All four members above are read-only; middleware built on FFMpeg needs to wrap them behind its own interface, for example:

LONG GetDuration(IntfX*);

LONG GetStartTime(IntfX*);

LONG GetFileSize(IntfX*);

LONG GetBitRate(IntfX*);
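One way such wrappers might look, as a sketch: IntfX and the Get* names are the article's hypothetical middleware interface, assumed here to embed the AVFormatContext pointer; int64_t is used instead of LONG since these fields are 64-bit:

#include "libavformat/avformat.h"

typedef struct IntfX { AVFormatContext *fmt; } IntfX;

int64_t GetDuration(IntfX *p)  { return p->fmt->duration;   } /* AV_TIME_BASE units */
int64_t GetStartTime(IntfX *p) { return p->fmt->start_time; } /* AV_TIME_BASE units */
int64_t GetFileSize(IntfX *p)  { return p->fmt->file_size;  } /* bytes, 0 if unknown */
int     GetBitRate(IntfX *p)   { return p->fmt->bit_rate;   } /* bit/s, 0 if unknown */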
5.1 avformat_open_input

int avformat_open_input(AVFormatContext **ic_ptr, const char *filename, AVInputFormat *fmt, AVDictionary **options);
avformat_open_input performs two tasks:

Opening the file or URL and initializing the byte-stream-oriented low-level input module.
Parsing the header of the multimedia file or stream, creating the AVFormatContext structure, filling in its key fields, and creating an AVStream structure for each elementary stream.
A multimedia file or stream relates to the elementary streams it contains as follows:

Multimedia file/stream (movie.mkv)

Elementary stream 1 (H.264 video)

Elementary stream 2 (AAC audio, Chinese)

Elementary stream 3 (AAC audio, English)

Elementary stream 4 (Chinese subtitles)

Elementary stream 5 (English subtitles)

...
About the input parameters:

ic_ptr: a pointer to a pointer, used to return the AVFormatContext structure constructed inside avformat_open_input.
filename: the file name.
fmt: explicitly specifies the input format; if NULL, the input format is probed automatically.
options
By parsing the header and other auxiliary data of the multimedia file or stream, this function can gather a great deal of information about the file, its streams, and their codecs. However, any given multimedia format carries only limited information, different authoring tools fill in headers differently, and those tools inevitably introduce errors, so the function does not guarantee that all required information is obtained. In that case another function must be considered: avformat_find_stream_info. The usual open sequence is sketched below.
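A minimal sketch of that sequence, opening the input and then letting avformat_find_stream_info fill in whatever the header lacked; open_media is a hypothetical helper name:

#include <stdio.h>
#include "libavformat/avformat.h"

AVFormatContext *open_media(const char *filename)
{
    AVFormatContext *ctx = NULL;
    av_register_all(); /* register all formats and codecs once */
    if (avformat_open_input(&ctx, filename, NULL, NULL) < 0) {
        fprintf(stderr, "cannot open %s\n", filename);
        return NULL;
    }
    if (avformat_find_stream_info(ctx, NULL) < 0) {
        fprintf(stderr, "cannot find stream info\n");
        avformat_close_input(&ctx);
        return NULL;
    }
    return ctx;
}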
5.2 avformat_find_stream_info
int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options);
This function's main job is to obtain the necessary codec parameters and set them in ic->streams[i]->codec.
First, the codec type and codec id of each media stream must be determined; these are two enums defined in avutil.h and avcodec.h:
enum AVMediaType {
    AVMEDIA_TYPE_UNKNOWN = -1,
    AVMEDIA_TYPE_VIDEO,
    AVMEDIA_TYPE_AUDIO,
    AVMEDIA_TYPE_DATA,
    AVMEDIA_TYPE_SUBTITLE,
    AVMEDIA_TYPE_ATTACHMENT,
    AVMEDIA_TYPE_NB
};

enum CodecID {
    CODEC_ID_NONE,

    /* video codecs */
    CODEC_ID_MPEG1VIDEO,
    CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
    CODEC_ID_MPEG2VIDEO_XVMC,
    CODEC_ID_H261,
    CODEC_ID_H263,
    ...
};
Usually, if a media format carries complete and correct header information, calling avformat_open_input is enough to obtain these two parameters; but if for some reason avformat_open_input cannot get them, the task falls to avformat_find_stream_info.

Next, the time base of each media stream's codec must also be obtained.

In addition, for an audio codec the following are needed:

sample rate,
channel count,
sample width,
frame length (required by some codecs);

and for a video codec:

picture size,
color space and pixel format (a sketch that checks these follows).
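A sketch that verifies, once avformat_find_stream_info has run, that the parameters listed above are present for each stream; check_stream_params is a hypothetical helper name:

#include <stdio.h>
#include "libavformat/avformat.h"

void check_stream_params(AVFormatContext *ctx)
{
    unsigned int i;
    for (i = 0; i < ctx->nb_streams; i++) {
        AVCodecContext *cc = ctx->streams[i]->codec;
        if (cc->codec_type == AVMEDIA_TYPE_AUDIO)
            printf("#%u audio: rate=%d channels=%d sample_fmt=%d frame_size=%d\n",
                   i, cc->sample_rate, cc->channels, cc->sample_fmt, cc->frame_size);
        else if (cc->codec_type == AVMEDIA_TYPE_VIDEO)
            printf("#%u video: %dx%d pix_fmt=%d\n",
                   i, cc->width, cc->height, cc->pix_fmt);
    }
}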
5.3 av_read_frame
int av_read_frame(AVFormatContext *s, AVPacket *pkt);
This function reads media data from the multimedia file or stream; the data obtained is returned in the AVPacket structure pkt. For audio data at a constant bit rate, pkt carries one or more complete audio frames; at a variable bit rate, pkt carries exactly one audio frame. For video data, pkt carries exactly one video frame. Note: before calling this function again, av_free_packet must be used to release the resources pkt occupies.

pkt->stream_index identifies which stream the media data belongs to, so the data can be handed to the matching decoder for further processing, as in the loop sketched below.
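The canonical demux loop, as a minimal sketch: every packet handed out by av_read_frame is released with av_free_packet before the next read; demux_all and video_index are hypothetical names:

#include "libavformat/avformat.h"

void demux_all(AVFormatContext *ctx, int video_index)
{
    AVPacket pkt;
    while (av_read_frame(ctx, &pkt) >= 0) {
        if (pkt.stream_index == video_index) {
            /* hand pkt to the video decoder here */
        }
        av_free_packet(&pkt); /* mandatory before the next av_read_frame */
    }
}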
5.4 av_seek_frame
int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp, int flags);
This function implements random access within a media file by moving the file's read pointer. Three modes are supported:

Time-based access: position the read pointer at a given point in time, so that subsequent av_read_frame calls return media data whose timestamps match that point; this is how a player's fast-forward and rewind are typically implemented.
Byte-offset-based access: the equivalent of an ordinary file seek, where timestamp becomes the byte offset.
Frame-number-based access: timestamp is the frame number of the media data to access.

About the parameters (a usage sketch follows this list):

s: the AVFormatContext pointer, the one returned by avformat_open_input.
stream_index: selects the media stream. For time-based access, the third parameter timestamp is in this stream's time base. If stream_index is negative, no specific stream is selected and FFMPEG chooses a default one by its own heuristics; timestamp is then in AV_TIME_BASE units (microseconds).
timestamp: the timestamp; its unit depends on the other parameters.
flags: the access mode. AVSEEK_FLAG_BYTE means byte-offset access, AVSEEK_FLAG_FRAME frame-number access; anything else means time-based access.
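A sketch of time-based seeking to an absolute position given in seconds. With stream_index = -1 the timestamp stays in AV_TIME_BASE units; with a concrete stream it is rescaled to that stream's time base first. seek_to_seconds is a hypothetical helper name, and AVSEEK_FLAG_BACKWARD is chosen so the seek lands on or before the target:

#include "libavformat/avformat.h"

int seek_to_seconds(AVFormatContext *ctx, int stream_index, double seconds)
{
    int64_t ts = (int64_t)(seconds * AV_TIME_BASE);
    if (stream_index >= 0)
        ts = av_rescale_q(ts, (AVRational){1, AV_TIME_BASE},
                          ctx->streams[stream_index]->time_base);
    return av_seek_frame(ctx, stream_index, ts, AVSEEK_FLAG_BACKWARD);
}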
5.5 av_close_input_file
void av_close_input_file(AVFormatContext *s);
Closes a media file: releases its resources and closes the physical I/O.
5.6 avcodec_find_decoder
AVCodec *avcodec_find_decoder(enum CodecID id);
AVCodec *avcodec_find_decoder_by_name(const char *name);
Searches the system for a decoder matching the given codec id or decoder name, and returns a pointer to its AVCodec structure.
5.7 avcodec_open
int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
This function fleshes out an AVCodecContext according to the given AVCodec pointer. Before calling it, first allocate an AVCodecContext with avcodec_alloc_context, or take the AVCodecContext of the relevant media stream obtained via avformat_open_input; the AVCodec structure itself is obtained via avcodec_find_decoder.

This function also initializes the corresponding decoder. The whole sequence is sketched below.
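A sketch of the lookup-then-open sequence, reusing the codec context that avformat_open_input already attached to the stream (avcodec_open2 is the newer spelling used in the test program above; older trees call it avcodec_open); open_decoder is a hypothetical helper name:

#include <stdio.h>
#include "libavformat/avformat.h"

int open_decoder(AVFormatContext *ctx, int stream_index)
{
    AVCodecContext *cc = ctx->streams[stream_index]->codec;
    AVCodec *codec = avcodec_find_decoder(cc->codec_id);
    if (!codec) {
        fprintf(stderr, "decoder not found for codec_id %d\n", cc->codec_id);
        return -1;
    }
    return avcodec_open2(cc, codec, NULL);
}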
5.8 avcodec_decode_video2
int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture, int *got_picture_ptr, AVPacket *avpkt);
Decodes one video frame; got_picture_ptr indicates whether any decoded picture was produced.

The input data arrives in an AVPacket structure; the output goes into an AVFrame structure. AVFrame is a data structure defined in avcodec.h:
typedef struct AVFrame {
    FF_COMMON_FRAME
} AVFrame;

FF_COMMON_FRAME defines a great many fields, most of which are used internally by FFMpeg. The ones that matter most to users include:

#define FF_COMMON_FRAME \
    ......              \
    uint8_t *data[4];   \
    int linesize[4];    \
    int key_frame;      \
    int pict_type;      \
    int64_t pts;        \
    int reference;      \
    ......
FFMpeg stores raw picture data internally in planar form, i.e. the picture's pixels are split into several planes (R/G/B or Y/U/V); the pointers in the data array point to the start of each pixel plane, while the linesize array holds the row width of the buffer that stores each plane:
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+++data[0]->#################################++++++++++++
++++++++++++###########picture data##########++++++++++++
++++++++++++#################################++++++++++++
++++++++++++#################################++++++++++++
……………………
++++++++++++#################################++++++++++++
|<--------------linesize[0]-------------->|
In addition, key_frame indicates whether the picture is a keyframe; pict_type is the picture's coding type: I(1)/P(2)/B(3)...; pts is the timestamp in time_base units, which for some decoders such as H.261, H.263 and MPEG-4 can be obtained from the header information; reference indicates whether the picture is used as a reference. A plane-walking sketch follows.
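A sketch that walks the three planes of a decoded YUV420P AVFrame line by line, honoring the linesize[] padding, and appends the pixels to a raw file (the same thing the test program's inner loops do); write_yuv420p is a hypothetical helper name:

#include <stdio.h>
#include "libavcodec/avcodec.h"

void write_yuv420p(FILE *fp, const AVFrame *frame, int width, int height)
{
    int plane, y;
    for (plane = 0; plane < 3; plane++) {  /* Y, U, V */
        int w = plane ? width / 2 : width; /* chroma is subsampled 2x2 */
        int h = plane ? height / 2 : height;
        const uint8_t *p = frame->data[plane];
        for (y = 0; y < h; y++) {
            fwrite(p, 1, w, fp);           /* copy one row, skip the padding */
            p += frame->linesize[plane];
        }
    }
}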
5.9 avcodec_decode_audio4
int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt);
Decodes one audio frame. The input data arrives in the AVPacket structure, the output goes into frame, and got_frame_ptr indicates whether any data was produced. A decode sketch follows.
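A sketch of decoding one audio packet. The decoded samples land in frame->data[]; for packed (non-planar) sample formats the byte count can be derived with av_samples_get_buffer_size. decode_audio_packet is a hypothetical helper name:

#include "libavcodec/avcodec.h"
#include "libavutil/samplefmt.h"

int decode_audio_packet(AVCodecContext *cc, AVFrame *frame, AVPacket *pkt)
{
    int got = 0;
    int used = avcodec_decode_audio4(cc, frame, &got, pkt);
    if (used < 0)
        return used; /* decode error */
    if (got) {
        int bytes = av_samples_get_buffer_size(NULL, cc->channels,
                                               frame->nb_samples,
                                               cc->sample_fmt, 1);
        /* play or store 'bytes' bytes starting at frame->data[0] here */
        (void)bytes;
    }
    return used;     /* number of bytes consumed from pkt */
}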
5.10 avcodec_close
int avcodec_close(AVCodecContext *avctx);
Closes the decoder and releases the resources allocated by avcodec_open.