ffmpeg綜合應用示例（一）——攝像頭直播

時間 2020-07-13

標籤 ffmpeg 綜合應用示例攝像頭直播简体版

原文原文鏈接

本文的示例將實現：讀取PC攝像頭視頻數據並以RTMP協議發送爲直播流。示例包含了

一、ffmpeg的libavdevice的使用

二、視頻解碼、編碼、推流的基本流程

具備較強的綜合性。

要使用libavdevice的相關函數，首先需要註冊相關組件

[cpp] view plain copy

/* Register libavdevice's components; this has to happen before any libavdevice function is used. */
avdevice_register_all();

接下來我們要列出電腦中可用的dshow設備

[cpp] view plain copy

AVFormatContext *pFmtCtx = avformat_alloc_context();
AVDeviceInfoList *device_info = NULL;
AVDictionary* options = NULL;
av_dict_set(&options, "list_devices", "true", 0);
AVInputFormat *iformat = av_find_input_format("dshow");
printf("Device Info=============\n");
avformat_open_input(&pFmtCtx, "video=dummy", iformat, &options);
printf("========================\n");

可以看到這裏打開設備的步驟基本與打開文件的步驟相同，上面的代碼中設置了AVDictionary，這樣與在命令行中輸入下列命令有相同的效果

[cpp] view plain copy

ffmpeg -list_devices true -f dshow -i dummy

以上語句獲得的結果如下

這裏我的電腦上只有一個虛擬攝像頭軟件虛擬出來的幾個dshow設備，沒有音頻設備，因此有如上的結果。

需要說明的是，avdevice有一個avdevice_list_devices函數可以枚舉系統的採集設備，包括設備名和設備描述，非常適合用於讓用戶選擇要使用的設備，但是不支持dshow設備，所以這裏沒有使用它。

下一步就可以像打開普通文件一樣將上面的具體設備名作爲輸入打開，並進行相應的初始化設置，如下

[cpp] view plain copy

av_register_all();
//Register Device
avdevice_register_all();
avformat_network_init();
//Show Dshow Device
show_dshow_device();
printf("\nChoose capture device: ");
if (gets(capture_name) == 0)
{
printf("Error in gets()\n");
return -1;
}
sprintf(device_name, "video=%s", capture_name);
ifmt=av_find_input_format("dshow");
//Set own video device's name
if (avformat_open_input(&ifmt_ctx, device_name, ifmt, NULL) != 0){
printf("Couldn't open input stream.（沒法打開輸入流）\n");
return -1;
}
//input initialize
if (avformat_find_stream_info(ifmt_ctx, NULL)<0)
{
printf("Couldn't find stream information.（沒法獲取流信息）\n");
return -1;
}
videoindex = -1;
for (i = 0; i<ifmt_ctx->nb_streams; i++)
if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoindex = i;
break;
}
if (videoindex == -1)
{
printf("Couldn't find a video stream.（沒有找到視頻流）\n");
return -1;
}
if (avcodec_open2(ifmt_ctx->streams[videoindex]->codec, avcodec_find_decoder(ifmt_ctx->streams[videoindex]->codec->codec_id), NULL)<0)
{
printf("Could not open codec.（沒法打開解碼器）\n");
return -1;
}

在選擇了輸入設備並進行相關初始化之後，需要對輸出做相應的初始化。ffmpeg將網絡協議和文件同等看待，同時因爲使用RTMP協議進行傳輸，這裏我們指定輸出爲flv格式，編碼器使用H.264

[cpp] view plain copy

//output initialize
avformat_alloc_output_context2(&ofmt_ctx, NULL, "flv", out_path);
//output encoder initialize
pCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
if (!pCodec){
printf("Can not find encoder! (沒有找到合適的編碼器！)\n");
return -1;
}
pCodecCtx=avcodec_alloc_context3(pCodec);
pCodecCtx->pix_fmt = PIX_FMT_YUV420P;
pCodecCtx->width = ifmt_ctx->streams[videoindex]->codec->width;
pCodecCtx->height = ifmt_ctx->streams[videoindex]->codec->height;
pCodecCtx->time_base.num = 1;
pCodecCtx->time_base.den = 25;
pCodecCtx->bit_rate = 400000;
pCodecCtx->gop_size = 250;
/* Some formats,for example,flv, want stream headers to be separate. */
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
pCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;
//H264 codec param
//pCodecCtx->me_range = 16;
//pCodecCtx->max_qdiff = 4;
//pCodecCtx->qcompress = 0.6;
pCodecCtx->qmin = 10;
pCodecCtx->qmax = 51;
//Optional Param
pCodecCtx->max_b_frames = 3;
// Set H264 preset and tune
AVDictionary *param = 0;
av_dict_set(&param, "preset", "fast", 0);
av_dict_set(&param, "tune", "zerolatency", 0);
if (avcodec_open2(pCodecCtx, pCodec,&param) < 0){
printf("Failed to open encoder! (編碼器打開失敗！)\n");
return -1;
}
//Add a new stream to output,should be called by the user before avformat_write_header() for muxing
video_st = avformat_new_stream(ofmt_ctx, pCodec);
if (video_st == NULL){
return -1;
}
video_st->time_base.num = 1;
video_st->time_base.den = 25;
video_st->codec = pCodecCtx;
//Open output URL,set before avformat_write_header() for muxing
if (avio_open(&ofmt_ctx->pb,out_path, AVIO_FLAG_READ_WRITE) < 0){
printf("Failed to open output file! (輸出文件打開失敗！)\n");
return -1;
}
//Show some Information
av_dump_format(ofmt_ctx, 0, out_path, 1);
//Write File Header
avformat_write_header(ofmt_ctx,NULL);

完成輸入和輸出的初始化之後，就可以正式開始解碼和編碼並推流的流程了，這裏要注意，攝像頭數據往往是RGB格式的，需要將其轉換爲YUV420P格式，所以要先做如下的準備工作

[cpp] view plain copy

//prepare before decode and encode
dec_pkt = (AVPacket *)av_malloc(sizeof(AVPacket));
//enc_pkt = (AVPacket *)av_malloc(sizeof(AVPacket));
//camera data has a pix fmt of RGB,convert it to YUV420
img_convert_ctx = sws_getContext(ifmt_ctx->streams[videoindex]->codec->width, ifmt_ctx->streams[videoindex]->codec->height,
ifmt_ctx->streams[videoindex]->codec->pix_fmt, pCodecCtx->width, pCodecCtx->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
pFrameYUV = avcodec_alloc_frame();
uint8_t *out_buffer = (uint8_t *)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height));
avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);

下面就可以正式開始解碼、編碼和推流了

[cpp] view plain copy

/*
 * Transcoding loop: read a packet from the capture device, decode it,
 * convert the picture to YUV420P, encode it, stamp the encoded packet
 * with a PTS/DTS derived from the frame rate and write it to the output,
 * sleeping when the packet is ahead of wall-clock time.
 */
//start decode and encode
int64_t start_time=av_gettime();
while (av_read_frame(ifmt_ctx, dec_pkt) >= 0){
    if (exit_thread){
        av_free_packet(dec_pkt);
        break;
    }
    /* Only the video stream has an opened decoder; drop packets of any other stream. */
    if (dec_pkt->stream_index != videoindex){
        av_free_packet(dec_pkt);
        continue;
    }
    av_log(NULL, AV_LOG_DEBUG, "Going to reencode the frame\n");
    pframe = av_frame_alloc();
    if (!pframe) {
        av_free_packet(dec_pkt);
        ret = AVERROR(ENOMEM);
        return -1;
    }
    ret = avcodec_decode_video2(ifmt_ctx->streams[videoindex]->codec, pframe,
        &dec_got_frame, dec_pkt);
    if (ret < 0) {
        av_frame_free(&pframe);
        av_free_packet(dec_pkt);
        av_log(NULL, AV_LOG_ERROR, "Decoding failed\n");
        break;
    }
    if (dec_got_frame){
        sws_scale(img_convert_ctx, (const uint8_t* const*)pframe->data, pframe->linesize, 0, pCodecCtx->height, pFrameYUV->data, pFrameYUV->linesize);
        /* The decoded picture has been copied into pFrameYUV and is no longer needed. */
        av_frame_free(&pframe);
        av_init_packet(&enc_pkt);
        /* NULL data / zero size: let the encoder allocate the output buffer. */
        enc_pkt.data = NULL;
        enc_pkt.size = 0;
        ret = avcodec_encode_video2(pCodecCtx, &enc_pkt, pFrameYUV, &enc_got_frame);
        if (ret < 0){
            /* Skip this frame but keep streaming. */
            av_log(NULL, AV_LOG_ERROR, "Encoding failed\n");
        }
        else if (enc_got_frame == 1){
            enc_pkt.stream_index = video_st->index;
            //Write PTS
            /* Use the output stream itself: videoindex is an index into the INPUT context. */
            AVRational time_base = video_st->time_base;//{ 1, 1000 };
            AVRational r_framerate1 = ifmt_ctx->streams[videoindex]->r_frame_rate;// { 50, 2 };
            AVRational time_base_q = { 1, AV_TIME_BASE };
            //Duration between 2 frames (us)
            int64_t calc_duration;
            if (r_framerate1.num > 0 && r_framerate1.den > 0)
                calc_duration = (double)(AV_TIME_BASE)*(1 / av_q2d(r_framerate1)); //internal timestamp
            else
                /* Frame rate unknown: fall back to the encoder's time base to avoid dividing by zero. */
                calc_duration = av_rescale_q(1, pCodecCtx->time_base, time_base_q);
            //Parameters
            enc_pkt.pts = av_rescale_q(framecnt*calc_duration, time_base_q, time_base);
            enc_pkt.dts = enc_pkt.pts;
            enc_pkt.duration = av_rescale_q(calc_duration, time_base_q, time_base);
            enc_pkt.pos = -1;
            /*
             * Count the frame only after stamping it: the first packet starts
             * at 0, and flush_encoder(), which stamps framecnt*duration before
             * incrementing, continues the sequence without repeating the last
             * timestamp.
             */
            framecnt++;
            //Delay
            int64_t pts_time = av_rescale_q(enc_pkt.dts, time_base, time_base_q);
            int64_t now_time = av_gettime() - start_time;
            if (pts_time > now_time)
                av_usleep(pts_time - now_time);
            ret = av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
            av_free_packet(&enc_pkt);
            if (ret < 0){
                /* Stop instead of capturing forever into an output that no longer accepts data. */
                av_log(NULL, AV_LOG_ERROR, "Writing the packet failed\n");
                av_free_packet(dec_pkt);
                break;
            }
        }
    }
    else {
        av_frame_free(&pframe);
    }
    av_free_packet(dec_pkt);
}

解碼部分比較簡單，編碼部分需要自己計算PTS、DTS，比較複雜。這裏通過幀率計算PTS和DTS

首先通過幀率計算每兩幀之間的時間間隔，但是要換算爲ffmpeg內部的時間基表示的值。所謂ffmpeg內部的時間基即AV_TIME_BASE，定義爲

[cpp] view plain copy

/* FFmpeg's internal time base: one second is 1000000 units, i.e. values in it count microseconds. */
#define AV_TIME_BASE 1000000

任何以秒爲單位的時間值都通過下式轉換爲ffmpeg內部時間基表示的時間值，其實就是轉換爲了微秒

[cpp] view plain copy

timestamp=AV_TIME_BASE*time(s)

因此有

[cpp] view plain copy

//Duration between 2 frames (us)
/* 1 / frame rate is the frame interval in seconds; multiplying by AV_TIME_BASE expresses it in microseconds. */
int64_t calc_duration = (double)(AV_TIME_BASE)*(1 / av_q2d(r_framerate1)); //internal timestamp

而enc_pkt由於是要寫入最後的輸出碼流的，它的PTS、DTS應該是以ofmt_ctx->streams[videoindex]->time_base爲時間基來表示的，時間基之間的轉換用下式

[cpp] view plain copy

/* Convert the frame's time from AV_TIME_BASE units (time_base_q) into the output stream's time base. */
enc_pkt.pts = av_rescale_q(framecnt*calc_duration, time_base_q, time_base);

其實就是

[cpp] view plain copy

/* The same conversion written out by hand: value * time_base_q / time_base, evaluated in floating point. */
enc_pkt.pts = (double)(framecnt*calc_duration)*(double)(av_q2d(time_base_q)) / (double)(av_q2d(time_base));

非常簡單的數學轉換。

還有一點，因爲轉碼流程可能比實際的播放快很多，爲保持流暢的播放，要判斷DTS和當前真實時間，並進行相應的延時操作，如下

[cpp] view plain copy

//Delay
/* Express the packet's DTS in AV_TIME_BASE units (microseconds) so it can be compared with elapsed time. */
int64_t pts_time = av_rescale_q(enc_pkt.dts, time_base, time_base_q);
/* Time elapsed since start_time was taken, in the microseconds av_gettime() reports. */
int64_t now_time = av_gettime() - start_time;
/* Sleep only when the packet is ahead of real time. */
if (pts_time > now_time)
av_usleep(pts_time - now_time);

這裏正好與之前相反，要將ofmt_ctx->streams[videoindex]->time_base時間基轉換爲ffmpeg內部時間基，因爲av_gettime獲得的就是以微秒爲單位的時間

整體流程完畢之後，還剩下最後的flush encoder操作，輸出之前存儲在緩衝區內的數據

[cpp] view plain copy

//Flush Encoder
ret = flush_encoder(ifmt_ctx,ofmt_ctx,0,framecnt);
if (ret < 0) {
printf("Flushing encoder failed\n");
return -1;
}
//Write file trailer
av_write_trailer(ofmt_ctx);
//Clean
if (video_st)
avcodec_close(video_st->codec);
av_free(out_buffer);
avio_close(ofmt_ctx->pb);
avformat_free_context(ifmt_ctx);
avformat_free_context(ofmt_ctx);

flush_encoder的內容如下

[cpp] view plain copy

/**
 * Drain the packets still buffered inside the video encoder and mux them.
 *
 * @param ifmt_ctx     input context; the r_frame_rate of streams[stream_index]
 *                     is used to derive the packet timestamps
 * @param ofmt_ctx     output context; streams[stream_index]->codec is the
 *                     encoder being drained and the packets are written here
 * @param stream_index stream index, applied to both contexts
 * @param framecnt     frame counter the timestamps continue from
 * @return 0 when the encoder is drained or reports no delay capability,
 *         otherwise the negative value returned by the failing call
 *         (AVERROR(EINVAL) when stream_index is out of range)
 */
int flush_encoder(AVFormatContext *ifmt_ctx, AVFormatContext *ofmt_ctx, unsigned int stream_index, int framecnt){
    int ret = 0;
    int got_frame;
    AVPacket enc_pkt;

    /* Both contexts are indexed with stream_index below. */
    if (stream_index >= ofmt_ctx->nb_streams || stream_index >= ifmt_ctx->nb_streams)
        return AVERROR(EINVAL);

    AVStream *out_st = ofmt_ctx->streams[stream_index];
    AVCodecContext *enc_ctx = out_st->codec;

    /* Nothing to drain unless the encoder has the delay capability. */
    if (!(enc_ctx->codec->capabilities & CODEC_CAP_DELAY))
        return 0;

    /* These values do not change while flushing, so compute them once. */
    AVRational time_base = out_st->time_base;//{ 1, 1000 };
    AVRational r_framerate1 = ifmt_ctx->streams[stream_index]->r_frame_rate;// { 50, 2 };
    AVRational time_base_q = { 1, AV_TIME_BASE };
    //Duration between 2 frames (us)
    int64_t calc_duration;
    if (r_framerate1.num > 0 && r_framerate1.den > 0)
        calc_duration = (double)(AV_TIME_BASE)*(1 / av_q2d(r_framerate1)); //internal timestamp
    else
        /* Frame rate unknown: fall back to the encoder's time base to avoid dividing by zero. */
        calc_duration = av_rescale_q(1, enc_ctx->time_base, time_base_q);

    while (1) {
        av_init_packet(&enc_pkt);
        /* NULL data / zero size: let the encoder allocate the output buffer. */
        enc_pkt.data = NULL;
        enc_pkt.size = 0;
        /* A NULL frame asks the encoder for its buffered output. */
        ret = avcodec_encode_video2(enc_ctx, &enc_pkt, NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame){
            ret=0;
            break;
        }
        printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n",enc_pkt.size);
        enc_pkt.stream_index = stream_index;
        //Write PTS
        enc_pkt.pts = av_rescale_q(framecnt*calc_duration, time_base_q, time_base);
        enc_pkt.dts = enc_pkt.pts;
        enc_pkt.duration = av_rescale_q(calc_duration, time_base_q, time_base);
        enc_pkt.pos = -1;
        framecnt++;
        ofmt_ctx->duration=enc_pkt.duration * framecnt;
        /* mux encoded frame */
        ret = av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
        /* Release the packet after writing, as the main transcoding loop does. */
        av_free_packet(&enc_pkt);
        if (ret < 0)
            break;
    }
    return ret;
}