Qt與FFmpeg聯合開發指南（三）——編碼（1）：代碼流程演示

時間 2019-11-09

原文鏈接

前兩講演示了基本的解碼流程和簡單的功能封裝，今天我們開始學習編碼。編碼就是封裝音視頻流的過程。在整個編碼教程中，我會首先在一個函數中演示完整的編碼流程，再解釋其中存在的問題。下一講我們會將編碼功能進行封裝，並解釋針對不同的輸出環境，代碼上需要注意的地方。最後我們還會把之前做好的解碼器添加進開發環境，實現PC屏幕和攝像頭錄製，然後再通過播放器播放。

首先說明一下本章的目標：

通過Qt進行視頻採集
通過Qt進行音頻採集
將音視頻編碼成mp4文件，並且可以通過vlc播放

1、通過Qt進行視頻採集

Qt提供了對桌面錄屏的支持，我們可以很輕鬆地完成開發

// 首先獲取到完整桌面的窗口句柄已經寬高信息
WId wid = QApplication::desktop()->winId();
int width = QApplication::desktop()->width();
int height = QApplication::desktop()->height();

// 截屏得到圖片
static QScreen *screen = NULL;
if (!screen) {
    screen = QGuiApplication::primaryScreen();
}
QPixmap pix = screen->grabWindow(wid);
const uchar *rgb = pix.toImage().bits();

這裏有一點需要特別注意：當我們把上面的代碼封裝進函數以後，我們無法直接通過返回值獲取到rgb數據。這個地方曾經卡了我好幾天，原因在於通過grabWindow(wid)函數獲取到的QPixmap對象（以及由它轉換出來的QImage）屬於函數的局部變量，在函數結束以後，該變量連同bits()所指向的數據都會被清理掉。所以如果我們想在函數外部繼續使用圖片數據，就必須對QImage進行一次深拷貝。我提供兩條思路：一是直接將QImage對象進行深拷貝，然後使用它的bits()數據；但是這樣的話，如果我們只在外部析構bits()中的數據，對內存的清理工作其實並不完整（QImage對象本身仍然留在內存中）。另一個方法是直接對bits()裏的數據進行拷貝；但是由於QImage中每一行（scanline）的數據會按32位對齊，行尾可能帶有填充字節，所以我們需要按行做一次轉換。爲了方便起見，我們先按照第一種思路設計。

/**
 * Captures the whole desktop and returns its pixels as one tightly packed
 * 32-bit-per-pixel buffer (row stride = desktop width * 4 bytes).
 *
 * The pixels are converted to QImage::Format_ARGB32 before being copied.
 * NOTE(review): per the Qt documentation this is 0xAARRGGBB per pixel, i.e.
 * B,G,R,A byte order on little-endian hosts — confirm for the target platform.
 *
 * @return A buffer allocated with new[] that the caller owns and must release
 *         with delete[], or NULL when no screen is available or the grab
 *         produced no image.
 */
const uchar* VideoAcquisition::getRGB()
{
    static QScreen *screen = NULL;
    if (!screen) {
        screen = QGuiApplication::primaryScreen();
    }
    if (!screen) {
        return NULL;
    }

    const WId wid = QApplication::desktop()->winId();
    const int width = QApplication::desktop()->width();
    const int height = QApplication::desktop()->height();
    if (width <= 0 || height <= 0) {
        return NULL;
    }

    // The image lives on the stack, so nothing is leaked per captured frame;
    // only the plain pixel buffer below outlives this function.
    const QImage image = screen->grabWindow(wid)
                             .toImage()
                             .copy(0, 0, width, height)
                             .convertToFormat(QImage::Format_ARGB32);
    if (image.isNull()) {
        return NULL;
    }

    // Copy row by row so the result has exactly width * 4 bytes per line,
    // independent of the scanline stride QImage uses internally.
    const size_t rowBytes = static_cast<size_t>(width) * 4;
    uchar *buffer = new uchar[rowBytes * static_cast<size_t>(height)];
    for (int y = 0; y < height; ++y) {
        memcpy(buffer + static_cast<size_t>(y) * rowBytes, image.constScanLine(y), rowBytes);
    }
    return buffer;
}

2、通過Qt進行音頻採集

與視頻採集的圖片不同，音頻數據對應的是一段時間的錄音。雖然Qt也提供了統一的音頻錄製接口，不過我們首先需要對錄音設備進行初始化，主要是設置錄音的參數和控制每次從音頻緩存中讀取的數據大小。這裏我們以CD音質爲標準，即採樣率：44100Hz，通道數：2，採樣位數：16bit，編碼格式：audio/pcm。

首先初始化一個錄音設備：QIODevice

// Describe the capture format: 44.1 kHz, stereo, 16-bit little-endian PCM.
QAudioFormat fmt;
fmt.setSampleRate(44100);
fmt.setChannelCount(2);
fmt.setSampleSize(16); // sample size in bits (16 bits = 2 bytes per sample)
// Signed samples: the resampler input is declared as AV_SAMPLE_FMT_S16, which
// is signed 16-bit, so capturing unsigned data would be misinterpreted.
fmt.setSampleType(QAudioFormat::SignedInt);
fmt.setByteOrder(QAudioFormat::LittleEndian);
fmt.setCodec("audio/pcm");
// Start capturing; the returned QIODevice is what the PCM bytes are read from.
QAudioInput *audioInput = new QAudioInput(fmt);
QIODevice *device = audioInput->start();

假設我們每次從音頻緩存中讀取1024個採樣點的數據，已知採樣的其它條件爲雙通道和每個採樣點兩個字節（16bit），則我們用於保存數據的數組大小爲：char *pcm = new char[1024 * 2 * 2]

const char* AudioAcquisition::getPCM()
{
    int readOnceSize = 1024; // 每次從音頻設備中讀取的數據大小
    int offset = 0; // 當前已經讀到的數據大小，做爲pcm的偏移量
    int pcmSize = 1024 * 2 * 2;
    char *pcm = new char[pcmSize];
    while (audioInput) {
        int remains = pcmSize - offset; // 剩餘空間
        int ready = audioInput->bytesReady(); // 音頻採集設備目前已經準備好的數據大小
        if (ready < readOnceSize) { // 當前音頻設備中的數據不足
            QThread::msleep(1);
            continue;
        }
        if (remains < readOnceSize) { // 當幀存儲（pcmSize）的剩餘空間（remainSize）小於單次讀取數據預設（readSizeOnce）時
            device->read(pcm + offset, remains); // 從設備中讀取剩餘空間大小的數據
            // 讀滿一幀數據退出
            break;
        }
        int len = device->read(pcm + offset, readOnceSize);
        offset += len;
    }
    return pcm;
}

完成了音視頻採集工作以後，接下來是本章的重點——編碼——也就是調用FFmpeg庫的過程。

3、對音視頻編碼成mp4文件

（1）初始化FFmpeg

// One-time FFmpeg initialisation calls made before any other FFmpeg API is used.
// NOTE(review): av_register_all()/avcodec_register_all() are deprecated in
// newer FFmpeg releases — confirm against the FFmpeg version being built with.
av_register_all();
avcodec_register_all();
avformat_network_init();

（2）設置三個參數分別用於保存錯誤代碼、錯誤信息和輸出文件路徑

int errnum = 0;              // last FFmpeg return code
char errbuf[1024] = { 0 };   // text form of errnum, filled by av_strerror()
// A string literal is read-only, so it must be referenced through const char *.
const char *filename = "D:/test.mp4";
// Video capture object
VideoAcquisition *va = new VideoAcquisition();
// Audio capture object
AudioAcquisition *aa = new AudioAcquisition();

（3）建立輸出的包裝器

// Create the output (muxer) context; the container format is guessed from
// the file name.
AVFormatContext *pFormatCtx = NULL;
errnum = avformat_alloc_output_context2(&pFormatCtx, NULL, NULL, filename);
if (errnum < 0 || !pFormatCtx) {
    av_strerror(errnum, errbuf, sizeof(errbuf));
    cout << errbuf << endl;
    cout << "avformat_alloc_output_context2 failed" << endl;
    // Every later step needs the context, so stop here instead of crashing.
    return -1;
}

（4）建立針對h264的編碼器和編碼器上下文，並向編碼器上下文中配置參數

// H.264 video encoder
const AVCodec *vcodec = avcodec_find_encoder(AVCodecID::AV_CODEC_ID_H264);
if (!vcodec) {
    cout << "avcodec_find_encoder failed!" << endl;
    return -1;
}
// Encoder context for that codec
AVCodecContext *pVideoCodecCtx = avcodec_alloc_context3(vcodec);
if (!pVideoCodecCtx) {
    cout << "avcodec_alloc_context3 failed!" << endl;
    // The settings below would dereference a null context.
    // (Demo code: objects created earlier are not released on this path.)
    return -1;
}

// Bit rate, width, height
pVideoCodecCtx->bit_rate = 4000000;
pVideoCodecCtx->width = va->getWidth();   // video width
pVideoCodecCtx->height = va->getHeight(); // video height
// Time base and frame rate
pVideoCodecCtx->time_base = { 1, 25 };
pVideoCodecCtx->framerate = { 25, 1 };
// Key-frame interval
pVideoCodecCtx->gop_size = 10;
// No B-frames
pVideoCodecCtx->max_b_frames = 0;
// Pixel format and codec id
pVideoCodecCtx->pix_fmt = AVPixelFormat::AV_PIX_FMT_YUV420P;
pVideoCodecCtx->codec_id = AVCodecID::AV_CODEC_ID_H264;
// Encoder preset
av_opt_set(pVideoCodecCtx->priv_data, "preset", "superfast", 0);
// Global header
pVideoCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

（5）開啓編碼器

// Open the video encoder with the settings configured on pVideoCodecCtx.
errnum = avcodec_open2(pVideoCodecCtx, vcodec, NULL);
if (errnum < 0) {
    // NOTE(review): the failure is only logged; execution continues.
    cout << "avcodec_open2 failed!" << endl;
}

（6）爲封裝器建立視頻流

// 爲封裝器建立視頻流
AVStream *pVideoStream = avformat_new_stream(pFormatCtx, NULL);
if (!pVideoStream) {
    cout << "avformat_new_stream video stream failed!" << endl;
}
pVideoStream->codec->codec_tag = 0;
pVideoStream->codecpar->codec_tag = 0;
// 配置視頻流的編碼參數
avcodec_parameters_from_context(pVideoStream->codecpar, pVideoCodecCtx);

（7）建立從RGB格式到YUV420格式的轉碼器

// Pixel-format converter: BGRA input to YUV420P output, both at the capture size.
SwsContext *pSwsCtx = sws_getContext(
    va->getWidth(), va->getHeight(), AVPixelFormat::AV_PIX_FMT_BGRA, // input
    va->getWidth(), va->getHeight(), AVPixelFormat::AV_PIX_FMT_YUV420P, // output
    SWS_BICUBIC, // scaling algorithm
    0, 0, 0);
if (!pSwsCtx) {
    cout << "sws_getContext failed" << endl;
}

（8）初始化一個視頻幀的對象並分配空間

// Video frame that is handed to the encoder
AVFrame *vframe = av_frame_alloc();
vframe->format = AVPixelFormat::AV_PIX_FMT_YUV420P;
vframe->width = va->getWidth();
vframe->height = va->getHeight();
vframe->pts = 0;
// Allocate the data buffers of the video frame (alignment argument: 32)
errnum = av_frame_get_buffer(vframe, 32);
if (errnum < 0) {
    cout << "av_frame_get_buffer failed" << endl;
}

以上8個步驟是對視頻部分的代碼演示，下面是音頻部分。基本的操作過程和視頻一致。

（9）建立aac的音頻編碼器和編碼器上下文

// Look up the AAC audio encoder
const AVCodec *acodec = avcodec_find_encoder(AVCodecID::AV_CODEC_ID_AAC);
if (!acodec) {
    cout << "avcodec_find_encoder failed!" << endl;
    return -1;
}

// Encoder context for that codec
AVCodecContext *pAudioCodecCtx = avcodec_alloc_context3(acodec);
if (!pAudioCodecCtx) {
    cout << "avcodec_alloc_context3 failed!" << endl;
    return -1; // the settings below would dereference a null context
}

// Bit rate, sample rate, sample format, channels, channel layout
pAudioCodecCtx->bit_rate = 64000;
pAudioCodecCtx->sample_rate = 44100;
pAudioCodecCtx->sample_fmt = AVSampleFormat::AV_SAMPLE_FMT_FLTP;
pAudioCodecCtx->channels = 2;
pAudioCodecCtx->channel_layout = av_get_default_channel_layout(2); // default layout for 2 channels (stereo)
// Timestamps are counted in samples; the encode loop rescales with this
// time base, so it must not be left unset.
pAudioCodecCtx->time_base = { 1, pAudioCodecCtx->sample_rate };
pAudioCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

（10）開啓編碼器

// Open the audio encoder
errnum = avcodec_open2(pAudioCodecCtx, acodec, NULL);
if (errnum < 0) {
    avcodec_free_context(&pAudioCodecCtx);
    cout << "avcodec_open2 failed" << endl;
    // The context is gone (the pointer is NULL now); continuing would
    // dereference it in the following steps.
    return -1;
}

（11）向封裝器添加音頻流

// 添加音頻流
AVStream *pAudioStream = avformat_new_stream(pFormatCtx, NULL);
if (!pAudioStream) {
    cout << "avformat_new_stream failed" << endl;
    return -1;
}
pAudioStream->codec->codec_tag = 0;
pAudioStream->codecpar->codec_tag = 0;
// 配置音頻流的編碼器參數
avcodec_parameters_from_context(pAudioStream->codecpar, pAudioCodecCtx);

（12）建立從S16到FLTP的音頻重採樣上下文

// Sample-format converter: S16 input to FLTP output, same layout and rate on both sides.
SwrContext *swrCtx = NULL;
swrCtx = swr_alloc_set_opts(swrCtx,
    av_get_default_channel_layout(2), AVSampleFormat::AV_SAMPLE_FMT_FLTP, 44100, // output
    av_get_default_channel_layout(2), AVSampleFormat::AV_SAMPLE_FMT_S16, 44100, // input
    0, 0);
errnum = swr_init(swrCtx);
if (errnum < 0) {
    cout << "swr_init failed" << endl;
}

（13）初始化音頻幀的結構

// Audio frame that is handed to the encoder
AVFrame *aframe = av_frame_alloc();
aframe->format = AVSampleFormat::AV_SAMPLE_FMT_FLTP;
aframe->channels = 2;
aframe->channel_layout = av_get_default_channel_layout(2);
aframe->nb_samples = 1024;
// Allocate the data buffers of the audio frame
errnum = av_frame_get_buffer(aframe, 0);
if (errnum < 0) {
    cout << "av_frame_get_buffer failed" << endl;
}

音頻部分的代碼演示完成。下面是開啓輸出流，並循環進行音視頻採集編碼。

（14）打開輸出的IO

// Open the output file for writing and attach it to the muxer
errnum = avio_open(&pFormatCtx->pb, filename, AVIO_FLAG_WRITE);
if (errnum < 0) {
    // Nothing was opened, so there is nothing to close; report why and stop,
    // because the muxer cannot write without an I/O context.
    av_strerror(errnum, errbuf, sizeof(errbuf));
    cout << errbuf << endl;
    cout << "avio_open failed" << endl;
    return -1;
}

（15）寫文件頭

// Write the container header
errnum = avformat_write_header(pFormatCtx, NULL);
if (errnum < 0) {
    cout << "avformat_write_header failed" << endl;
}

（16）編碼並將數據寫入文件，因爲咱們尚未設計出控制功能，暫且只編碼200幀視頻幀。按25幀/秒計算，應該生成長度爲8秒視頻文件。可因爲緩存的緣故，最後每每會丟幾幀數據。所以實際長度不足8秒。

int vpts = 0;
int apts = 0;

while (vpts < 200) {
    // 視頻編碼
    const uchar *rgb = va->getRGB();
    // 固定寫法：配置1幀原始視頻畫面的數據結構一般爲RGBA的形式
    uint8_t *srcSlice[AV_NUM_DATA_POINTERS] = { 0 };
    srcSlice[0] = (uint8_t *)rgb;
    int srcStride[AV_NUM_DATA_POINTERS] = { 0 };
    srcStride[0] = va->getWidth() * 4;
    // 轉換
    int h = sws_scale(pSwsCtx, srcSlice, srcStride, 0, va->getHeight(), vframe->data, vframe->linesize);
    if (h < 0) {
        cout << "sws_scale failed" << endl;
        break;
    }
    // pts遞增
    vframe->pts = vpts++;
    errnum = avcodec_send_frame(pVideoCodecCtx, vframe);
    if (errnum < 0) {
        cout << "avcodec_send_frame failed" << endl;
        continue;
    }
    // 視頻編碼報文
    AVPacket *vpkt = av_packet_alloc();

    errnum = avcodec_receive_packet(pVideoCodecCtx, vpkt);
    if (errnum < 0 || vpkt->size <= 0) {
        av_packet_free(&vpkt);
        cout << "avcodec_receive_packet failed" << endl;
        continue;
    }
    // 轉換pts
    av_packet_rescale_ts(vpkt, pVideoCodecCtx->time_base, pVideoStream->time_base);
    vpkt->stream_index = pVideoStream->index;

    // 向封裝器中寫入壓縮報文，該函數會自動釋放pkt空間，不須要調用者手動釋放
    errnum = av_interleaved_write_frame(pFormatCtx, vpkt);
    if (errnum < 0) {
        av_strerror(errnum, errbuf, sizeof(errbuf));
        cout << errbuf << endl;
        cout << "av_interleaved_write_frame failed" << endl;
        continue;
    }
    // 析構圖像數據：注意這裏只析構了圖片的數據，實際的QImage對象還在內存中
    delete rgb;

    // 音頻編碼

    // 固定寫法：配置一幀音頻的數據結構
    const char *pcm = aa->getPCM();
    if (!pcm) {
        continue;
    }
    const uint8_t *in[AV_NUM_DATA_POINTERS] = { 0 };
    in[0] = (uint8_t *)pcm;

    // 音頻重採樣
    int len = swr_convert(swrCtx,
        aframe->data, aframe->nb_samples, // 輸出
        in, aframe->nb_samples); // 輸入
    if (len < 0) {
        cout << "swr_convert failed" << endl;
        continue;
    }
    // 音頻編碼
    errnum = avcodec_send_frame(pAudioCodecCtx, aframe);
    if (errnum < 0) {
        cout << "avcodec_send_frame failed" << endl;
        continue;
    }
    // 音頻編碼報文
    AVPacket *apkt = av_packet_alloc();
    errnum = avcodec_receive_packet(pAudioCodecCtx, apkt);
    if (errnum < 0) {
        av_packet_free(&apkt);
        cout << "avcodec_receive_packet failed" << endl;
        continue;
    }
    apkt->stream_index = pAudioStream->index;
    apkt->pts = apts;
    apkt->dts = apts;
    apts += av_rescale_q(aframe->nb_samples, { 1, pAudioCodecCtx->sample_rate }, pAudioCodecCtx->time_base);
    // 寫音頻幀
    errnum = av_interleaved_write_frame(pFormatCtx, apkt);
    if (errnum < 0) {
        av_strerror(errnum, errbuf, sizeof(errbuf));
        cout << errbuf << endl;
        cout << "av_interleaved_write_frame failed" << endl;
        continue;
    }
    delete pcm;
}

（17）寫入文件尾和關閉IO

// Write the container trailer
errnum = av_write_trailer(pFormatCtx);
if (errnum != 0) {
    cout << "av_write_trailer failed" << endl;
}
errnum = avio_closep(&pFormatCtx->pb); // close the AVIO stream
if (errnum != 0) {
    cout << "avio_close failed" << endl;
}

（18）清理

// Release the muxer context. avformat_close_input() belongs to the demuxing
// API; a context created with avformat_alloc_output_context2() is released
// with avformat_free_context().
if (pFormatCtx) {
    avformat_free_context(pFormatCtx);
    pFormatCtx = NULL;
}
// Release the encoder contexts; avcodec_free_context() also closes the codec,
// so no separate avcodec_close() call is needed.
if (pVideoCodecCtx) {
    avcodec_free_context(&pVideoCodecCtx);
}
if (pAudioCodecCtx) {
    avcodec_free_context(&pAudioCodecCtx);
}
// Release the video and audio conversion contexts
if (pSwsCtx) {
    sws_freeContext(pSwsCtx);
    pSwsCtx = NULL;
}
if (swrCtx) {
    swr_free(&swrCtx);
}
// Release the video and audio frames
if (vframe) {
    av_frame_free(&vframe);
}
if (aframe) {
    av_frame_free(&aframe);
}