要作視頻流解碼,必需要了解cuda自身的解碼流,由於兩者是同樣的底層實現,不同的上層調用
那cuda的解碼流程是如何的呢
在 https://developer.nvidia.com/nvidia-video-codec-sdk 下載 Video_Codec_SDK_8.0.14
解壓開來
在sampls裏面有幾個針對不一樣場景應用的小例子,若是不知道本身該參考哪個,就須要去看開發文檔,doc裏面有一個 NVENC_VideoEncoder_API_ProgGuide.pdf 文檔
因爲我這裏使用的是視頻流解碼,因此最好去查看NvTranscoder這個demo。
在NvTranscoder裏面主要關注紅框中的這幾個文件
NvTranscoder.cpp實現了主函數
VideoDecoder.cpp實現了解碼
FrameQueue.cpp實現了gpu解碼後的數據回調
先看NvTranscoder.cpp的主要代碼(比較冗餘,有興趣能夠所有看)
// NvTranscoder sample entry point: decode an H264/HEVC file with NVDEC
// (cuvid) and re-encode it with NVENC.
// Pipeline: parse args -> init CUDA context + cuvid lock -> create decoder
// and frame queue -> spawn decode thread -> drain decoded frames on this
// thread and feed them to the encoder -> flush, print stats, clean up.
int main(int argc, char* argv[])
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    typedef HMODULE CUDADRIVER;
#else
    typedef void *CUDADRIVER;
#endif
    CUDADRIVER hHandleDriver = 0;
    // Load/initialize the CUDA driver API and the cuvid decode API.
    __cu(cuInit(0, __CUDA_API_VERSION, hHandleDriver));
    __cu(cuvidInit(0));

    // Default encoder settings; may be overridden by command-line arguments.
    EncodeConfig encodeConfig = { 0 };
    encodeConfig.endFrameIdx = INT_MAX;
    encodeConfig.bitrate = 5000000;
    encodeConfig.rcMode = NV_ENC_PARAMS_RC_CONSTQP;
    encodeConfig.gopLength = NVENC_INFINITE_GOPLENGTH;
    encodeConfig.codec = NV_ENC_H264;
    encodeConfig.fps = 0; // 0 = take the frame rate from the input stream
    encodeConfig.qp = 28;
    encodeConfig.i_quant_factor = DEFAULT_I_QFACTOR;
    encodeConfig.b_quant_factor = DEFAULT_B_QFACTOR;
    encodeConfig.i_quant_offset = DEFAULT_I_QOFFSET;
    encodeConfig.b_quant_offset = DEFAULT_B_QOFFSET;
    encodeConfig.presetGUID = NV_ENC_PRESET_DEFAULT_GUID;
    encodeConfig.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;

    NVENCSTATUS nvStatus = CNvHWEncoder::ParseArguments(&encodeConfig, argc, argv);
    if (nvStatus != NV_ENC_SUCCESS)
    {
        PrintHelp();
        return 1;
    }

    if (!encodeConfig.inputFileName || !encodeConfig.outputFileName)
    {
        PrintHelp();
        return 1;
    }

    encodeConfig.fOutput = fopen(encodeConfig.outputFileName, "wb");
    if (encodeConfig.fOutput == NULL)
    {
        PRINTERR("Failed to create \"%s\"\n", encodeConfig.outputFileName);
        return 1;
    }

    //init cuda: pick the device, create a context, then transfer ownership of
    // the context to a cuvid context lock shared by decoder and encoder.
    CUcontext cudaCtx;
    CUdevice device;
    __cu(cuDeviceGet(&device, encodeConfig.deviceID));
    __cu(cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device));

    CUcontext curCtx;
    CUvideoctxlock ctxLock;
    __cu(cuCtxPopCurrent(&curCtx));
    __cu(cuvidCtxLockCreate(&ctxLock, curCtx));

    CudaDecoder* pDecoder = new CudaDecoder;
    FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock);
    pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height);

    // Query the input's real size/frame rate; fall back to 30/1 fps when the
    // container does not report a usable rate.
    int decodedW, decodedH, decodedFRN, decodedFRD, isProgressive;
    pDecoder->GetCodecParam(&decodedW, &decodedH, &decodedFRN, &decodedFRD, &isProgressive);
    if (decodedFRN <= 0 || decodedFRD <= 0) {
        decodedFRN = 30;
        decodedFRD = 1;
    }

    if(encodeConfig.width <= 0 || encodeConfig.height <= 0) {
        encodeConfig.width = decodedW;
        encodeConfig.height = decodedH;
    }

    // fpsRatio drives MatchFPS below: >1 duplicates frames, <1 drops frames.
    float fpsRatio = 1.f;
    if (encodeConfig.fps <= 0) {
        encodeConfig.fps = decodedFRN / decodedFRD;
    }
    else {
        fpsRatio = (float)encodeConfig.fps * decodedFRD / decodedFRN;
    }

    encodeConfig.pictureStruct = (isProgressive ? NV_ENC_PIC_STRUCT_FRAME : 0);
    pFrameQueue->init(encodeConfig.width, encodeConfig.height);

    VideoEncoder* pEncoder = new VideoEncoder(ctxLock);
    assert(pEncoder->GetHWEncoder());
    nvStatus = pEncoder->GetHWEncoder()->Initialize(cudaCtx, NV_ENC_DEVICE_TYPE_CUDA);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;

    encodeConfig.presetGUID = pEncoder->GetHWEncoder()->GetPresetGUID(encodeConfig.encoderPreset, encodeConfig.codec);

    // Echo the effective configuration.
    printf("Encoding input : \"%s\"\n", encodeConfig.inputFileName);
    printf(" output : \"%s\"\n", encodeConfig.outputFileName);
    printf(" codec : \"%s\"\n", encodeConfig.codec == NV_ENC_HEVC ? "HEVC" : "H264");
    printf(" size : %dx%d\n", encodeConfig.width, encodeConfig.height);
    printf(" bitrate : %d bits/sec\n", encodeConfig.bitrate);
    printf(" vbvMaxBitrate : %d bits/sec\n", encodeConfig.vbvMaxBitrate);
    printf(" vbvSize : %d bits\n", encodeConfig.vbvSize);
    printf(" fps : %d frames/sec\n", encodeConfig.fps);
    printf(" rcMode : %s\n", encodeConfig.rcMode == NV_ENC_PARAMS_RC_CONSTQP ? "CONSTQP" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR ? "VBR" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR ? "CBR" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR_MINQP ? "VBR MINQP (deprecated)" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ? "CBR_LOWDELAY_HQ" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR_HQ ? "CBR_HQ" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR_HQ ? "VBR_HQ" : "UNKNOWN");
    if (encodeConfig.gopLength == NVENC_INFINITE_GOPLENGTH)
        printf(" goplength : INFINITE GOP \n");
    else
        printf(" goplength : %d \n", encodeConfig.gopLength);
    printf(" B frames : %d \n", encodeConfig.numB);
    printf(" QP : %d \n", encodeConfig.qp);
    printf(" preset : %s\n", (encodeConfig.presetGUID == NV_ENC_PRESET_LOW_LATENCY_HQ_GUID) ? "LOW_LATENCY_HQ" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_LOW_LATENCY_HP_GUID) ? "LOW_LATENCY_HP" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_HQ_GUID) ? "HQ_PRESET" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_HP_GUID) ? "HP_PRESET" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_LOSSLESS_HP_GUID) ? "LOSSLESS_HP" : "LOW_LATENCY_DEFAULT");
    printf("\n");

    nvStatus = pEncoder->GetHWEncoder()->CreateEncoder(&encodeConfig);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;

    nvStatus = pEncoder->AllocateIOBuffers(&encodeConfig);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;

    unsigned long long lStart, lEnd, lFreq;
    NvQueryPerformanceCounter(&lStart);

    //start decoding thread
#ifdef _WIN32
    HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL);
#else
    pthread_t pid;
    pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder);
#endif

    //start encoding thread: drain the frame queue on this thread until the
    // decoder signals end-of-decode and the queue is empty.
    int frmProcessed = 0; // frames pulled from the decoder
    int frmActual = 0;    // frames actually submitted to the encoder
    while(!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty()) )
    {
        CUVIDPARSERDISPINFO pInfo;
        if(pFrameQueue->dequeue(&pInfo))
        {
            CUdeviceptr dMappedFrame = 0;
            unsigned int pitch;
            CUVIDPROCPARAMS oVPP = { 0 };
            oVPP.progressive_frame = pInfo.progressive_frame;
            oVPP.second_field = 0;
            oVPP.top_field_first = pInfo.top_field_first;
            oVPP.unpaired_field = (pInfo.progressive_frame == 1 || pInfo.repeat_first_field <= 1);

            // Map the decoded surface to a device pointer the encoder can read.
            cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP);

            EncodeFrameConfig stEncodeConfig = { 0 };
            NV_ENC_PIC_STRUCT picType = (pInfo.progressive_frame || pInfo.repeat_first_field >= 2 ? NV_ENC_PIC_STRUCT_FRAME :
                (pInfo.top_field_first ? NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM : NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP));

            stEncodeConfig.dptr = dMappedFrame;
            stEncodeConfig.pitch = pitch;
            stEncodeConfig.width = encodeConfig.width;
            stEncodeConfig.height = encodeConfig.height;

            // Drop or duplicate frames so the output matches the requested fps.
            int dropOrDuplicate = MatchFPS(fpsRatio, frmProcessed, frmActual);
            for (int i = 0; i <= dropOrDuplicate; i++)
            {
                pEncoder->EncodeFrame(&stEncodeConfig, picType);
                frmActual++;
            }
            frmProcessed++;

            cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame);
            pFrameQueue->releaseFrame(&pInfo);
        }
    }

    // Flush the encoder.
    pEncoder->EncodeFrame(NULL, NV_ENC_PIC_STRUCT_FRAME, true);

#ifdef _WIN32
    WaitForSingleObject(decodeThread, INFINITE);
#else
    pthread_join(pid, NULL);
#endif

    if (pEncoder->GetEncodedFrames() > 0)
    {
        NvQueryPerformanceCounter(&lEnd);
        NvQueryPerformanceFrequency(&lFreq);
        double elapsedTime = (double)(lEnd - lStart)/(double)lFreq;
        printf("Total time: %fms, Decoded Frames: %d, Encoded Frames: %d, Average FPS: %f\n",
            elapsedTime * 1000, pDecoder->m_decodedFrames, pEncoder->GetEncodedFrames(),
            (float)pEncoder->GetEncodedFrames() / elapsedTime);
    }

    pEncoder->Deinitialize();
    delete pDecoder;
    delete pEncoder;
    delete pFrameQueue;

    cuvidCtxLockDestroy(ctxLock);
    __cu(cuCtxDestroy(cudaCtx));
    return 0;
}
下面這個是個人主要流程精簡版
// Trimmed-down version of the NvTranscoder main: decode only, copying each
// decoded NV12 frame from GPU memory back to host memory.
// NOTE(review): illustrative excerpt — encodeConfig, deviceID, pa and
// oResult are used but not declared in this listing.
int main(int argc, char* argv[])
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    typedef HMODULE CUDADRIVER;
#else
    typedef void *CUDADRIVER;
#endif
    CUDADRIVER hHandleDriver = 0;
    __cu(cuInit(0, __CUDA_API_VERSION, hHandleDriver)); // initialize the CUDA driver environment (mandatory)
    __cu(cuvidInit(0)); // initialize the cuvid decode API

    //init cuda
    CUcontext cudaCtx;
    CUdevice device;
    // Get the GPU handle; deviceID is the card index (one card -> 0, two cards -> 0 and 1).
    __cu(cuDeviceGet(&device, deviceID));
    // Create a CUDA context on that device.
    __cu(cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device));

    CUcontext curCtx;
    CUvideoctxlock ctxLock;
    // Pop the context created above off this CPU thread so it can be shared.
    __cu(cuCtxPopCurrent(&curCtx));
    // Create the GPU context lock from it.
    __cu(cuvidCtxLockCreate(&ctxLock, curCtx));

    CudaDecoder* pDecoder = new CudaDecoder; // the CUDA decoder object (worth a close look)
    FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock); // queue of decoded output frames
    // Initialize the decoder (worth a close look).
    pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height);
    // Initialize the decoded-output queue.
    pFrameQueue->init(encodeConfig.width, encodeConfig.height);

    // Start the decoding thread.
#ifdef _WIN32
    HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL);
#else
    pthread_t pid;
    pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder);
#endif

    //start encoding thread
    int frmProcessed = 0;
    int frmActual = 0;
    // Pull decoded frames out of the output queue until end-of-decode.
    while(!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty()) )
    {
        CUVIDPARSERDISPINFO pInfo;
        if(pFrameQueue->dequeue(&pInfo))
        {
            CUdeviceptr dMappedFrame = 0;
            unsigned int pitch;
            CUVIDPROCPARAMS oVPP = { 0 };
            oVPP.progressive_frame = pInfo.progressive_frame;
            oVPP.second_field = 0;
            oVPP.top_field_first = pInfo.top_field_first;
            oVPP.unpaired_field = (pInfo.progressive_frame == 1 || pInfo.repeat_first_field <= 1);
            // Map the frame: dMappedFrame is its GPU address, pitch the row stride.
            cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP);
            // The decoded data still lives in GPU memory; compute the NV12 buffer size.
            unsigned int nv12_size = pitch * (pDecoder->iHeight + pDecoder->iHeight/2); // 12bpp
            // Copy from GPU memory to pa->pFrameBuffer (host memory).
            oResult = cuMemcpyDtoH(pa->pFrameBuffer, dMappedFrame, nv12_size);
            // Unmap the frame in GPU memory.
            cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame);
            pFrameQueue->releaseFrame(&pInfo);
        }
    }

#ifdef _WIN32
    WaitForSingleObject(decodeThread, INFINITE);
#else
    pthread_join(pid, NULL);
#endif

    delete pDecoder;
    delete pFrameQueue;
    cuvidCtxLockDestroy(ctxLock);
    __cu(cuCtxDestroy(cudaCtx));
    return 0;
}
其中的解碼器的流程調用是重點關注的
new解碼器
// Construct an idle decoder: no source, parser, decoder or context lock yet,
// zero frames decoded, and the finished flag cleared.
CudaDecoder::CudaDecoder()
{
    m_videoSource   = NULL;
    m_videoParser   = NULL;
    m_videoDecoder  = NULL;
    m_ctxLock       = NULL;
    m_decodedFrames = 0;
    m_bFinish       = false;
}
初始化解碼器,這裏建立了三個對象,一個是源,一個是解碼器,一個是解析器,
// Initialize the GPU decoder: create the video source, the hardware decoder
// and the bitstream parser, wiring the parser callbacks back to this object.
void CudaDecoder::InitVideoDecoder(const char* videoPath, CUvideoctxlock ctxLock, FrameQueue* pFrameQueue, int targetWidth, int targetHeight)
{
    assert(videoPath); // path/URL of the input stream
    assert(ctxLock);
    assert(pFrameQueue);

    m_pFrameQueue = pFrameQueue;

    CUresult oResult;
    m_ctxLock = ctxLock;

    //init video source
    CUVIDSOURCEPARAMS oVideoSourceParameters;
    memset(&oVideoSourceParameters, 0, sizeof(CUVIDSOURCEPARAMS));
    oVideoSourceParameters.pUserData = this;
    oVideoSourceParameters.pfnVideoDataHandler = HandleVideoData;
    oVideoSourceParameters.pfnAudioDataHandler = NULL;

    // Create the data source; it delivers packets to the HandleVideoData
    // callback where m_videoParser processes them. File input only.
    oResult = cuvidCreateVideoSource(&m_videoSource, videoPath, &oVideoSourceParameters);
    if (oResult != CUDA_SUCCESS) {
        fprintf(stderr, "cuvidCreateVideoSource failed\n");
        fprintf(stderr, "Please check if the path exists, or the video is a valid H264 file\n");
        exit(-1);
    }

    //init video decoder
    CUVIDEOFORMAT oFormat;
    cuvidGetSourceVideoFormat(m_videoSource, &oFormat, 0);

    if (oFormat.codec != cudaVideoCodec_H264 && oFormat.codec != cudaVideoCodec_HEVC) {
        fprintf(stderr, "The sample only supports H264/HEVC input video!\n");
        exit(-1);
    }

    if (oFormat.chroma_format != cudaVideoChromaFormat_420) {
        fprintf(stderr, "The sample only supports 4:2:0 chroma!\n");
        exit(-1);
    }

    CUVIDDECODECREATEINFO oVideoDecodeCreateInfo;
    memset(&oVideoDecodeCreateInfo, 0, sizeof(CUVIDDECODECREATEINFO));
    oVideoDecodeCreateInfo.CodecType = oFormat.codec;
    oVideoDecodeCreateInfo.ulWidth = oFormat.coded_width;
    oVideoDecodeCreateInfo.ulHeight = oFormat.coded_height;
    // Number of decode surfaces is codec-dependent (DPB worst cases below).
    oVideoDecodeCreateInfo.ulNumDecodeSurfaces = 8;
    if ((oVideoDecodeCreateInfo.CodecType == cudaVideoCodec_H264) ||
        (oVideoDecodeCreateInfo.CodecType == cudaVideoCodec_H264_SVC) ||
        (oVideoDecodeCreateInfo.CodecType == cudaVideoCodec_H264_MVC))
    {
        // assume worst-case of 20 decode surfaces for H264
        oVideoDecodeCreateInfo.ulNumDecodeSurfaces = 20;
    }
    if (oVideoDecodeCreateInfo.CodecType == cudaVideoCodec_VP9)
        oVideoDecodeCreateInfo.ulNumDecodeSurfaces = 12;
    if (oVideoDecodeCreateInfo.CodecType == cudaVideoCodec_HEVC)
    {
        // ref HEVC spec: A.4.1 General tier and level limits
        int MaxLumaPS = 35651584; // currently assuming level 6.2, 8Kx4K
        int MaxDpbPicBuf = 6;
        int PicSizeInSamplesY = oVideoDecodeCreateInfo.ulWidth * oVideoDecodeCreateInfo.ulHeight;
        int MaxDpbSize;
        if (PicSizeInSamplesY <= (MaxLumaPS>>2))
            MaxDpbSize = MaxDpbPicBuf * 4;
        else if (PicSizeInSamplesY <= (MaxLumaPS>>1))
            MaxDpbSize = MaxDpbPicBuf * 2;
        else if (PicSizeInSamplesY <= ((3*MaxLumaPS)>>2))
            MaxDpbSize = (MaxDpbPicBuf * 4) / 3;
        else
            MaxDpbSize = MaxDpbPicBuf;
        MaxDpbSize = MaxDpbSize < 16 ? MaxDpbSize : 16;
        oVideoDecodeCreateInfo.ulNumDecodeSurfaces = MaxDpbSize + 4;
    }

    oVideoDecodeCreateInfo.ChromaFormat = oFormat.chroma_format;
    oVideoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12; // output format: NV12
    oVideoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;

    // Output size: caller's target size, or the stream's display area if none given.
    if (targetWidth <= 0 || targetHeight <= 0) {
        oVideoDecodeCreateInfo.ulTargetWidth = oFormat.display_area.right - oFormat.display_area.left;
        oVideoDecodeCreateInfo.ulTargetHeight = oFormat.display_area.bottom - oFormat.display_area.top;
    }
    else {
        oVideoDecodeCreateInfo.ulTargetWidth = targetWidth;  // output width/height
        oVideoDecodeCreateInfo.ulTargetHeight = targetHeight;
    }
    oVideoDecodeCreateInfo.display_area.left = 0;
    oVideoDecodeCreateInfo.display_area.right = (short)oVideoDecodeCreateInfo.ulTargetWidth;
    oVideoDecodeCreateInfo.display_area.top = 0;
    oVideoDecodeCreateInfo.display_area.bottom = (short)oVideoDecodeCreateInfo.ulTargetHeight;
    oVideoDecodeCreateInfo.ulNumOutputSurfaces = 2;
    oVideoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    oVideoDecodeCreateInfo.vidLock = m_ctxLock;

    // Create the hardware decoder.
    oResult = cuvidCreateDecoder(&m_videoDecoder, &oVideoDecodeCreateInfo);
    if (oResult != CUDA_SUCCESS) {
        fprintf(stderr, "cuvidCreateDecoder() failed, error code: %d\n", oResult);
        exit(-1);
    }
    m_oVideoDecodeCreateInfo = oVideoDecodeCreateInfo;

    //init video parser
    CUVIDPARSERPARAMS oVideoParserParameters;
    memset(&oVideoParserParameters, 0, sizeof(CUVIDPARSERPARAMS));
    oVideoParserParameters.CodecType = oVideoDecodeCreateInfo.CodecType;
    oVideoParserParameters.ulMaxNumDecodeSurfaces = oVideoDecodeCreateInfo.ulNumDecodeSurfaces;
    oVideoParserParameters.ulMaxDisplayDelay = 1;
    oVideoParserParameters.pUserData = this;
    oVideoParserParameters.pfnSequenceCallback = HandleVideoSequence; // called when a sequence header is parsed
    oVideoParserParameters.pfnDecodePicture = HandlePictureDecode;    // called with picture parameters ready to decode
    oVideoParserParameters.pfnDisplayPicture = HandlePictureDisplay;  // called with decoded, display-ready data

    // Create the parser; it splits packets and drives the three callbacks
    // above (per-frame format, pre-decode data, final picture data).
    oResult = cuvidCreateVideoParser(&m_videoParser, &oVideoParserParameters);
    if (oResult != CUDA_SUCCESS) {
        fprintf(stderr, "cuvidCreateVideoParser failed, error code: %d\n", oResult);
        exit(-1);
    }
}
源對象加載數據後會回調,裏面有CUVIDSOURCEDATAPACKET格式的數據包,數據包會給解析器,解析器回傳數據給解碼器,解碼器把數據回傳給隊列,發往主線程
static int CUDAAPI HandleVideoData(void* pUserData, CUVIDSOURCEDATAPACKET* pPacket) { assert(pUserData); CudaDecoder* pDecoder = (CudaDecoder*)pUserData; CUresult oResult = cuvidParseVideoData(pDecoder->m_videoParser, pPacket); if(oResult != CUDA_SUCCESS) { printf("error!\n"); } return 1; } static int CUDAAPI HandleVideoSequence(void* pUserData, CUVIDEOFORMAT* pFormat) { assert(pUserData); CudaDecoder* pDecoder = (CudaDecoder*)pUserData; if ((pFormat->codec != pDecoder->m_oVideoDecodeCreateInfo.CodecType) || // codec-type (pFormat->coded_width != pDecoder->m_oVideoDecodeCreateInfo.ulWidth) || (pFormat->coded_height != pDecoder->m_oVideoDecodeCreateInfo.ulHeight) || (pFormat->chroma_format != pDecoder->m_oVideoDecodeCreateInfo.ChromaFormat)) { fprintf(stderr, "NvTranscoder doesn't deal with dynamic video format changing\n"); return 0; } return 1; } static int CUDAAPI HandlePictureDecode(void* pUserData, CUVIDPICPARAMS* pPicParams) { assert(pUserData); CudaDecoder* pDecoder = (CudaDecoder*)pUserData; pDecoder->m_pFrameQueue->waitUntilFrameAvailable(pPicParams->CurrPicIdx); assert(CUDA_SUCCESS == cuvidDecodePicture(pDecoder->m_videoDecoder, pPicParams)); return 1; } static int CUDAAPI HandlePictureDisplay(void* pUserData, CUVIDPARSERDISPINFO* pPicParams) { assert(pUserData); CudaDecoder* pDecoder = (CudaDecoder*)pUserData; pDecoder->m_pFrameQueue->enqueue(pPicParams); pDecoder->m_decodedFrames++; return 1; }
看了以上流程,估計有一個大概的流程在內心了,
必要的gpu初始化------》初始化解碼器,解析器,源解釋器------》運行-----》處理輸出數據
如今輪到咱們本身的需求,個人需求就是實現那個ffmpeg的解碼GPU化,先看看官方文檔
首先用這個必須有一些要求
NVIDIA Video Codec SDK 8.0 System Requirements * NVIDIA Kepler/Maxwell/Pascal GPU with hardware video accelerators - Refer to the NVIDIA Video SDK developer zone web page (https://developer.nvidia.com/nvidia-video-codec-sdk) for GPUs which support encoding and decoding acceleration. * Windows: Driver version 378.66 or higher * Linux: Driver version 378.13 or higher * CUDA 7.5 Toolkit (optional) [Windows Configuration Requirements] - DirectX SDK is needed. You can download the latest SDK from Microsoft's DirectX website - The CUDA 7.5 Toolkit is optional to install (see below on how to get it) - CUDA toolkit is used for building CUDA kernels that can interop with NVENC. The following environment variables need to be set to build the sample applications included with the SDK * For Windows - DXSDK_DIR: pointing to the DirectX SDK root directory [Linux Configuration Requirements] * For Linux - X11 and OpenGL, GLUT, GLEW libraries for video playback and display - The CUDA 7.5 Toolkit is optional to install (see below on how to get it) - CUDA toolkit is used for building CUDA kernels that can interop with NVENC.
我看下了個人linux基本知足條件
驗證可行性
再看Using_FFmpeg_with_NVIDIA_GPU_Hardware_Acceleration.pdf裏面的提示能夠直接編譯ffmpeg,使用它自帶的cuda解碼器來測試解碼,不過也是有要求的
對號入座,我用的是8.0,因此使用ffmpeg3.4
編譯
# Configure and build FFmpeg with NVIDIA CUVID/NVENC support.
# BUG FIX: the original line used Unicode en-dashes in "-–enable-cuda" and
# "-–enable-libnpp" (copy/paste mojibake), which makes ./configure reject
# them as unknown options; all flags must use plain ASCII "--".
./configure --enable-shared --enable-cuda --enable-cuvid --enable-nvenc \
    --enable-nonfree --enable-libnpp \
    --extra-cflags=-I/usr/local/cuda/include \
    --extra-ldflags=-L/usr/local/cuda/lib64 \
    --prefix=/home/user/mjl/algo/ffmpeg/build
# Build with 4 jobs (the author reports 8 jobs can fail with "not found" errors).
make -j 4
驗證
ffmpeg -y -hwaccel cuvid -c:v h264_cuvid -vsync 0 -i input.mp4 -vf scale_npp=1920:1072 -vcodec h264_nvenc output0.264 -vf scale_npp=1280:720 -vcodec h264_nvenc output1.264 報錯:Unknown decoder 'h264_cuvid'
注意必定要在超級管理員權限下面運行,因爲只有超級管理員才能訪問gpu
正常輸出了文件,證實可行
關於它自帶的解碼器,我一直不是很瞭解,ffmpeg在初始化的時候統一註冊了各類編解碼器,可是如何在上層簡單的調用,一直不明白,這點能夠你們交流
我這裏是本身直接對接,也便於控制數據
// Outline of the plain-FFmpeg (CPU) decode path; steps 4/7/8/9 are the ones
// to replace for GPU decoding. NOTE(review): illustrative pseudo-code, not a
// compilable listing (the 「xxxx」 quotes and missing declarations are as
// published).
avformat_network_init();
av_register_all(); // 1. register all codec modules; FFmpeg 3.3+ includes the GPU decode modules
std::string tempfile = 「xxxx」; // the video stream address/URL
avformat_find_stream_info(format_context_, nullptr) // 2. probe a short chunk of the stream to learn its basic format
if (AVMEDIA_TYPE_VIDEO == enc->codec_type && video_stream_index_ < 0) // 3. pick out the video stream
codec_ = avcodec_find_decoder(enc->codec_id); // 4. find the matching decoder
codec_context_ = avcodec_alloc_context3(codec_); // 5. allocate the decoder's context structure
av_read_frame(format_context_, &packet_); // 6. read one packet
avcodec_send_packet(codec_context_, &packet_) // 7. submit the packet for decoding
avcodec_receive_frame(codec_context_, yuv_frame_) // 8. receive the decoded frame
sws_scale(y2r_sws_context_, yuv_frame_->data, yuv_frame_->linesize, 0, codec_context_->height, rgb_data_, rgb_line_size_) // 9. pixel-format conversion
在第一節中說過,4,7,8,9步驟須要修改
數據仍是由ffmpeg拉取,也就是說不須要cuda自帶的源獲取器,只須要對接解碼器和解析器(若是拉取數據也能夠用GPU會更好)
而在ffmpeg中出來的數據格式是AVPacket,而cuda解碼器須要的格式是CUVIDSOURCEDATAPACKET,因此涉及到格式的轉換
開始的時候我在網上資料發現一個 https://www.cnblogs.com/dwdxdy/archive/2013/08/07/3244723.html 這位兄弟的格式轉換部分是這樣實現的
我試過,不行的,沒有任何解碼輸出!
http://www.javashuo.com/article/p-nbtczmtg-gt.html 這位兄弟比較全面,可是其中的
// Decode-feed thread quoted from the referenced blog post (the author notes
// it no longer runs against current FFmpeg/SDK versions). Reads packets with
// FFmpeg, converts each AVPacket into a CUVIDSOURCEDATAPACKET and pushes it
// into the CUVID parser.
void VideoSource::play_thread(LPVOID lpParam)
{
    AVPacket *avpkt;
    avpkt = (AVPacket *)av_malloc(sizeof(AVPacket));
    CUVIDSOURCEDATAPACKET cupkt; // NOTE(review): declared outside the loop, so
                                 // cupkt.flags can carry over from a previous
                                 // packet when neither branch below sets it
    int iPkt = 0;
    CUresult oResult;
    while (av_read_frame(pFormatCtx, avpkt) >= 0){
        if (bThreadExit){
            break;
        }
        bStarted = true;
        if (avpkt->stream_index == videoindex){
            cuCtxPushCurrent(g_oContext);
            if (avpkt && avpkt->size) {
                if (h264bsfc) {
                    // Legacy bitstream-filter API (removed in newer FFmpeg);
                    // return value is ignored here.
                    av_bitstream_filter_filter(h264bsfc, pFormatCtx->streams[videoindex]->codec, NULL, &avpkt->data, &avpkt->size, avpkt->data, avpkt->size, 0);
                }
                cupkt.payload_size = (unsigned long)avpkt->size;
                cupkt.payload = (const unsigned char*)avpkt->data;
                if (avpkt->pts != AV_NOPTS_VALUE) {
                    cupkt.flags = CUVID_PKT_TIMESTAMP;
                    if (pCodecCtx->pkt_timebase.num && pCodecCtx->pkt_timebase.den){
                        // Rescale the pts from the stream timebase to AV_TIME_BASE.
                        AVRational tb;
                        tb.num = 1;
                        tb.den = AV_TIME_BASE;
                        cupkt.timestamp = av_rescale_q(avpkt->pts, pCodecCtx->pkt_timebase, tb);
                    }
                    else
                        cupkt.timestamp = avpkt->pts;
                }
            }
            else {
                cupkt.flags = CUVID_PKT_ENDOFSTREAM;
            }
            oResult = cuvidParseVideoData(oSourceData_.hVideoParser, &cupkt);
            if ((cupkt.flags & CUVID_PKT_ENDOFSTREAM) || (oResult != CUDA_SUCCESS)){
                // NOTE(review): this break skips the matching cuCtxPopCurrent
                // below, leaving g_oContext pushed on this thread.
                break;
            }
            iPkt++;
            //printf("Succeed to read avpkt %d !\n", iPkt);
            checkCudaErrors(cuCtxPopCurrent(NULL));
        }
        av_free_packet(avpkt);
    }
    // Signal the frame queue that no more packets are coming.
    oSourceData_.pFrameQueue->endDecode();
    bStarted = false;
}
這部分代碼比較陳舊,仍是沒能正常運行起來,不過很敬佩這兄弟,能分享到這一步,已經很不錯了!
這是我在他的基礎上修改的代碼,沒有用他的下面這種方式
//h264bsfc = av_bitstream_filter_init("h264_mp4toannexb"); //av_bsf_alloc(av_bsf_get_by_name("h264_mp4toannexb"), &bsf);
改用了av_bsf_send_packet和av_bsf_receive_packet方式,下面的個人代碼
// Fragment (enclosing function not shown): convert the FFmpeg AVPacket held
// in fsc->packet_ into the CUVIDSOURCEDATAPACKET pPacket and hand it to the
// CUDA decoder. Uses the modern av_bsf_send_packet/av_bsf_receive_packet API
// to rewrite MP4 H264 to Annex-B before feeding the parser.
// NOTE(review): (&fsc->packet_) is the address of a member and is always
// non-null, so both of those checks reduce to fsc->packet_.size != 0.
if ((&fsc->packet_) && fsc->packet_.size) {
    if (fsc->bsf) {
        //av_bitstream_filter_filter(h264bsfc, codec_context_, NULL, &packet_.data, &packet_.size, packet_.data, packet_.size, 0);
        //av_bitstream_filter_filter(h264bsfc, video_st->codec, NULL, &packet_.data, &packet_.size, packet_.data, packet_.size, 0);
        AVPacket filter_packet = { 0 };
        AVPacket filtered_packet = { 0 };
        int ret;
        if (&fsc->packet_ && fsc->packet_.size) {
            // NOTE(review): the "return ret" lines are commented out, so each
            // failure below only logs and execution falls through.
            if ((ret = av_packet_ref(&filter_packet, &fsc->packet_)) < 0) {
                //av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
                printf("av_packet_ref failed \n");
                //return ret;
            }
            if ((ret = av_bsf_send_packet(fsc->bsf, &filter_packet)) < 0) {
                //av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
                printf("av_bsf_send_packet failed \n");
                av_packet_unref(&filter_packet);
                //return ret;
            }
            if ((ret = av_bsf_receive_packet(fsc->bsf, &filtered_packet)) < 0) {
                //av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
                printf("av_bsf_receive_packet failed \n");
                //return ret;
            }
            // NOTE(review): raw memcpy over an AVPacket overwrites
            // fsc->packet_ without unreferencing its previous buffers —
            // looks like a leak; av_packet_move_ref would be the safe form.
            memcpy(&fsc->packet_, &filtered_packet, sizeof(AVPacket));
            //&packet_ = &filtered_packet;
        }
    }
    //if (fsc->h264bsfc){
    //    //av_bitstream_filter_filter(fsc->h264bsfc, fsc->codec_context_, NULL, &fsc->packet_.data, &fsc->packet_.size, fsc->packet_.data, fsc->packet_.size, 0);
    //    av_bitstream_filter_filter(fsc->h264bsfc, fsc->video_st->codec, NULL, &fsc->packet_.data, &fsc->packet_.size, fsc->packet_.data, fsc->packet_.size, 0);
    //}
    // Point the CUVID packet at the (possibly filtered) AVPacket payload.
    pPacket.payload_size = (unsigned long)fsc->packet_.size;
    pPacket.payload = (const unsigned char*)fsc->packet_.data;
    if (fsc->packet_.pts != AV_NOPTS_VALUE) {
        //fprintf(stderr, "fsc->packet_.pts != AV_NOPTS_VALUE \n");
        pPacket.flags = CUVID_PKT_TIMESTAMP;
        if (fsc->codec_context_->pkt_timebase.num && fsc->codec_context_->pkt_timebase.den) {
            //fprintf(stderr, "pkt_timebase.num ok \n");
            AVRational tb;
            tb.num = 1;
            tb.den = AV_TIME_BASE;
            //pPacket.timestamp = av_rescale_q(fsc->packet_.pts, fsc->codec_context_->pkt_timebase, tb);
            // Rescale the pts into 100ns units (1/10000000).
            pPacket.timestamp = av_rescale_q(fsc->packet_.pts, fsc->codec_context_->pkt_timebase, (AVRational) { 1, 10000000 });
        }
        else {
            //fprintf(stderr, "pkt_timebase.num null \n");
            pPacket.timestamp = fsc->packet_.pts;
        }
    }
}
else {
    // Empty packet: tell the parser the stream has ended.
    pPacket.flags = CUVID_PKT_ENDOFSTREAM;
    //fprintf(stderr, "fsc->packet_.pts == AV_NOPTS_VALUE \n");
}
fsc->pDecoder->HandleVideoData(&pPacket);
因而,解碼部分就已經實現,有空再貼出所有源碼。
若是以爲還能夠,打賞地址
BTC: 1GYhFurFFWq4Ta9BzFKx961EKtLhnaVHRc
ETH: 0xe54AbD803573FDD245f0Abb75f4c9Ddfc8e72050