本文轉自:http://blog.csdn.net/leixiaohua1020/article/details/45790195 歡迎訪問原處!
本文簡單分析x264的x264_slice_write()的源代碼。x264_slice_write()是x264項目的核心,它完成了編碼一個Slice的工作。根據功能的不同,該函數可以分爲濾波(Filter),分析(Analysis),宏塊編碼(Encode)和熵編碼(Entropy Encoding)幾個子模塊。本文首先對x264_slice_write()進行總體的概括,在後續文章中將會對上述幾個子模塊展開進行分析。
函數調用關係圖
x264_slice_write()的源代碼在整個x264中的位置如下圖所示。
x264_slice_write()的函數調用關係如下圖所示。
從圖中可以看出,x264_slice_write()調用了如下函數:
x264_nal_start():開始寫一個NALU。
x264_macroblock_thread_init():初始化宏塊重建數據緩存fdec_buf[]和編碼數據緩存fenc_buf[]。
x264_slice_header_write():輸出 Slice Header。
x264_fdec_filter_row():濾波模塊。該模塊包含了環路濾波,半像素插值,SSIM/PSNR的計算。
x264_macroblock_cache_load():將要編碼的宏塊的周圍的宏塊的信息讀進來。
x264_macroblock_analyse():分析模塊。該模塊包含了幀內預測模式分析以及幀間運動估計等。
x264_macroblock_encode():宏塊編碼模塊。該模塊通過對殘差的DCT變換、量化等方式對宏塊進行編碼。
x264_macroblock_write_cabac():CABAC熵編碼模塊。
x264_macroblock_write_cavlc():CAVLC熵編碼模塊。
x264_macroblock_cache_save():保存當前宏塊的信息。
x264_ratecontrol_mb():碼率控制。
x264_nal_end():結束寫一個NALU。
本文將會對上述函數進行分析。其中x264_fdec_filter_row(),x264_macroblock_analyse(),x264_macroblock_encode(),x264_macroblock_write_cabac()/x264_macroblock_write_cavlc()只做概述,後續文章中再做分析。
x264_slice_write()
x264_slice_write()用於編碼一個Slice。該函數的定義位於encoder\encoder.c,如下所示。
- /****************************************************************************
- * x264_slice_write: the actual encoding step -- encodes one slice.
- *
- * Opens a NAL unit, writes the slice header, then loops over macroblocks
- * starting at h->sh.i_first_mb. For each macroblock it runs cache load,
- * analysis, encode, entropy coding (CABAC or CAVLC), cache save and
- * per-macroblock rate control, until mb_xy == h->sh.i_last_mb or one of the
- * slice-max-size paths breaks out of the loop. Finally it flushes the entropy
- * coder and closes the NAL unit.
- *
- * Returns -1 if x264_bitstream_check_buffer() or x264_nal_end() reports
- * failure; returns 0 otherwise (including the early exit taken when
- * h->sh.i_last_mb < h->sh.i_first_mb after the loop).
- *
- * Annotated by Lei Xiaohua (雷霄驊)
- * http://blog.csdn.net/leixiaohua1020
- * [email protected]
- ****************************************************************************/
- static intptr_t x264_slice_write( x264_t *h )
- {
- int i_skip;
- // Macroblock index, and the x/y coordinates that correspond to that index
- int mb_xy, i_mb_x, i_mb_y;
- /* NALUs other than the first use a 3-byte startcode.
- * Add one extra byte for the rbsp, and one more for the final CABAC putbyte.
- * Then add an extra 5 bytes just in case, to account for random NAL escapes and
- * other inaccuracies. */
- int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
- int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
- int back_up_bitstream_cavlc = !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH;
- int back_up_bitstream = slice_max_size || back_up_bitstream_cavlc;
- int starting_bits = bs_pos(&h->out.bs);
- int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
- int b_hpel = h->fdec->b_kept_as_ref;
- int orig_last_mb = h->sh.i_last_mb;
- int thread_last_mb = h->i_threadslice_end * h->mb.i_mb_width - 1;
- uint8_t *last_emu_check;
- /* Indices into bs_bak[]: one bitstream checkpoint per reason for rolling back. */
- #define BS_BAK_SLICE_MAX_SIZE 0
- #define BS_BAK_CAVLC_OVERFLOW 1
- #define BS_BAK_SLICE_MIN_MBS 2
- #define BS_BAK_ROW_VBV 3
- x264_bs_bak_t bs_bak[4];
- b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
- bs_realign( &h->out.bs );
- 
- /* Slice */
- // Start writing a NAL unit.
- // Paired with the x264_nal_end() call further down.
- x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
- h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb;
- 
- /* Slice header */
- 
- // Initialize fdec_buf and fenc_buf, the buffers that hold macroblock pixels:
- // source (to-be-encoded) macroblock pointers p_fenc[0], p_fenc[1], p_fenc[2]
- // reconstructed macroblock pointers p_fdec[0], p_fdec[1], p_fdec[2]
- // [0] holds Y, [1] holds U, [2] holds V
- x264_macroblock_thread_init( h );
- 
- /* Set the QP equal to the first QP in the slice for more accurate CABAC initialization. */
- h->mb.i_mb_xy = h->sh.i_first_mb;
- h->sh.i_qp = x264_ratecontrol_mb_qp( h );
- h->sh.i_qp = SPEC_QP( h->sh.i_qp );
- h->sh.i_qp_delta = h->sh.i_qp - h->pps->i_pic_init_qp;
- // Write the slice header
- x264_slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc );
- // When CABAC is in use it has to be initialized first
- if( h->param.b_cabac )
- {
- /* alignment needed */
- bs_align_1( &h->out.bs );
- 
- /* init cabac */
- x264_cabac_context_init( h, &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
- x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
- last_emu_check = h->cabac.p;
- }
- else
- last_emu_check = h->out.bs.p;
- h->mb.i_last_qp = h->sh.i_qp;
- h->mb.i_last_dqp = 0;
- h->mb.field_decoding_flag = 0;
- // Macroblock position: row (initial value)
- i_mb_y = h->sh.i_first_mb / h->mb.i_mb_width;
- // Macroblock position: column (initial value)
- i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
- i_skip = 0;
- 
- // One big loop:
- // encodes every macroblock in the slice, one per iteration
- while( 1 )
- {
- // Macroblock index, computed from i_mb_x and i_mb_y.
- mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
- int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
- // Start of a macroblock row
- if( i_mb_x == 0 )
- {
- if( x264_bitstream_check_buffer( h ) )
- return -1;
- if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size )
- x264_bitstream_backup( h, &bs_bak[BS_BAK_ROW_VBV], i_skip, 1 );
- // Deblocking filter, half-pel interpolation, SSIM/PSNR computation, etc.
- // Handles one row of macroblocks per call
- if( !h->mb.b_reencode_mb )
- x264_fdec_filter_row( h, i_mb_y, 0 );
- }
- 
- if( back_up_bitstream )
- {
- if( back_up_bitstream_cavlc )
- x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
- if( slice_max_size && !(i_mb_y & SLICE_MBAFF) )
- {
- x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
- if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
- x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
- }
- }
- 
- if( PARAM_INTERLACED )
- {
- if( h->mb.b_adaptive_mbaff )
- {
- if( !(i_mb_y&1) )
- {
- /* FIXME: VSAD is fast but fairly poor at choosing the best interlace type. */
- h->mb.b_interlaced = x264_field_vsad( h, i_mb_x, i_mb_y );
- memcpy( &h->zigzagf, MB_INTERLACED ? &h->zigzagf_interlaced : &h->zigzagf_progressive, sizeof(h->zigzagf) );
- if( !MB_INTERLACED && (i_mb_y+2) == h->mb.i_mb_height )
- x264_expand_border_mbpair( h, i_mb_x, i_mb_y );
- }
- }
- h->mb.field[mb_xy] = MB_INTERLACED;
- }
- 
- /* load cache */
- // Load the data of the macroblocks surrounding the one about to be encoded
- // (mainly the blocks above and to the left)
- if( SLICE_MBAFF )
- x264_macroblock_cache_load_interlaced( h, i_mb_x, i_mb_y );
- else
- x264_macroblock_cache_load_progressive( h, i_mb_x, i_mb_y );
- // Analysis: intra prediction mode selection, inter motion estimation, etc.
- x264_macroblock_analyse( h );
- 
- /* encode this macroblock -> be careful it can change the mb type to P_SKIP if needed */
- reencode:
- // Encode: DCT transform and quantization of the residual
- x264_macroblock_encode( h );
- // CABAC output
- if( h->param.b_cabac )
- {
- if( mb_xy > h->sh.i_first_mb && !(SLICE_MBAFF && (i_mb_y&1)) )
- x264_cabac_encode_terminal( &h->cabac );
- 
- if( IS_SKIP( h->mb.i_type ) )
- x264_cabac_mb_skip( h, 1 );
- else
- {
- if( h->sh.i_type != SLICE_TYPE_I )
- x264_cabac_mb_skip( h, 0 );
- // Write the macroblock
- x264_macroblock_write_cabac( h, &h->cabac );
- }
- }
- else
- {
- // CAVLC output
- if( IS_SKIP( h->mb.i_type ) )
- i_skip++;
- else
- {
- if( h->sh.i_type != SLICE_TYPE_I )
- {
- bs_write_ue( &h->out.bs, i_skip ); /* skip run */
- i_skip = 0;
- }
- // Write the macroblock
- x264_macroblock_write_cavlc( h );
- /* If there was a CAVLC level code overflow, try again at a higher QP. */
- if( h->mb.b_overflow )
- {
- h->mb.i_chroma_qp = h->chroma_qp_table[++h->mb.i_qp];
- h->mb.i_skip_intra = 0;
- h->mb.b_skip_mc = 0;
- h->mb.b_overflow = 0;
- x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
- goto reencode;
- }
- }
- }
- 
- int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
- int mb_size = total_bits - mb_spos;
- 
- if( slice_max_size && (!SLICE_MBAFF || (i_mb_y&1)) )
- {
- /* Count the skip run, just in case. */
- if( !h->param.b_cabac )
- total_bits += bs_size_ue_big( i_skip );
- /* Check for escape bytes. */
- uint8_t *end = h->param.b_cabac ? h->cabac.p : h->out.bs.p;
- for( ; last_emu_check < end - 2; last_emu_check++ )
- if( last_emu_check[0] == 0 && last_emu_check[1] == 0 && last_emu_check[2] <= 3 )
- {
- slice_max_size -= 8;
- last_emu_check++;
- }
- /* We'll just re-encode this last macroblock if we go over the max slice size. */
- if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
- {
- if( !x264_frame_new_slice( h, h->fdec ) )
- {
- /* Handle the most obnoxious slice-min-mbs edge case: we need to end the slice
- * because it's gone over the maximum size, but doing so would violate slice-min-mbs.
- * If possible, roll back to the last checkpoint and try again.
- * We could try raising QP, but that would break in the case where a slice spans multiple
- * rows, which the re-encoding infrastructure can't currently handle. */
- if( mb_xy <= thread_last_mb && (thread_last_mb+1-mb_xy) < h->param.i_slice_min_mbs )
- {
- if( thread_last_mb-h->param.i_slice_min_mbs < h->sh.i_first_mb+h->param.i_slice_min_mbs )
- {
- x264_log( h, X264_LOG_WARNING, "slice-max-size violated (frame %d, cause: slice-min-mbs)\n", h->i_frame );
- slice_max_size = 0;
- goto cont;
- }
- x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 );
- h->mb.b_reencode_mb = 1;
- h->sh.i_last_mb = thread_last_mb-h->param.i_slice_min_mbs;
- break;
- }
- if( mb_xy-SLICE_MBAFF*h->mb.i_mb_stride != h->sh.i_first_mb )
- {
- x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
- h->mb.b_reencode_mb = 1;
- if( SLICE_MBAFF )
- {
- // set to bottom of previous mbpair
- if( i_mb_x )
- h->sh.i_last_mb = mb_xy-1+h->mb.i_mb_stride*(!(i_mb_y&1));
- else
- h->sh.i_last_mb = (i_mb_y-2+!(i_mb_y&1))*h->mb.i_mb_stride + h->mb.i_mb_width - 1;
- }
- else
- h->sh.i_last_mb = mb_xy-1;
- break;
- }
- else
- h->sh.i_last_mb = mb_xy;
- }
- else
- slice_max_size = 0;
- }
- }
- cont:
- h->mb.b_reencode_mb = 0;
- 
- /* save cache */
- // Save the current macroblock's data for use when encoding later macroblocks:
- // Intra4x4 prediction modes, non-zero DCT coefficient counts, motion vectors, reference indices, etc.
- x264_macroblock_cache_save( h );
- // Rate control. A negative return rolls the bitstream back to the
- // BS_BAK_ROW_VBV checkpoint and restarts from column 0 of the row.
- if( x264_ratecontrol_mb( h, mb_size ) < 0 )
- {
- x264_bitstream_restore( h, &bs_bak[BS_BAK_ROW_VBV], &i_skip, 1 );
- h->mb.b_reencode_mb = 1;
- i_mb_x = 0;
- i_mb_y = i_mb_y - SLICE_MBAFF;
- h->mb.i_mb_prev_xy = i_mb_y * h->mb.i_mb_stride - 1;
- h->sh.i_last_mb = orig_last_mb;
- continue;
- }
- 
- /* accumulate mb stats */
- // The long stretch of code below only fills in the statistics kept in h->stat ================================
- h->stat.frame.i_mb_count[h->mb.i_type]++;
- 
- int b_intra = IS_INTRA( h->mb.i_type );
- int b_skip = IS_SKIP( h->mb.i_type );
- if( h->param.i_log_level >= X264_LOG_INFO || h->param.rc.b_stat_write )
- {
- if( !b_intra && !b_skip && !IS_DIRECT( h->mb.i_type ) )
- {
- if( h->mb.i_partition != D_8x8 )
- h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
- else
- for( int i = 0; i < 4; i++ )
- h->stat.frame.i_mb_partition[h->mb.i_sub_partition[i]] ++;
- if( h->param.i_frame_reference > 1 )
- for( int i_list = 0; i_list <= (h->sh.i_type == SLICE_TYPE_B); i_list++ )
- for( int i = 0; i < 4; i++ )
- {
- int i_ref = h->mb.cache.ref[i_list][ x264_scan8[4*i] ];
- if( i_ref >= 0 )
- h->stat.frame.i_mb_count_ref[i_list][i_ref] ++;
- }
- }
- }
- 
- if( h->param.i_log_level >= X264_LOG_INFO )
- {
- if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
- {
- if( CHROMA444 )
- {
- for( int i = 0; i < 4; i++ )
- if( h->mb.i_cbp_luma & (1 << i) )
- for( int p = 0; p < 3; p++ )
- {
- int s8 = i*4+p*16;
- int nnz8x8 = M16( &h->mb.cache.non_zero_count[x264_scan8[s8]+0] )
- | M16( &h->mb.cache.non_zero_count[x264_scan8[s8]+8] );
- h->stat.frame.i_mb_cbp[!b_intra + p*2] += !!nnz8x8;
- }
- }
- else
- {
- int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
- + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
- h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
- h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
- h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
- }
- }
- if( h->mb.i_cbp_luma && !b_intra )
- {
- h->stat.frame.i_mb_count_8x8dct[0] ++;
- h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
- }
- if( b_intra && h->mb.i_type != I_PCM )
- {
- if( h->mb.i_type == I_16x16 )
- h->stat.frame.i_mb_pred_mode[0][h->mb.i_intra16x16_pred_mode]++;
- else if( h->mb.i_type == I_8x8 )
- for( int i = 0; i < 16; i += 4 )
- h->stat.frame.i_mb_pred_mode[1][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
- else //if( h->mb.i_type == I_4x4 )
- for( int i = 0; i < 16; i++ )
- h->stat.frame.i_mb_pred_mode[2][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
- h->stat.frame.i_mb_pred_mode[3][x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode]]++;
- }
- h->stat.frame.i_mb_field[b_intra?0:b_skip?2:1] += MB_INTERLACED;
- }
- //===========================================================
- 
- /* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
- // Compute the deblocking filter boundary strength (Bs).
- // No filtering is performed here.
- if( b_deblock )
- x264_macroblock_deblock_strength( h );
- 
- // Leave the main loop once the last macroblock has been processed
- if( mb_xy == h->sh.i_last_mb )
- break;
- 
- if( SLICE_MBAFF )
- {
- i_mb_x += i_mb_y & 1;
- i_mb_y ^= i_mb_x < h->mb.i_mb_width;
- }
- else
- i_mb_x++;// advance macroblock x by 1
- // Finished a row of macroblocks
- if( i_mb_x == h->mb.i_mb_width )
- {
- // Move on to the next row
- i_mb_y++;// advance macroblock y by 1
- i_mb_x = 0;// reset macroblock x to 0
- }
- }
- if( h->sh.i_last_mb < h->sh.i_first_mb )
- return 0;
- 
- h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
- 
- // Wrap up entropy coding
- if( h->param.b_cabac )
- {
- x264_cabac_encode_flush( h, &h->cabac );
- h->out.bs.p = h->cabac.p;
- }
- else
- {
- if( i_skip > 0 )
- bs_write_ue( &h->out.bs, i_skip ); /* last skip run */
- /* rbsp_slice_trailing_bits */
- bs_rbsp_trailing( &h->out.bs );
- bs_flush( &h->out.bs );
- }
- // Finish writing the NAL unit.
- // Paired with the x264_nal_start() call above.
- if( x264_nal_end( h ) )
- return -1;
- // NOTE(review): the original annotation only asks "multi-threaded parallel processing?".
- // The condition itself checks that the slice ended on the last macroblock of
- // row h->i_threadslice_end - 1.
- if( h->sh.i_last_mb == (h->i_threadslice_end * h->mb.i_mb_width - 1) )
- {
- h->stat.frame.i_misc_bits = bs_pos( &h->out.bs )
- + (h->out.i_nal*NALU_OVERHEAD * 8)
- - h->stat.frame.i_tex_bits
- - h->stat.frame.i_mv_bits;
- x264_fdec_filter_row( h, h->i_threadslice_end, 0 );
- 
- if( h->param.b_sliced_threads )
- {
- /* Tell the main thread we're done. */
- x264_threadslice_cond_broadcast( h, 1 );
- /* Do hpel now */
- for( int mb_y = h->i_threadslice_start; mb_y <= h->i_threadslice_end; mb_y++ )
- x264_fdec_filter_row( h, mb_y, 1 );
- x264_threadslice_cond_broadcast( h, 2 );
- /* Do the first row of hpel, now that the previous slice is done */
- if( h->i_thread_idx > 0 )
- {
- x264_threadslice_cond_wait( h->thread[h->i_thread_idx-1], 2 );
- x264_fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 );
- }
- }
- 
- /* Free mb info after the last thread's done using it */
- if( h->fdec->mb_info_free && (!h->param.b_sliced_threads || h->i_thread_idx == (h->param.i_threads-1)) )
- {
- h->fdec->mb_info_free( h->fdec->mb_info );
- h->fdec->mb_info = NULL;
- h->fdec->mb_info_free = NULL;
- }
- }
- 
- return 0;
- }
根據源代碼簡單梳理了x264_slice_write()的流程,如下所示:
(1)調用x264_nal_start()開始輸出一個NALU。
(2)x264_macroblock_thread_init():初始化宏塊重建像素緩存fdec_buf[]和編碼像素緩存fenc_buf[]。
(3)調用x264_slice_header_write()輸出 Slice Header。
(4)進入一個循環,該循環每執行一遍編碼一個宏塊:
a) 每處理一行宏塊,調用一次x264_fdec_filter_row()執行濾波模塊。
b) 調用x264_macroblock_cache_load_progressive()將要編碼的宏塊的周圍的宏塊的信息讀進來。
c) 調用x264_macroblock_analyse()執行分析模塊。
d) 調用x264_macroblock_encode()執行宏塊編碼模塊。
e) 調用x264_macroblock_write_cabac()/x264_macroblock_write_cavlc()執行熵編碼模塊。
f) 調用x264_macroblock_cache_save()保存當前宏塊的信息。
g) 調用x264_ratecontrol_mb()執行碼率控制。
h) 準備處理下一個宏塊。
(5)調用x264_nal_end()結束輸出一個NALU。
下文分別從數據結構和函數兩個方面分析x264_slice_write()的源代碼。
重要的數據結構
X264在宏塊編碼方面涉及到下面幾個比較重要的結構體:
宏塊像素存儲緩存fenc_buf[]和fdec_buf[]——位於x264_t.mb.pic中,用於存儲宏塊的亮度和色度像素。
宏塊各種信息的緩存Cache——位於x264_t.mb.cache中,用於存儲宏塊的信息例如4x4幀內預測模式、DCT的非0係數個數、運動矢量、參考幀序號等。
圖像半像素點存儲空間filtered[]——位於x264_frame_t中,用於存儲半像素插值後的點。
宏塊像素存儲緩存fenc_buf[]和fdec_buf[]
fenc_buf[]和fdec_buf[]爲x264_t.mb.pic中的數組,用於存儲一個宏塊的像素數據。其中fenc_buf[]用於存儲宏塊編碼像素數據,而fdec_buf[]用於存儲宏塊重建像素數據。它們的定義如下所示。
- /* Backing storage for p_fenc and p_fdec */
- #define FENC_STRIDE 16
- #define FDEC_STRIDE 32
- /* Memory for the source (to-be-encoded) macroblock fenc and the reconstructed
-  * macroblock fdec: 48 rows of FENC_STRIDE bytes and 52 rows of FDEC_STRIDE bytes. */
- uint8_t fenc_buf[48*FENC_STRIDE];
- uint8_t fdec_buf[52*FDEC_STRIDE];
從定義可以看出,fenc_buf[]每行16個數據;而fdec_buf[]每行32個數據。在x264_t.mb.pic中和fenc_buf[]和fdec_buf[]相關的指針數組還有p_fenc[3]和p_fdec[3],它們中的3個元素[0]、[1]、[2]分別指向對應緩存buf的Y、U、V分量。下圖畫出了像素格式爲YUV420P的時候fenc_buf[]的存儲示意圖。圖中灰色區域存儲Y,藍色區域存儲U,粉紅區域存儲V。p_fenc[0]指向Y的存儲區域,p_fenc[1]指向U的存儲區域,p_fenc[2]指向V的存儲區域,在圖中以方框的形式標註了出來。
下圖畫出了像素格式爲YUV420P的時候fdec_buf[]的存儲示意圖。圖中灰色區域存儲Y,藍色區域存儲U,粉紅區域存儲V。p_fdec[0]指向Y的存儲區域,p_fdec[1]指向U的存儲區域,p_fdec[2]指向V的存儲區域,在圖中以方框的形式標註了出來。
從圖中可以看出,fdec_buf[]和fenc_buf[]主要的區別在於fdec_buf[]像素塊的左邊和上邊包含了左上方相鄰塊用於預測的像素。
宏塊各種信息的緩存Cache
在x264中x264_t.mb.cache結構體中包含了存儲宏塊信息的各種各樣的緩存Cache。例如:
intra4x4_pred_mode:Intra4x4幀內預測模式的緩存
non_zero_count:DCT的非0係數個數的緩存
mv:運動矢量緩存
ref:運動矢量參考幀的緩存
這幾個Cache的定義如下所示。
- /* Per-macroblock information cache.
-  * Each array below is indexed through x264_scan8[] (see the uses of
-  * h->mb.cache.*[x264_scan8[...]] elsewhere); the diagrams show the 8-column layout. */
- struct
- {
- /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
- /*
- * Layout of mb.cache.intra4x4_pred_mode[]:
- * |
- * --+--------------
- * | 0 0 0 y y y y y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- */
- ALIGNED_8( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );
- 
- /* i_non_zero_count if available else 0x80 */
- /*
- * Layout of mb.cache.non_zero_count[]:
- * |
- * --+--------------
- * | 0 0 0 y y y y y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 u u u u u
- * | 0 0 0 u U U U U
- * | 0 0 0 u U U U U
- * | 0 0 0 u U U U U
- * | 0 0 0 u U U U U
- * | 0 0 0 v v v v v
- * | 0 0 0 v V V V V
- * | 0 0 0 v V V V V
- * | 0 0 0 v V V V V
- * | 0 0 0 v V V V V
- */
- ALIGNED_16( uint8_t non_zero_count[X264_SCAN8_SIZE] );
- 
- /* -1 if unused, -2 if unavailable */
- /*
- * Layout of mb.cache.ref[0][]:
- * |
- * --+--------------
- * | 0 0 0 y y y y y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- */
- ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
- 
- /* 0 if not available */
- /*
- * Layout of mb.cache.mv[0][]:
- * |
- * --+--------------
- * | 0 0 0 y y y y y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- */
- ALIGNED_16( int16_t mv[2][X264_SCAN8_LUMA_SIZE][2] );
- ALIGNED_8( uint8_t mvd[2][X264_SCAN8_LUMA_SIZE][2] );
- 
- /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
- ALIGNED_4( int8_t skip[X264_SCAN8_LUMA_SIZE] );
- 
- ALIGNED_4( int16_t direct_mv[2][4][2] );
- ALIGNED_4( int8_t direct_ref[2][4] );
- int direct_partition;
- ALIGNED_4( int16_t pskip_mv[2] );
- 
- /* number of neighbors (top and left) that used 8x8 dct */
- int i_neighbour_transform_size;
- int i_neighbour_skip;
- 
- /* neighbor CBPs */
- int i_cbp_top;
- int i_cbp_left;
- 
- /* extra data required for mbaff in mv prediction */
- int16_t topright_mv[2][3][2];
- int8_t topright_ref[2][3];
- 
- /* current mb deblock strength */
- uint8_t (*deblock_strength)[8][4];
- } cache;
通過觀察上面的定義,會發現Cache都是一個包含x*8個元素的一維數組(x取15或者5)。Cache使用一維數組比較形象的存儲了二維圖像的信息。從上面的代碼可以看出Cache中存儲有效數據的地方是一個位於右下角的「方形區域」,這一部分實際上對應一維數組中第12-15,20-23,28-31,36-39的元素。這個「方形區域」代表了一個宏塊的亮度相關的信息,其中一共包含16個元素。由於1個宏塊的亮度數據是1個16x16的塊,所以這個「方形區域」裏面1個元素實際上代表了一個4x4的塊的信息(「4x4」的亮度塊應該也是H.264壓縮編碼中最小的處理單元)。
如果我們使用12-15,20-23,28-31,36-39這些範圍內的下標引用Cache中的元素,實在是不太方便。由此也引出了x264中另一個關鍵的變量——scan8[]數組。
scan8[]
scan8[]存儲的是緩存的序號值,它一般情況下是與前面提到的Cache配合使用的。x264中scan8[](即x264_scan8[])的定義位於common\macroblock.h,如下所示。
- /* Scan8 organization:
- *    0 1 2 3 4 5 6 7
- * 0  DY    y y y y y
- * 1        y Y Y Y Y
- * 2        y Y Y Y Y
- * 3        y Y Y Y Y
- * 4        y Y Y Y Y
- * 5  DU    u u u u u
- * 6        u U U U U
- * 7        u U U U U
- * 8        u U U U U
- * 9        u U U U U
- * 10 DV    v v v v v
- * 11       v V V V V
- * 12       v V V V V
- * 13       v V V V V
- * 14       v V V V V
- * DY/DU/DV are for luma/chroma DC.
- */
- /*
- * Scan order:
- * o-o o-o
- * / / /
- * o-o o-o
- * ,---'
- * o-o o-o
- * / / /
- * o-o o-o
- */
- /*
- * About scan8, which shows up repeatedly
- *
- * The cache is a table. It holds the information of one whole macroblock, and each
- * element stands for one "4x4 block" (the smallest processing unit in H.264).
- * scan8[] stores the indices at which the macroblock information sits in the cache.
- *
- * The "8" in scan8[] presumably means "scan in units of 8x8"?
- * So it can be read as "scanning the 4x4 blocks in units of 8x8"?
- *
- * scan8 stores the indices for Y, U and V in that order. The data itself lives in
- * the corresponding cache.
- *
- * The cache stores Y first, then U and V. Its layout is shown below;
- * the numbers are indices into scan8[].
- *
- * +---+---+---+---+---+---+---+---+---+
- * |   | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
- * +---+---+---+---+---+---+---+---+---+
- * | 0 | 48|   |   |   |  y|  y|  y|  y|
- * | 1 |   |   |   |  y|  0|  1|  4|  5|
- * | 2 |   |   |   |  y|  2|  3|  6|  7|
- * | 3 |   |   |   |  y|  8|  9| 12| 13|
- * | 4 |   |   |   |  y| 10| 11| 14| 15|
- * | 5 | 49|   |   |   |  u|  u|  u|  u|
- * | 6 |   |   |   |  u| 16| 17| 20| 21|
- * | 7 |   |   |   |  u| 18| 19| 22| 23|
- * | 8 |   |   |   |  u| 24| 25| 28| 29|
- * | 9 |   |   |   |  u| 26| 27| 30| 31|
- * |10 | 50|   |   |   |  v|  v|  v|  v|
- * |11 |   |   |   |  v| 32| 33| 36| 37|
- * |12 |   |   |   |  v| 34| 35| 38| 39|
- * |13 |   |   |   |  v| 40| 41| 44| 45|
- * |14 |   |   |   |  v| 42| 43| 46| 47|
- * |---+---+---+---+---+---+---+---+---+
- * | |
- *
- */
- 
- /* Indices into x264_scan8[] of the DC entries (see the last row of the table below). */
- #define LUMA_DC 48
- #define CHROMA_DC 49
- 
- /* Each entry is written as column + row*8, i.e. a position in the 8-column cache.
-  * Entries 0-15 land in rows 1-4, entries 16-31 in rows 6-9 and entries 32-47 in
-  * rows 11-14, always in columns 4-7; the final three entries (48, 49, 50) are
-  * column 0 of rows 0, 5 and 10. */
- static const uint8_t x264_scan8[16*3 + 3] =
- {
- 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
- 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
- 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
- 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
- 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
- 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
- 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
- 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
- 4+11*8, 5+11*8, 4+12*8, 5+12*8,
- 6+11*8, 7+11*8, 6+12*8, 7+12*8,
- 4+13*8, 5+13*8, 4+14*8, 5+14*8,
- 6+13*8, 7+13*8, 6+14*8, 7+14*8,
- 0+ 0*8, 0+ 5*8, 0+10*8
- };
可以看出scan8[]數組中元素的值都是以「a+b*8」的形式寫的,我們不妨計算一下前面16個元素的值:
scan8[0]=12
scan8[1]= 13
scan8[2]= 20
scan8[3]= 21
scan8[4]= 14
scan8[5]= 15
scan8[6]= 22
scan8[7]= 23
scan8[8]= 28
scan8[9]= 29
scan8[10]= 36
scan8[11]= 37
scan8[12]= 30
scan8[13]= 31
scan8[14]= 38
scan8[15]= 39
如果把scan8[]數組這些元素的值,作爲Cache(例如mv[],ref[]等)的序號,會發現他們的在Cache中代表的元素的位置如下圖所示。
上圖中灰色背景的元素即爲Cache中有效的元素(不使用左邊的空白區域的元素可能是由於歷史原因)。直接使用Cache元素序號可能感覺比較抽象,下圖使用scan8[]數組元素序號表示Cache中存儲的數據,則結果如下圖所示。
圖中每個元素代表了一個4x4的塊的信息,每個由16個元素組成的「大方塊」代表了1個宏塊的1個分量的信息。灰色背景的「大方塊」存儲的是宏塊中亮度Y相關的信息,藍色背景的「大方塊」存儲的是宏塊中色度U相關的信息,粉紅背景的「大方塊」存儲的是宏塊中色度V相關的信息。
PS:有關scan8[]數組在網上能查到一點資料。但是經過源代碼比對之後,我發現網上的資料已經過時了。舊版本scan8[]代表的Cache的存儲方式如下所示。
可以看出舊版本的scan8[]中U、V是存儲在Y的左邊的區域,而且每個分量只有4個元素,而新版本的scan8[]中U、V是存儲在Y的下邊的區域,而且每個分量有16個元素。
圖像半像素點存儲緩存filtered[]
X264中在圖像運動搜索的過程中,需要使用1/4像素精度的運動補償。其中半像素點的內插工作是提前完成的。每一幀的半像素點存儲在x264_frame_t的filtered[3][4]變量中。其中前面的「[3]」代表Y,U,V三個分量,後面的「[4]」分別存儲了整像素, H半像素, V半像素, C(對角線)半像素的數據。
下面的圖以4x4圖像塊爲例演示了filtered[][4]中幾種半像素點與整像素點之間的位置關係。圖中灰色的點爲整像素點,黃色的點爲半像素點。filtered[][0]存儲了整像素點數據,filtered[][1]存儲了H半像素點數據,filtered[][2]存儲了V半像素點數據,filtered[][3]存儲了C(對角線)半像素點數據。
重要的函數
下文簡單記錄x264_slice_write()中調用的幾個函數:
x264_macroblock_thread_init():初始化宏塊重建數據緩存fdec_buf[]和編碼數據緩存fenc_buf[]。
x264_slice_header_write():輸出 Slice Header。
x264_macroblock_cache_load():將要編碼的宏塊的周圍的宏塊的信息讀進來。
x264_macroblock_cache_save():保存當前宏塊的信息。
另外還有一些關鍵模塊對應的函數將會在後續文章中進行分析:
x264_fdec_filter_row():濾波模塊。該模塊包含了環路濾波,半像素插值,SSIM/PSNR的計算。
x264_macroblock_analyse():分析模塊。該模塊包含了幀內預測模式分析以及幀間運動估計等。
x264_macroblock_encode():宏塊編碼模塊。該模塊通過對殘差的DCT變換、量化等方式對宏塊進行編碼。
x264_macroblock_write_cabac():CABAC熵編碼模塊。
x264_macroblock_write_cavlc():CAVLC熵編碼模塊。
x264_macroblock_thread_init()
x264_macroblock_thread_init()用於初始化宏塊重建數據緩存fdec_buf[]和編碼數據緩存fenc_buf[]。該函數的定義位於common\macroblock.c,如下所示。
從源代碼可以看出,x264_macroblock_thread_init()設定了宏塊編碼數據指針p_fenc[0],p_fenc[1],p_fenc[2]在fenc_buf[]中的位置,以及宏塊重建數據指針p_fdec[0],p_fdec[1],p_fdec[2] 在fdec_buf[]中的位置。由於前文中已經介紹過fenc_buf[]和fdec_buf[]的結構,在這裏不再重複。
x264_slice_header_write()
x264_slice_header_write()用於輸出Slice Header。該函數的定義位於encoder\encoder.c,如下所示。
- /* Write the slice header described by *sh into bitstream s.
-  *
-  * s:             destination bitstream writer.
-  * sh:            slice header to serialize. Not read-only: sh->b_weighted_pred is
-  *                reset to 0 and then recomputed here for P slices with weighted prediction.
-  * i_nal_ref_idc: when non-zero, the reference picture marking fields are written.
-  *
-  * Fields are emitted in the fixed order shown below; conditional fields depend on the
-  * slice type and on flags read from sh->sps and sh->pps. */
- static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal_ref_idc )
- {
- if( sh->b_mbaff )
- {
- int first_x = sh->i_first_mb % sh->sps->i_mb_width;
- int first_y = sh->i_first_mb / sh->sps->i_mb_width;
- assert( (first_y&1) == 0 );
- bs_write_ue( s, (2*first_x + sh->sps->i_mb_width*(first_y&~1) + (first_y&1)) >> 1 );
- }
- else
- bs_write_ue( s, sh->i_first_mb );// first_mb_in_slice: address of the first macroblock in the slice
- // slice_type: type of the slice (I, B, P, SI, SP)
- bs_write_ue( s, sh->i_type + 5 ); /* same type things */
- // pic_parameter_set_id: index of the PPS
- bs_write_ue( s, sh->i_pps_id );
- // frame_num: indicates the decoding order of the pictures
- bs_write( s, sh->sps->i_log2_max_frame_num, sh->i_frame_num & ((1<<sh->sps->i_log2_max_frame_num)-1) );
- 
- if( !sh->sps->b_frame_mbs_only )
- {
- bs_write1( s, sh->b_field_pic );
- if( sh->b_field_pic )
- bs_write1( s, sh->b_bottom_field );
- }
- 
- if( sh->i_idr_pic_id >= 0 ) /* NAL IDR */
- bs_write_ue( s, sh->i_idr_pic_id );// idr_pic_id: identifier of the IDR picture
- 
- if( sh->sps->i_poc_type == 0 )
- {
- bs_write( s, sh->sps->i_log2_max_poc_lsb, sh->i_poc & ((1<<sh->sps->i_log2_max_poc_lsb)-1) );
- if( sh->pps->b_pic_order && !sh->b_field_pic )
- bs_write_se( s, sh->i_delta_poc_bottom );
- }
- 
- if( sh->pps->b_redundant_pic_cnt )
- bs_write_ue( s, sh->i_redundant_pic_cnt );
- 
- if( sh->i_type == SLICE_TYPE_B )
- bs_write1( s, sh->b_direct_spatial_mv_pred );
- 
- if( sh->i_type == SLICE_TYPE_P || sh->i_type == SLICE_TYPE_B )
- {
- bs_write1( s, sh->b_num_ref_idx_override );
- if( sh->b_num_ref_idx_override )
- {
- bs_write_ue( s, sh->i_num_ref_idx_l0_active - 1 );
- if( sh->i_type == SLICE_TYPE_B )
- bs_write_ue( s, sh->i_num_ref_idx_l1_active - 1 );
- }
- }
- 
- /* ref pic list reordering */
- if( sh->i_type != SLICE_TYPE_I )
- {
- bs_write1( s, sh->b_ref_pic_list_reordering[0] );
- if( sh->b_ref_pic_list_reordering[0] )
- {
- for( int i = 0; i < sh->i_num_ref_idx_l0_active; i++ )
- {
- bs_write_ue( s, sh->ref_pic_list_order[0][i].idc );
- bs_write_ue( s, sh->ref_pic_list_order[0][i].arg );
- }
- bs_write_ue( s, 3 );
- }
- }
- if( sh->i_type == SLICE_TYPE_B )
- {
- bs_write1( s, sh->b_ref_pic_list_reordering[1] );
- if( sh->b_ref_pic_list_reordering[1] )
- {
- for( int i = 0; i < sh->i_num_ref_idx_l1_active; i++ )
- {
- bs_write_ue( s, sh->ref_pic_list_order[1][i].idc );
- bs_write_ue( s, sh->ref_pic_list_order[1][i].arg );
- }
- bs_write_ue( s, 3 );
- }
- }
- 
- sh->b_weighted_pred = 0;
- if( sh->pps->b_weighted_pred && sh->i_type == SLICE_TYPE_P )
- {
- sh->b_weighted_pred = sh->weight[0][0].weightfn || sh->weight[0][1].weightfn || sh->weight[0][2].weightfn;
- /* pred_weight_table() */
- bs_write_ue( s, sh->weight[0][0].i_denom );
- bs_write_ue( s, sh->weight[0][1].i_denom );
- for( int i = 0; i < sh->i_num_ref_idx_l0_active; i++ )
- {
- int luma_weight_l0_flag = !!sh->weight[i][0].weightfn;
- int chroma_weight_l0_flag = !!sh->weight[i][1].weightfn || !!sh->weight[i][2].weightfn;
- bs_write1( s, luma_weight_l0_flag );
- if( luma_weight_l0_flag )
- {
- bs_write_se( s, sh->weight[i][0].i_scale );
- bs_write_se( s, sh->weight[i][0].i_offset );
- }
- bs_write1( s, chroma_weight_l0_flag );
- if( chroma_weight_l0_flag )
- {
- for( int j = 1; j < 3; j++ )
- {
- bs_write_se( s, sh->weight[i][j].i_scale );
- bs_write_se( s, sh->weight[i][j].i_offset );
- }
- }
- }
- }
- else if( sh->pps->b_weighted_bipred == 1 && sh->i_type == SLICE_TYPE_B )
- {
- /* TODO */
- }
- 
- if( i_nal_ref_idc != 0 )
- {
- if( sh->i_idr_pic_id >= 0 )
- {
- bs_write1( s, 0 ); /* no output of prior pics flag */
- bs_write1( s, 0 ); /* long term reference flag */
- }
- else
- {
- bs_write1( s, sh->i_mmco_command_count > 0 ); /* adaptive_ref_pic_marking_mode_flag */
- if( sh->i_mmco_command_count > 0 )
- {
- for( int i = 0; i < sh->i_mmco_command_count; i++ )
- {
- bs_write_ue( s, 1 ); /* mark short term ref as unused */
- bs_write_ue( s, sh->mmco[i].i_difference_of_pic_nums - 1 );
- }
- bs_write_ue( s, 0 ); /* end command list */
- }
- }
- }
- 
- if( sh->pps->b_cabac && sh->i_type != SLICE_TYPE_I )
- bs_write_ue( s, sh->i_cabac_init_idc );
- 
- // slice_qp_delta: specifies the initial value of the quantization parameter used for all macroblocks of the current slice
- // SliceQP = 26 + pic_init_qp_minus26 + slice_qp_delta
- bs_write_se( s, sh->i_qp_delta ); /* slice qp delta */
- 
- if( sh->pps->b_deblocking_filter_control )
- {
- bs_write_ue( s, sh->i_disable_deblocking_filter_idc );
- if( sh->i_disable_deblocking_filter_idc != 1 )
- {
- bs_write_se( s, sh->i_alpha_c0_offset >> 1 );
- bs_write_se( s, sh->i_beta_offset >> 1 );
- }
- }
- }
有關x264_slice_header_write()的源代碼不再做詳細的分析。其中Slice Header的結構參考《H.264標準》即可。
x264_fdec_filter_row()
x264_fdec_filter_row()屬於濾波模塊,完成幾種濾波工作:
(1)半像素內插
(2)環路濾波
(3)PSNR/SSIM計算
下面簡單記錄一下半像素內插和環路濾波的概念(後續文章再對源代碼進行分析)。
(1)半像素插值知識簡述
簡單記錄一下半像素插值的知識。《H.264標準》中規定,運動估計爲1/4像素精度。因此在H.264編碼和解碼的過程中,需要將畫面中的像素進行插值——簡單地說就是把原先的1個像素點拓展成4x4一共16個點。下圖顯示了H.264編碼和解碼過程中像素插值情況。可以看出原先的G點的右下方通過插值的方式產生了a、b、c、d等一共16個點。
如圖所示,1/4像素內插一般分成兩步:
(1)半像素內插。這一步通過6抽頭濾波器獲得5個半像素點。
(2)線性內插。這一步通過簡單的線性內插獲得剩餘的1/4像素點。
圖中半像素內插點爲b、m、h、s、j五個點。半像素內插方法是對整像素點進行6 抽頭濾波得出,濾波器的權重爲(1/32, -5/32, 5/8, 5/8, -5/32, 1/32)。例如b的計算公式爲:
b=round( (E - 5F + 20G + 20H - 5I + J ) / 32)
剩下幾個半像素點的計算關係如下:
m:由B、D、H、N、S、U計算
h:由A、C、G、M、R、T計算
s:由K、L、M、N、P、Q計算
j:由cc、dd、h、m、ee、ff計算。需要注意j點的運算量比較大,因爲cc、dd、ee、ff都需要通過半像素內插方法進行計算。
在獲得半像素點之後,就可以通過簡單的線性內插獲得1/4像素內插點了。1/4像素內插的方式如下圖所示。例如圖中a點的計算公式如下:
a=round( (G+b)/2 )
在這裏有一點需要注意:位於4個角的e、g、p、r四個點並不是通過j點計算的,而是通過b、h、s、m四個半像素點計算的。
(2)環路濾波相關知識簡述
簡單記錄一下環路濾波(去塊效應濾波)的知識。X264的重建幀(通過解碼得到)一般情況下會出現方塊效應。產生這種效應的原因主要有兩個:
(1)DCT變換後的量化造成誤差(主要原因)。
(2)運動補償
正是由於這種塊效應的存在,才需要添加環路濾波器調整相鄰的「塊」邊緣上的像素值以減輕這種視覺上的不連續感。下面一張圖顯示了環路濾波的效果。圖中左邊的圖沒有使用環路濾波,而右邊的圖使用了環路濾波。
環路濾波分類
環路濾波器根據濾波的強度可以分爲兩種:
(1)普通濾波器。針對邊界的Bs(邊界強度)爲1、2、3的濾波器。此時環路濾波涉及到方塊邊界周圍的6個點(邊界兩邊各3個點):p2,p1,p0,q0,q1,q2。需要處理4個點(邊界兩邊各2個點,只以p點爲例):
p0’ = p0 + ( ( ( ( q0 - p0 ) << 2 ) + ( p1 - q1 ) + 4 ) >> 3 )
p1’ = p1 + ( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - 2*p1 ) >> 1 )
(以上兩式省略了標準中對修正量的限幅(Clip)操作。)
(2)強濾波器。針對邊界的Bs(邊界強度)爲4的濾波器。此時環路濾波涉及到方塊邊界周圍的8個點(邊界兩邊各4個點):p3,p2,p1,p0,q0,q1,q2,q3。需要處理6個點(邊界兩邊各3個點,只以p點爲例):
p0’ = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3
p1’ = ( p2 + p1 + p0 + q0 + 2 ) >> 2
p2’ = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3
其中上文中提到的邊界強度Bs的判定方式如下。
| 條件(針對兩邊的圖像塊) | Bs |
| --- | --- |
| 有一個塊爲幀內預測 + 邊界爲宏塊邊界 | 4 |
| 有一個塊爲幀內預測 | 3 |
| 有一個塊對殘差編碼 | 2 |
| 運動矢量差不小於1像素 | 1 |
| 運動補償參考幀不同 | 1 |
| 其它 | 0 |
總體說來,與幀內預測相關的圖像塊(幀內預測塊)的邊界強度比較大,取值爲3或者4;與運動補償相關的圖像塊(幀間預測塊)的邊界強度比較小,取值爲1。
環路濾波的門限
並不是所有的塊的邊界處都需要環路濾波。例如畫面中物體的邊界正好和塊的邊界重合的話,就不能進行濾波,否則會使畫面中物體的邊界變模糊。因此需要區別開物體邊界和塊效應邊界。一般情況下,物體邊界兩邊的像素值差別很大,而塊效應邊界兩邊像素值差別比較小。《H.264標準》以這個特點定義了2個變量alpha和beta來判決邊界是否需要進行環路濾波。只有滿足下面三個條件的時候才能進行環路濾波:
| p0 - q0 | < alpha
| p1 – p0 | < beta
| q1 - q0 | < beta
簡而言之,就是邊界兩邊的兩個點的像素值之差不能太大,即必須小於alpha;邊界一邊的前兩個點之間的像素值之差也不能太大,即必須小於beta。其中alpha和beta是根據量化參數QP推算出來(具體方法不再記錄)。總體說來QP越大,alpha和beta的值也越大,也就越容易觸發環路濾波。由於QP越大表明壓縮的程度越大,所以也可以得知高壓縮比的情況下更需要進行環路濾波。
x264_macroblock_cache_load()
x264_slice_write()根據是否包含隔行掃描,會分別調用x264_macroblock_cache_load_progressive()或者x264_macroblock_cache_load_interlaced()加載當前宏塊的周邊宏塊的信息。這兩個函數都會調用同一個函數x264_macroblock_cache_load()。上述兩個函數的定義位於common\macroblock.c,如下所示。
- // Cache load, progressive variant:
- // reads in the data of the macroblocks around the one about to be encoded.
- // Forwards to x264_macroblock_cache_load() with its fourth argument (b_mbaff) set to 0.
- void x264_macroblock_cache_load_progressive( x264_t *h, int mb_x, int mb_y )
- {
- x264_macroblock_cache_load( h, mb_x, mb_y, 0 );
- }
- // Cache load, interlaced variant:
- // forwards to x264_macroblock_cache_load() with its fourth argument (b_mbaff) set to 1.
- void x264_macroblock_cache_load_interlaced( x264_t *h, int mb_x, int mb_y )
- {
- x264_macroblock_cache_load( h, mb_x, mb_y, 1 );
- }
x264_macroblock_cache_load()的定義位於common\macroblock.c,如下所示。
- //加載Cache
- //即將要編碼的宏塊的周圍的宏塊的值讀進來
- static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff )
- { /* Populate h->mb.cache (plus related per-macroblock state under h->mb) for the
-    * macroblock at (mb_x, mb_y), using its top / left / top-left / top-right neighbours.
-    *   h       : encoder context; everything is read and written through it
-    *   mb_x/y  : macroblock coordinates used to index the per-frame arrays
-    *   b_mbaff : non-zero selects the MBAFF-specific branches below
-    * Returns nothing; all results are stored through h. */
- x264_macroblock_cache_load_neighbours( h, mb_x, mb_y, b_mbaff );
-
- // Left neighbour macroblock indices (indexed with LTOP/LBOT below)
- int *left = h->mb.i_mb_left_xy;
- // Top neighbour macroblock index
- int top = h->mb.i_mb_top_xy;
- int top_y = h->mb.i_mb_top_y;
- int s8x8 = h->mb.i_b8_stride;
- int s4x4 = h->mb.i_b4_stride;
- int top_8x8 = (2*top_y+1) * s8x8 + 2*mb_x;
- int top_4x4 = (4*top_y+3) * s4x4 + 4*mb_x;
- int lists = (1 << h->sh.i_type) & 3; /* loop bound for the per-list loops below; presumably the number of reference lists for this slice type -- depends on SLICE_TYPE_* values defined elsewhere */
-
- /* GCC pessimizes direct loads from heap-allocated arrays due to aliasing. */
- /* By only dereferencing them once, we avoid this issue. */
- int8_t (*i4x4)[8] = h->mb.intra4x4_pred_mode;
- // Non-zero DCT coefficient counts (per macroblock)
- uint8_t (*nnz)[48] = h->mb.non_zero_count;
- // CBP (coded block pattern) values
- int16_t *cbp = h->mb.cbp;
-
- const x264_left_table_t *left_index_table = h->mb.left_index_table;
-
- h->mb.cache.deblock_strength = h->deblock_strength[mb_y&1][h->param.b_sliced_threads?h->mb.i_mb_xy:mb_x];
-
- /*
- *
- * About scan8, which appears many times below
- * (note from the original annotator; x264_scan8[] is defined outside this function)
- *
- * scan8 is used together with the caches.
- * A cache is a table holding the information of one whole macroblock; each element stands for one "4x4 luma block" (the smallest luma processing unit in H.264).
- * scan8[] stores the index inside the cache of each piece of macroblock information.
- *
- * The "8" in scan8[] presumably means scanning in units of 8x8,
- * i.e. "scan the 4x4 blocks in 8x8 units" (annotator's guess, unconfirmed).
- *
- * scan8 holds, in order, the cache indices for Y, U and V. The data itself lives in the respective cache.
- *
- * The cache stores Y first, then U and V, laid out as shown below.
- * The numbers are indices into scan8[].
- *
- *   +---+---+---+---+---+---+---+---+---+
- *   |   | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
- *   +---+---+---+---+---+---+---+---+---+
- *   | 0 | 48|   |   |   |  y|  y|  y|  y|
- *   | 1 |   |   |   |  y|  0|  1|  4|  5|
- *   | 2 |   |   |   |  y|  2|  3|  6|  7|
- *   | 3 |   |   |   |  y|  8|  9| 12| 13|
- *   | 4 |   |   |   |  y| 10| 11| 14| 15|
- *   | 5 | 49|   |   |   |  u|  u|  u|  u|
- *   | 6 |   |   |   |  u| 16| 17| 20| 21|
- *   | 7 |   |   |   |  u| 18| 19| 22| 23|
- *   | 8 |   |   |   |  u| 24| 25| 28| 29|
- *   | 9 |   |   |   |  u| 26| 27| 30| 31|
- *   |10 | 50|   |   |   |  v|  v|  v|  v|
- *   |11 |   |   |   |  v| 32| 33| 36| 37|
- *   |12 |   |   |   |  v| 34| 35| 38| 39|
- *   |13 |   |   |   |  v| 40| 41| 44| 45|
- *   |14 |   |   |   |  v| 42| 43| 46| 47|
- *   |---+---+---+---+---+---+---+---+---+
- *   |   |
- *
- * Scan order:
- * o-o o-o
- *  / / /
- * o-o o-o
- *  ,---'
- * o-o o-o
- *  / / /
- * o-o o-o
- *
- */
-
- /* load cache */
- if( h->mb.i_neighbour & MB_TOP )
- {
- h->mb.cache.i_cbp_top = cbp[top];
- /* load intra4x4 */
- /*
- * Fill intra4x4_pred_mode[].
- * This fills the entries marked "y" below (this cache has no U/V part).
- * |
- * --+--------------
- * | 0 0 0 0 y y y y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- */
- CP32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8], &i4x4[top][0] );
-
- /* load non_zero_count */
- /*
- * Fill non_zero_count[].
- * This fills the entries marked "y" below (only Y is drawn; U and V are handled the same way).
- * |
- * --+--------------
- * | 0 0 0 0 y y y y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- */
- CP32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8], &nnz[top][12] ); //Y
- CP32( &h->mb.cache.non_zero_count[x264_scan8[16] - 8], &nnz[top][16-4 + (16>>CHROMA_V_SHIFT)] ); //U
- CP32( &h->mb.cache.non_zero_count[x264_scan8[32] - 8], &nnz[top][32-4 + (16>>CHROMA_V_SHIFT)] ); //V
-
- /* Finish the prefetching */
- for( int l = 0; l < lists; l++ )
- {
- x264_prefetch( &h->mb.mv[l][top_4x4-1] );
- /* Top right being not in the same cacheline as top left will happen
- * once every 4 MBs, so one extra prefetch is worthwhile */
- x264_prefetch( &h->mb.mv[l][top_4x4+4] );
- x264_prefetch( &h->mb.ref[l][top_8x8-1] );
- x264_prefetch( &h->mb.mvd[l][top] );
- }
- }
- else
- {
- // Top neighbour not available: fill in the placeholder values below
-
- h->mb.cache.i_cbp_top = -1;
-
- /* load intra4x4 */
- M32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] ) = 0xFFFFFFFFU;
-
- /* load non_zero_count */
- M32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8] ) = 0x80808080U; /* 0x80 per entry: same "unavailable" marker as used for the left column below */
- M32( &h->mb.cache.non_zero_count[x264_scan8[16] - 8] ) = 0x80808080U;
- M32( &h->mb.cache.non_zero_count[x264_scan8[32] - 8] ) = 0x80808080U;
- }
-
- if( h->mb.i_neighbour & MB_LEFT )
- {
- int ltop = left[LTOP];
- int lbot = b_mbaff ? left[LBOT] : ltop;
- if( b_mbaff )
- {
- const int16_t top_luma = (cbp[ltop] >> (left_index_table->mv[0]&(~1))) & 2;
- const int16_t bot_luma = (cbp[lbot] >> (left_index_table->mv[2]&(~1))) & 2;
- h->mb.cache.i_cbp_left = (cbp[ltop] & 0xfff0) | (bot_luma<<2) | top_luma;
- }
- else
- h->mb.cache.i_cbp_left = cbp[ltop];
-
- /* load intra4x4 */
- /*
- * Fill intra4x4_pred_mode[].
- * This fills the entries marked "y" below (this cache has no U/V part).
- * |
- * --+--------------
- * | 0 0 0 0 0 0 0 0
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- */
- h->mb.cache.intra4x4_pred_mode[x264_scan8[ 0] - 1] = i4x4[ltop][left_index_table->intra[0]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[ 2] - 1] = i4x4[ltop][left_index_table->intra[1]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[ 8] - 1] = i4x4[lbot][left_index_table->intra[2]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[lbot][left_index_table->intra[3]];
-
- /* load non_zero_count */
- /*
- * Fill non_zero_count[].
- * This fills the entries marked "y" below (only Y is drawn; U and V are handled similarly).
- * |
- * --+--------------
- * | 0 0 0 0 0 0 0 0
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- */
- h->mb.cache.non_zero_count[x264_scan8[ 0] - 1] = nnz[ltop][left_index_table->nnz[0]];
- h->mb.cache.non_zero_count[x264_scan8[ 2] - 1] = nnz[ltop][left_index_table->nnz[1]];
- h->mb.cache.non_zero_count[x264_scan8[ 8] - 1] = nnz[lbot][left_index_table->nnz[2]];
- h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[lbot][left_index_table->nnz[3]];
-
- if( CHROMA_FORMAT >= CHROMA_422 )
- {
- int offset = (4>>CHROMA_H_SHIFT) - 4;
- h->mb.cache.non_zero_count[x264_scan8[16+ 0] - 1] = nnz[ltop][left_index_table->nnz[0]+16+offset];
- h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] = nnz[ltop][left_index_table->nnz[1]+16+offset];
- h->mb.cache.non_zero_count[x264_scan8[16+ 8] - 1] = nnz[lbot][left_index_table->nnz[2]+16+offset];
- h->mb.cache.non_zero_count[x264_scan8[16+10] - 1] = nnz[lbot][left_index_table->nnz[3]+16+offset];
- h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] = nnz[ltop][left_index_table->nnz[0]+32+offset];
- h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = nnz[ltop][left_index_table->nnz[1]+32+offset];
- h->mb.cache.non_zero_count[x264_scan8[32+ 8] - 1] = nnz[lbot][left_index_table->nnz[2]+32+offset];
- h->mb.cache.non_zero_count[x264_scan8[32+10] - 1] = nnz[lbot][left_index_table->nnz[3]+32+offset];
- }
- else
- {
- h->mb.cache.non_zero_count[x264_scan8[16+ 0] - 1] = nnz[ltop][left_index_table->nnz_chroma[0]];
- h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] = nnz[lbot][left_index_table->nnz_chroma[1]];
- h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] = nnz[ltop][left_index_table->nnz_chroma[2]];
- h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = nnz[lbot][left_index_table->nnz_chroma[3]];
- }
- }
- else
- {
- // Left neighbour not available: fill in the placeholder values below
-
- h->mb.cache.i_cbp_left = -1;
-
- h->mb.cache.intra4x4_pred_mode[x264_scan8[ 0] - 1] =
- h->mb.cache.intra4x4_pred_mode[x264_scan8[ 2] - 1] =
- h->mb.cache.intra4x4_pred_mode[x264_scan8[ 8] - 1] =
- h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = -1;
-
- /* load non_zero_count */
- h->mb.cache.non_zero_count[x264_scan8[ 0] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[ 2] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[ 8] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[10] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[16+ 0] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = 0x80;
- if( CHROMA_FORMAT >= CHROMA_422 )
- {
- h->mb.cache.non_zero_count[x264_scan8[16+ 8] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[16+10] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[32+ 8] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[32+10] - 1] = 0x80;
- }
- }
-
- if( h->pps->b_transform_8x8_mode )
- {
- h->mb.cache.i_neighbour_transform_size =
- ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left[0]] )
- + ( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] );
- }
-
- if( b_mbaff )
- {
- h->mb.pic.i_fref[0] = h->i_ref[0] << MB_INTERLACED;
- h->mb.pic.i_fref[1] = h->i_ref[1] << MB_INTERLACED;
- }
-
- if( !b_mbaff )
- {
- // Non-MBAFF case (no macroblock-adaptive frame/field coding)
-
- // Luma:
- // copy the right-most pixel column (16 pixels) of the previous macroblock (p_fdec[0]+15)
- // into the column just left of this macroblock (p_fdec[0]-1).
- // Each call handles 8 pixels (per the annotator: 4 above and 4 below the given row; x264_copy_column8 is defined elsewhere), hence two calls.
-
- x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
- // Load the picture-related pointers.
- // 4th argument: which plane (Y, U or V)
- // 5th argument: whether the plane is chroma
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 0 );
- if( CHROMA444 )
- {
- x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+15+ 4*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[1]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+15+12*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+15+ 4*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[2]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+15+12*FDEC_STRIDE );
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 0 );
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 0 );
- }
- else
- {
- // U and V
- // Non-4:4:4 chroma (the original note says YUV420P; 4:2:2 gets the extra copies below):
- // copy the right-most pixel column (8 pixels) of the previous macroblock
- // into the column just left of this macroblock,
- // 8 pixels per call
- x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+ 4*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+ 4*FDEC_STRIDE );
- if( CHROMA_FORMAT == CHROMA_422 )
- {
- x264_copy_column8( h->mb.pic.p_fdec[1]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+12*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[2]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+12*FDEC_STRIDE );
- }
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 0 );
- }
- }
- else
- {
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 1 );
- if( CHROMA444 )
- {
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 1 );
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 1 );
- }
- else
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 1 );
- }
-
- if( h->fdec->integral )
- {
- int offset = 16 * (mb_x + mb_y * h->fdec->i_stride[0]);
- for( int list = 0; list < 2; list++ )
- for( int i = 0; i < h->mb.pic.i_fref[list]; i++ )
- h->mb.pic.p_integral[list][i] = &h->fref[list][i]->integral[offset];
- }
-
- x264_prefetch_fenc( h, h->fenc, mb_x, mb_y );
-
- /* load ref/mv/mvd */
- for( int l = 0; l < lists; l++ )
- {
- int16_t (*mv)[2] = h->mb.mv[l];
- int8_t *ref = h->mb.ref[l];
-
- int i8 = x264_scan8[0] - 1 - 1*8;
- if( h->mb.i_neighbour & MB_TOPLEFT )
- {
- // Fill in the information of the top-left neighbour
-
- int ir = b_mbaff ? 2*(s8x8*h->mb.i_mb_topleft_y + mb_x-1)+1+s8x8 : top_8x8 - 1;
- int iv = b_mbaff ? 4*(s4x4*h->mb.i_mb_topleft_y + mb_x-1)+3+3*s4x4 : top_4x4 - 1;
- if( b_mbaff && h->mb.topleft_partition )
- {
- /* Take motion vector from the middle of macroblock instead of
- * the bottom right as usual. */
- iv -= 2*s4x4;
- ir -= s8x8;
- }
- /*
- * Fill the reference index cache ref[].
- * This fills the entry marked "y" below.
- * |
- * --+--------------
- * | 0 0 0 y 0 0 0 0
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- */
- // Reference frame index
- h->mb.cache.ref[l][i8] = ref[ir];
- /*
- * Fill the motion vector cache mv[].
- * This fills the entry marked "y" below.
- * |
- * --+--------------
- * | 0 0 0 y 0 0 0 0
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- */
- // Motion vector
- CP32( h->mb.cache.mv[l][i8], mv[iv] );
- }
- else
- {
- h->mb.cache.ref[l][i8] = -2;
- M32( h->mb.cache.mv[l][i8] ) = 0;
- }
-
- i8 = x264_scan8[0] - 8;
- if( h->mb.i_neighbour & MB_TOP )
- {
- // Fill in the information of the top neighbour
-
- /*
- * Fill the reference index cache ref[].
- * This fills the entries marked "1" and "2" below (two cache entries per source value).
- * |
- * --+--------------
- * | 0 0 0 0 1 1 2 2
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- */
- h->mb.cache.ref[l][i8+0] =
- h->mb.cache.ref[l][i8+1] = ref[top_8x8 + 0];
- h->mb.cache.ref[l][i8+2] =
- h->mb.cache.ref[l][i8+3] = ref[top_8x8 + 1];
- /*
- * Fill the motion vector cache mv[].
- * This fills the entries marked "y" below.
- * |
- * --+--------------
- * | 0 0 0 0 y y y y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- * | 0 0 0 0 Y Y Y Y
- */
- CP128( h->mb.cache.mv[l][i8], mv[top_4x4] );
- }
- else
- {
- M128( h->mb.cache.mv[l][i8] ) = M128_ZERO;
- M32( &h->mb.cache.ref[l][i8] ) = (uint8_t)(-2) * 0x01010101U; /* 0xFE (-2 as a byte) replicated into each of the four bytes, matching the -2 stored for other unavailable neighbours */
- }
-
- i8 = x264_scan8[0] + 4 - 1*8;
- if( h->mb.i_neighbour & MB_TOPRIGHT )
- {
- // Fill in the information of the top-right neighbour
- int ir = b_mbaff ? 2*(s8x8*h->mb.i_mb_topright_y + (mb_x+1))+s8x8 : top_8x8 + 2;
- int iv = b_mbaff ? 4*(s4x4*h->mb.i_mb_topright_y + (mb_x+1))+3*s4x4 : top_4x4 + 4;
- h->mb.cache.ref[l][i8] = ref[ir];
- CP32( h->mb.cache.mv[l][i8], mv[iv] );
- }
- else
- h->mb.cache.ref[l][i8] = -2;
-
- i8 = x264_scan8[0] - 1;
- if( h->mb.i_neighbour & MB_LEFT )
- {
- // Fill in the information of the left neighbour
-
- if( b_mbaff )
- {
- h->mb.cache.ref[l][i8+0*8] = ref[h->mb.left_b8[LTOP] + 1 + s8x8*left_index_table->ref[0]];
- h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[LTOP] + 1 + s8x8*left_index_table->ref[1]];
- h->mb.cache.ref[l][i8+2*8] = ref[h->mb.left_b8[LBOT] + 1 + s8x8*left_index_table->ref[2]];
- h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[LBOT] + 1 + s8x8*left_index_table->ref[3]];
-
- CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[LTOP] + 3 + s4x4*left_index_table->mv[0]] );
- CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[LTOP] + 3 + s4x4*left_index_table->mv[1]] );
- CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[LBOT] + 3 + s4x4*left_index_table->mv[2]] );
- CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[LBOT] + 3 + s4x4*left_index_table->mv[3]] );
- }
- else
- {
- // Non-MBAFF case
-
- const int ir = h->mb.i_b8_xy - 1;
- const int iv = h->mb.i_b4_xy - 1;
-
- /*
- * Fill the reference index cache ref[].
- * This fills the entries marked "1" and "2" below (two cache entries per source value).
- * |
- * --+--------------
- * | 0 0 0 0 0 0 0 0
- * | 0 0 0 1 Y Y Y Y
- * | 0 0 0 1 Y Y Y Y
- * | 0 0 0 2 Y Y Y Y
- * | 0 0 0 2 Y Y Y Y
- */
- h->mb.cache.ref[l][i8+0*8] =
- h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
- h->mb.cache.ref[l][i8+2*8] =
- h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
-
- /*
- * Fill the motion vector cache mv[].
- * This fills the entries marked "y" below.
- * |
- * --+--------------
- * | 0 0 0 0 0 0 0 0
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- * | 0 0 0 y Y Y Y Y
- */
- CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
- CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
- CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
- CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
- }
- }
- else
- {
- for( int i = 0; i < 4; i++ )
- {
- h->mb.cache.ref[l][i8+i*8] = -2;
- M32( h->mb.cache.mv[l][i8+i*8] ) = 0;
- }
- }
-
- /* Extra logic for top right mv in mbaff.
- * . . . d . . a .
- * . . . e . . . .
- * . . . f b . c .
- * . . . . . . . .
- *
- * If the top right of the 4x4 partitions labeled a, b and c in the
- * above diagram do not exist, but the entries d, e and f exist (in
- * the macroblock to the left) then use those instead.
- */
- if( b_mbaff && (h->mb.i_neighbour & MB_LEFT) )
- {
- if( MB_INTERLACED && !h->mb.field[h->mb.i_mb_xy-1] )
- {
- h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*0];
- h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*1];
- h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[1] + 1 + s8x8*0];
- CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table->mv[0]+1)] );
- CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table->mv[1]+1)] );
- CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[1] + 3 + s4x4*(left_index_table->mv[2]+1)] );
- }
- else if( !MB_INTERLACED && h->mb.field[h->mb.i_mb_xy-1] )
- {
- // Looking at the bottom field so always take the bottom macroblock of the pair.
- h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
- h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
- h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]];
- CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] );
- CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] );
- CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[2]] );
- }
- }
-
- // Only executed when CABAC is enabled: load the neighbours' mvd values
- if( h->param.b_cabac )
- {
- uint8_t (*mvd)[8][2] = h->mb.mvd[l];
- if( h->mb.i_neighbour & MB_TOP )
- CP64( h->mb.cache.mvd[l][x264_scan8[0] - 8], mvd[top][0] );
- else
- M64( h->mb.cache.mvd[l][x264_scan8[0] - 8] ) = 0;
-
- if( h->mb.i_neighbour & MB_LEFT && (!b_mbaff || h->mb.cache.ref[l][x264_scan8[0]-1] >= 0) )
- {
- CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[LTOP]][left_index_table->intra[0]] );
- CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[LTOP]][left_index_table->intra[1]] );
- }
- else
- {
- M16( h->mb.cache.mvd[l][x264_scan8[0]-1+0*8] ) = 0;
- M16( h->mb.cache.mvd[l][x264_scan8[0]-1+1*8] ) = 0;
- }
- if( h->mb.i_neighbour & MB_LEFT && (!b_mbaff || h->mb.cache.ref[l][x264_scan8[0]-1+2*8] >=0) )
- {
- CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[LBOT]][left_index_table->intra[2]] );
- CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[LBOT]][left_index_table->intra[3]] );
- }
- else
- {
- M16( h->mb.cache.mvd[l][x264_scan8[0]-1+2*8] ) = 0;
- M16( h->mb.cache.mvd[l][x264_scan8[0]-1+3*8] ) = 0;
- }
- }
-
- /* If motion vectors are cached from frame macroblocks but this
- * macroblock is a field macroblock then the motion vector must be
- * halved. Similarly, motion vectors from field macroblocks are doubled. */
- if( b_mbaff )
- {
- #define MAP_MVS\
- if( FIELD_DIFFERENT(h->mb.i_mb_topleft_xy) )\
- MAP_F2F(mv, ref, x264_scan8[0] - 1 - 1*8)\
- if( FIELD_DIFFERENT(top) )\
- {\
- MAP_F2F(mv, ref, x264_scan8[0] + 0 - 1*8)\
- MAP_F2F(mv, ref, x264_scan8[0] + 1 - 1*8)\
- MAP_F2F(mv, ref, x264_scan8[0] + 2 - 1*8)\
- MAP_F2F(mv, ref, x264_scan8[0] + 3 - 1*8)\
- }\
- if( FIELD_DIFFERENT(h->mb.i_mb_topright_xy) )\
- MAP_F2F(mv, ref, x264_scan8[0] + 4 - 1*8)\
- if( FIELD_DIFFERENT(left[0]) )\
- {\
- MAP_F2F(mv, ref, x264_scan8[0] - 1 + 0*8)\
- MAP_F2F(mv, ref, x264_scan8[0] - 1 + 1*8)\
- MAP_F2F(mv, ref, x264_scan8[0] - 1 + 2*8)\
- MAP_F2F(mv, ref, x264_scan8[0] - 1 + 3*8)\
- MAP_F2F(topright_mv, topright_ref, 0)\
- MAP_F2F(topright_mv, topright_ref, 1)\
- MAP_F2F(topright_mv, topright_ref, 2)\
- }
-
- if( MB_INTERLACED )
- {
- #define FIELD_DIFFERENT(macroblock) (macroblock >= 0 && !h->mb.field[macroblock])
- #define MAP_F2F(varmv, varref, index)\
- if( h->mb.cache.varref[l][index] >= 0 )\
- {\
- h->mb.cache.varref[l][index] <<= 1;\
- h->mb.cache.varmv[l][index][1] /= 2;\
- h->mb.cache.mvd[l][index][1] >>= 1;\
- }
- MAP_MVS
- #undef MAP_F2F
- #undef FIELD_DIFFERENT
- }
- else
- {
- #define FIELD_DIFFERENT(macroblock) (macroblock >= 0 && h->mb.field[macroblock])
- #define MAP_F2F(varmv, varref, index)\
- if( h->mb.cache.varref[l][index] >= 0 )\
- {\
- h->mb.cache.varref[l][index] >>= 1;\
- h->mb.cache.varmv[l][index][1] <<= 1;\
- h->mb.cache.mvd[l][index][1] <<= 1;\
- }
- MAP_MVS
- #undef MAP_F2F
- #undef FIELD_DIFFERENT
- }
- }
- }
-
- if( b_mbaff && mb_x == 0 && !(mb_y&1) )
- {
- if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
- h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
- else
- h->mb.field_decoding_flag = 0;
- }
-
- /* Check whether skip here would cause decoder to predict interlace mode incorrectly.
- * FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
- h->mb.b_allow_skip = 1;
- if( b_mbaff )
- {
- if( MB_INTERLACED != h->mb.field_decoding_flag &&
- (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
- h->mb.b_allow_skip = 0;
- }
-
- // Only executed when CABAC is enabled: count the non-skip left/top neighbours
- if( h->param.b_cabac )
- {
- if( b_mbaff )
- {
- int left_xy, top_xy;
- /* Neighbours here are calculated based on field_decoding_flag */
- int mb_xy = mb_x + (mb_y&~1)*h->mb.i_mb_stride;
- left_xy = mb_xy - 1;
- if( (mb_y&1) && mb_x > 0 && h->mb.field_decoding_flag == h->mb.field[left_xy] )
- left_xy += h->mb.i_mb_stride;
- if( h->mb.field_decoding_flag )
- {
- top_xy = mb_xy - h->mb.i_mb_stride;
- if( !(mb_y&1) && top_xy >= 0 && h->mb.slice_table[top_xy] == h->sh.i_first_mb && h->mb.field[top_xy] )
- top_xy -= h->mb.i_mb_stride;
- }
- else
- top_xy = mb_x + (mb_y-1)*h->mb.i_mb_stride;
-
- h->mb.cache.i_neighbour_skip = (mb_x > 0 && h->mb.slice_table[left_xy] == h->sh.i_first_mb && !IS_SKIP( h->mb.type[left_xy] ))
- + (top_xy >= 0 && h->mb.slice_table[top_xy] == h->sh.i_first_mb && !IS_SKIP( h->mb.type[top_xy] ));
- }
- else
- {
- h->mb.cache.i_neighbour_skip = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left[0] ))
- + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ));
- }
- }
-
- /* load skip */
- // Skip-related state for B slices (weights, distance scale factors, neighbours' skip bits)
- if( h->sh.i_type == SLICE_TYPE_B )
- {
- h->mb.bipred_weight = h->mb.bipred_weight_buf[MB_INTERLACED][MB_INTERLACED&(mb_y&1)];
- h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[MB_INTERLACED][MB_INTERLACED&(mb_y&1)];
- if( h->param.b_cabac )
- {
- uint8_t skipbp;
- x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
- if( b_mbaff )
- {
- skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[LTOP]] : 0;
- h->mb.cache.skip[x264_scan8[0] - 1] = (skipbp >> (1+(left_index_table->mv[0]&~1))) & 1;
- skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[LBOT]] : 0;
- h->mb.cache.skip[x264_scan8[8] - 1] = (skipbp >> (1+(left_index_table->mv[2]&~1))) & 1;
- }
- else
- {
- skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[0]] : 0;
- h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
- h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
- }
- skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
- h->mb.cache.skip[x264_scan8[0] - 8] = skipbp & 0x4;
- h->mb.cache.skip[x264_scan8[4] - 8] = skipbp & 0x8;
- }
- }
-
- if( h->sh.i_type == SLICE_TYPE_P )
- x264_mb_predict_mv_pskip( h, h->mb.cache.pskip_mv );
-
- /*
- * (Annotator's note) i_neighbour8 treats the macroblock as four 8x8 sub-blocks, numbered as
- * below, and records which neighbours are available for each of them.
- * +--------+--------+
- * |        |        |
- * |   0    |   1    |
- * |        |        |
- * +--------+--------+
- * |        |        |
- * |   2    |   3    |
- * |        |        |
- * +--------+--------+
- *
- * i_neighbour4 treats the macroblock as sixteen 4x4 sub-blocks, numbered as below, and records
- * which neighbours are available for each of them
- * (the numbering is in effect the same order in which scan8[] walks the cache).
- * +----+----+----+----+
- * |  0 |  1 |  4 |  5 |
- * +----+----+----+----+
- * |  2 |  3 |  6 |  7 |
- * +----+----+----+----+
- * |  8 |  9 | 12 | 13 |
- * +----+----+----+----+
- * | 10 | 11 | 14 | 15 |
- * +----+----+----+----+
- *
- */
- h->mb.i_neighbour4[0] =
- h->mb.i_neighbour8[0] = (h->mb.i_neighbour_intra & (MB_TOP|MB_LEFT|MB_TOPLEFT))
- | ((h->mb.i_neighbour_intra & MB_TOP) ? MB_TOPRIGHT : 0);
- h->mb.i_neighbour4[4] =
- h->mb.i_neighbour4[1] = MB_LEFT | ((h->mb.i_neighbour_intra & MB_TOP) ? (MB_TOP|MB_TOPLEFT|MB_TOPRIGHT) : 0);
- h->mb.i_neighbour4[2] =
- h->mb.i_neighbour4[8] =
- h->mb.i_neighbour4[10] =
- h->mb.i_neighbour8[2] = MB_TOP|MB_TOPRIGHT | ((h->mb.i_neighbour_intra & MB_LEFT) ? (MB_LEFT|MB_TOPLEFT) : 0);
- h->mb.i_neighbour4[5] =
- h->mb.i_neighbour8[1] = MB_LEFT | (h->mb.i_neighbour_intra & MB_TOPRIGHT)
- | ((h->mb.i_neighbour_intra & MB_TOP) ? MB_TOP|MB_TOPLEFT : 0);
- }
x264_macroblock_cache_load()源代碼比較長,比較關鍵的地方都做了註釋,在這裏就不詳細記錄了。總體說來該函數的流程如下所示:
(1)加載Intra4x4幀內預測模式intra4x4_pred_mode[]和DCT非零係數non_zero_count[]緩存Cache的宏塊周邊信息。加載順序爲:上->左(從代碼可以看出,這兩個緩存只加載了上方和左邊宏塊的信息,並沒有加載左上方的信息)。
(2)加載宏塊重建像素p_fdec[]的周邊像素,以及宏塊編碼像素p_fenc[]。對於p_fdec[]來說,在本函數中直接加載當前宏塊左邊的像素;調用函數x264_macroblock_load_pic_pointers()加載當前宏塊上面的像素。對於p_fenc[]來說,調用x264_macroblock_load_pic_pointers()從圖像上拷貝數據。
(3)加載參考幀序號ref[]和運動矢量mv[]緩存Cache的宏塊周邊信息。加載順序爲:左上->上->右上->左。
(4)加載其它信息。
下面簡單瀏覽一下x264_macroblock_load_pic_pointers()的源代碼。
x264_macroblock_load_pic_pointers()
x264_macroblock_load_pic_pointers()用於給宏塊重建像素p_fdec[]和宏塊編碼像素p_fenc[]加載數據,並且加載圖像的半像素數據。它的定義位於common\macroblock.c,如下所示。
- //加載圖像相關的指針
- static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
- {
- int mb_interlaced = b_mbaff && MB_INTERLACED;
- int height = b_chroma ? 16 >> CHROMA_V_SHIFT : 16;
- int i_stride = h->fdec->i_stride[i];
- int i_stride2 = i_stride << mb_interlaced;
- int i_pix_offset = mb_interlaced
- ? 16 * mb_x + height * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + height * mb_y * i_stride;
- //從一整個重建幀中讀取一部分像素,賦值到重建幀宏塊中
- //i_pix_offset爲宏塊相對於整個幀起始位置的偏移量
- pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
- int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : !(mb_y&1);
- //前一行宏塊的底部邊界像素
- pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
- int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
- /* ref_pix_offset[0] references the current field and [1] the opposite field. */
- if( mb_interlaced )
- ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
- h->mb.pic.i_stride[i] = i_stride2;
- h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
- if( b_chroma )
- {
- //色度
- //編碼幀p_fenc
- h->mc.load_deinterleave_chroma_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2, height );
- //重建幀p_fdec
- memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
- memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
- h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
- h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
- }