E203數據衝突處理OITF


      流水線的數據衝突分爲三類:WAR,RAW,WAW

       https://wenku.baidu.com/view/e066926d48d7c1c708a14508.html

  • WAR: write after read 相關性,又稱先讀後寫相關性。好比下面的指令序列,第一條指令會讀取x4,第二條指令會寫x4。在流水線中,若是第二條指令比第一條指令先寫x4,則第一條指令就會讀出錯誤的值。

           add x5, x4, x6

           add x4, x3, x2

  • WAW: write after write 相關性,又稱先寫後寫相關性。好比下面的指令序列,兩條指令都會寫x5。在流水線中,若是第二條指令比第一條指令先寫x5,就會引發邏輯錯誤。

           add x5, x4, x6

           add x5, x3, x2

  • RAW: read after write 相關性,又稱先寫後讀相關性。好比下面的指令序列,第一條指令會寫x5,第二條指令會讀x5。在流水線中,若是第二條指令在第一條指令寫x5以前就先讀了x5,則第二條指令會讀出錯誤的值,引發邏輯錯誤。

           add x5, x4, x6

           add x4, x5, x2


      因爲蜂鳥E200系列是按序派遣,按順序寫回的微架構,在指令派遣時候就已經從通用寄存器數組中讀取了源操作數。後續執行的指令寫回regfile的操作不可能影響到前面指令的讀取,因此不可能發生WAR相關性形成的數據衝突。

      正在派遣的指令處在流水線的第二級,假設以前派遣的指令是單週期指令,則前序指令確定已經完成了執行且將結果寫回了Regfile。所以正在派遣的指令不可能會發生RAW數據衝突。可是假設以前派遣的指令是多週期指令(長指令),因爲指令須要多個週期才能寫回結果。所以正在派遣的指令可能會產生前序相關的RAW相關性。

     正在派遣的指令處在流水線的第二級,假設以前派遣的指令是單週期指令,則前序指令確定已經完成了執行且將結果寫回了Regfile。所以正在派遣的指令不可能會發生WAW數據衝突。可是假設以前派遣的指令是多週期指令(長指令),因爲指令須要多個週期才能寫回結果。所以正在派遣的指令可能會產生前序相關的WAW相關性。

    爲了能檢測出長指令的RAW和WAW相關性,蜂鳥E200使用了一個outstanding instruction track fifo(OITF)模塊。在流水線的派遣(Dispatch)點,每一次派遣一個長指令,則會在OITF中分配一個表項(Entry),在這個表項中會存儲該長指令的結果寄存器索引。在流水線的寫回(Write-back)點,每次按順序寫回一個長指令以後,就會將此指令在OITF中的表項移除。

      每條指令派遣時,都會將本指令的源操作數和目的操作數寄存器索引和OITF中的各個表項進行比對,從而判斷本指令是否與已經被派遣出,且還沒有寫回的長指令產生RAW和WAW相關性。若是產生相關性,則stall住當前指令的派遣。若是沒有RAW和WAW相關性,且該指令爲多週期長指令,把該指令寫入OITF,若是OITF是full,則仍要stall住管線,等待OITF釋放空間後,再寫入並派遣。

   在writeback模塊,會進行長指令寫回仲裁,長指令寫回regfile後,會釋放OITF中相應的表項。

image

     



OITF代碼以下,若是fifo full,則dis_ready=0, 與dispatch模塊握手失敗,不會發送新的dispatch進來。若是fifo未滿(dis_ready=1),會發送新的指令進來進行判斷。

`include "e203_defines.v"

// Outstanding Instruction Track FIFO (OITF).
//
// A small FIFO with `E203_OITF_DEPTH entries. One entry is allocated each
// time dis_ena is asserted (a long instruction is dispatched) and one entry
// is released each time ret_ena is asserted (a long instruction is written
// back). Each entry records the destination register index, whether the
// destination is written, whether it is a floating-point register, and the PC.
//
// Combinationally, the operands of the instruction currently being
// dispatched (disp_i_*) are compared against every valid entry:
//   - a source operand matching a tracked destination => RAW dependency
//   - the destination matching a tracked destination  => WAW dependency
// The module only reports the matches; acting on them is left to the user
// of the oitfrd_match_* outputs.
//
// NOTE(review): sirv_gnrl_dfflr / sirv_gnrl_dffl are defined outside this
// file. From the way they are connected here the port order appears to be
// (load-enable, next-value, registered-value, clk [, rst_n]) — confirm
// against the library definition.
module e203_exu_oitf (
  output dis_ready, // high when the FIFO is not full, i.e. a new entry can be allocated

  input  dis_ena, // allocate an entry: a long instruction is being dispatched
  input  ret_ena, // release an entry: a long instruction is being written back

  output [`E203_ITAG_WIDTH-1:0] dis_ptr,  // allocation (write) pointer
  output [`E203_ITAG_WIDTH-1:0] ret_ptr,  // retire (read) pointer

  // Payload of the entry currently addressed by ret_ptr
  output [`E203_RFIDX_WIDTH-1:0] ret_rdidx,
  output ret_rdwen,
  output ret_rdfpu,
  output [`E203_PC_SIZE-1:0] ret_pc,

  // Operand-usage flags of the instruction currently being dispatched
  input  disp_i_rs1en, // the dispatching instruction reads source operand 1
  input  disp_i_rs2en, // the dispatching instruction reads source operand 2
  input  disp_i_rs3en, // the dispatching instruction reads source operand 3
  input  disp_i_rdwen, // the dispatching instruction writes a destination register
  input  disp_i_rs1fpu, // source operand 1 is a floating-point register
  input  disp_i_rs2fpu, // source operand 2 is a floating-point register
  input  disp_i_rs3fpu, // source operand 3 is a floating-point register
  input  disp_i_rdfpu,  // the destination is a floating-point register
  // Register indexes of the instruction currently being dispatched
  input  [`E203_RFIDX_WIDTH-1:0] disp_i_rs1idx,
  input  [`E203_RFIDX_WIDTH-1:0] disp_i_rs2idx,
  input  [`E203_RFIDX_WIDTH-1:0] disp_i_rs3idx,
  input  [`E203_RFIDX_WIDTH-1:0] disp_i_rdidx,
  input  [`E203_PC_SIZE    -1:0] disp_i_pc, // PC of the instruction currently being dispatched

  output oitfrd_match_disprs1, // rs1 of the dispatching instruction matches the rd of some valid entry (RAW)
  output oitfrd_match_disprs2, // same check for rs2 (RAW)
  output oitfrd_match_disprs3, // same check for rs3 (RAW)
  output oitfrd_match_disprd,  // rd of the dispatching instruction matches the rd of some valid entry (WAW)
  // High when no entry is outstanding
  output oitf_empty,
  input  clk,
  input  rst_n
);

  // Per-entry valid-bit control and state (one bit per entry)
  wire [`E203_OITF_DEPTH-1:0] vld_set;
  wire [`E203_OITF_DEPTH-1:0] vld_clr;
  wire [`E203_OITF_DEPTH-1:0] vld_ena;
  wire [`E203_OITF_DEPTH-1:0] vld_nxt;
  wire [`E203_OITF_DEPTH-1:0] vld_r; // entry i currently holds an outstanding instruction
  wire [`E203_OITF_DEPTH-1:0] rdwen_r;// recorded disp_i_rdwen of entry i
  wire [`E203_OITF_DEPTH-1:0] rdfpu_r; // recorded disp_i_rdfpu of entry i
  wire [`E203_RFIDX_WIDTH-1:0] rdidx_r[`E203_OITF_DEPTH-1:0]; // recorded destination register index of entry i
  // The PC here is to be used at wback stage to track out the
  // PC of exception of long-pipe instruction
  wire [`E203_PC_SIZE-1:0] pc_r[`E203_OITF_DEPTH-1:0];

  wire alc_ptr_ena = dis_ena;  // the allocation pointer advances on every dispatch
  wire ret_ptr_ena = ret_ena;  // the retire pointer advances on every write-back

  wire oitf_full ;

  wire [`E203_ITAG_WIDTH-1:0] alc_ptr_r; // allocation (write) pointer register
  wire [`E203_ITAG_WIDTH-1:0] ret_ptr_r; // retire (read) pointer register

  generate
  if(`E203_OITF_DEPTH > 1) begin: depth_gt1//{
      // Wrap flag of the allocation pointer: toggles each time the pointer
      // wraps from DEPTH-1 back to 0. Comparing it with the retire-side
      // flag distinguishes "full" from "empty" when both pointers are equal.
      wire alc_ptr_flg_r;
      wire alc_ptr_flg_nxt = ~alc_ptr_flg_r;
      wire alc_ptr_flg_ena = (alc_ptr_r == ($unsigned(`E203_OITF_DEPTH-1))) & alc_ptr_ena;

      sirv_gnrl_dfflr #(1) alc_ptr_flg_dfflrs(alc_ptr_flg_ena, alc_ptr_flg_nxt, alc_ptr_flg_r, clk, rst_n);

      wire [`E203_ITAG_WIDTH-1:0] alc_ptr_nxt;
      // Wrap to 0 when allocating at the last entry, otherwise increment
      assign alc_ptr_nxt = alc_ptr_flg_ena ? `E203_ITAG_WIDTH'b0 : (alc_ptr_r + 1'b1);

      sirv_gnrl_dfflr #(`E203_ITAG_WIDTH) alc_ptr_dfflrs(alc_ptr_ena, alc_ptr_nxt, alc_ptr_r, clk, rst_n);

      // Wrap flag of the retire pointer: toggles each time the pointer
      // wraps from DEPTH-1 back to 0
      wire ret_ptr_flg_r;
      wire ret_ptr_flg_nxt = ~ret_ptr_flg_r;
      wire ret_ptr_flg_ena = (ret_ptr_r == ($unsigned(`E203_OITF_DEPTH-1))) & ret_ptr_ena;

      sirv_gnrl_dfflr #(1) ret_ptr_flg_dfflrs(ret_ptr_flg_ena, ret_ptr_flg_nxt, ret_ptr_flg_r, clk, rst_n);

      wire [`E203_ITAG_WIDTH-1:0] ret_ptr_nxt;
      // Wrap to 0 when retiring the last entry, otherwise increment
      assign ret_ptr_nxt = ret_ptr_flg_ena ? `E203_ITAG_WIDTH'b0 : (ret_ptr_r + 1'b1);

      sirv_gnrl_dfflr #(`E203_ITAG_WIDTH) ret_ptr_dfflrs(ret_ptr_ena, ret_ptr_nxt, ret_ptr_r, clk, rst_n);
      // Equal pointers with equal wrap flags => empty;
      // equal pointers with different wrap flags => full.
      // NOTE(review): this relies on both pointers and both flags resetting
      // to the same value — presumably 0; confirm in sirv_gnrl_dfflr.
      assign oitf_empty = (ret_ptr_r == alc_ptr_r) &   (ret_ptr_flg_r == alc_ptr_flg_r);
      assign oitf_full  = (ret_ptr_r == alc_ptr_r) & (~(ret_ptr_flg_r == alc_ptr_flg_r));
  end//}
  else begin: depth_eq1//}{
      // Single-entry case: both pointers are tied to 0 and the
      // empty/full status is simply the valid bit of entry 0.
      assign alc_ptr_r =1'b0;
      assign ret_ptr_r =1'b0;
      assign oitf_empty = ~vld_r[0];
      assign oitf_full  = vld_r[0];
  end//}
  endgenerate//}

  assign ret_ptr = ret_ptr_r;
  assign dis_ptr = alc_ptr_r;

 //// 
 //// // If the OITF is not full, or it is under retiring, then it is ready to accept new dispatch
 //// assign dis_ready = (~oitf_full) | ret_ena;
 // To cut down the loop between ALU write-back valid --> oitf_ret_ena --> oitf_ready ---> dispatch_ready --- > alu_i_valid
 // we exclude the ret_ena from the ready signal
 // (consequence: a full FIFO reports not-ready even in a cycle where an entry is being retired)
 assign dis_ready = (~oitf_full);

  // Per-entry comparison results against the dispatching instruction
  wire [`E203_OITF_DEPTH-1:0] rd_match_rs1idx;
  wire [`E203_OITF_DEPTH-1:0] rd_match_rs2idx;
  wire [`E203_OITF_DEPTH-1:0] rd_match_rs3idx;
  wire [`E203_OITF_DEPTH-1:0] rd_match_rdidx;

  genvar i;
  generate //{
      for (i=0; i<`E203_OITF_DEPTH; i=i+1) begin:oitf_entries//{
        // Entry i becomes valid when an allocation happens while the
        // allocation pointer points at it
        assign vld_set[i] = alc_ptr_ena & (alc_ptr_r == i);
        // Entry i becomes invalid when a retire happens while the
        // retire pointer points at it
        assign vld_clr[i] = ret_ptr_ena & (ret_ptr_r == i);
        // The valid bit is updated only on set or clear; when both occur
        // together the next value is 1 (set takes priority)
        assign vld_ena[i] = vld_set[i] |   vld_clr[i];
        assign vld_nxt[i] = vld_set[i] | (~vld_clr[i]);

        sirv_gnrl_dfflr #(1) vld_dfflrs(vld_ena[i], vld_nxt[i], vld_r[i], clk, rst_n);
        //Payload only set, no need to clear
        // (these registers are instantiated without rst_n; their content is
        // only consumed qualified by vld_r[i] in the match logic below)
        sirv_gnrl_dffl #(`E203_RFIDX_WIDTH) rdidx_dfflrs(vld_set[i], disp_i_rdidx, rdidx_r[i], clk);
        sirv_gnrl_dffl #(`E203_PC_SIZE    ) pc_dfflrs   (vld_set[i], disp_i_pc   , pc_r[i]   , clk);
        sirv_gnrl_dffl #(1)                 rdwen_dfflrs(vld_set[i], disp_i_rdwen, rdwen_r[i], clk);
        sirv_gnrl_dffl #(1)                 rdfpu_dfflrs(vld_set[i], disp_i_rdfpu, rdfpu_r[i], clk);
        // Entry i matches an operand of the dispatching instruction when:
        // the entry is valid, the entry writes a destination register, the
        // dispatching instruction uses that operand, both refer to the same
        // register file (fpu flags equal) and the register indexes are equal
        assign rd_match_rs1idx[i] = vld_r[i] & rdwen_r[i] & disp_i_rs1en & (rdfpu_r[i] == disp_i_rs1fpu) & (rdidx_r[i] == disp_i_rs1idx);
        assign rd_match_rs2idx[i] = vld_r[i] & rdwen_r[i] & disp_i_rs2en & (rdfpu_r[i] == disp_i_rs2fpu) & (rdidx_r[i] == disp_i_rs2idx);
        assign rd_match_rs3idx[i] = vld_r[i] & rdwen_r[i] & disp_i_rs3en & (rdfpu_r[i] == disp_i_rs3fpu) & (rdidx_r[i] == disp_i_rs3idx);
        assign rd_match_rdidx [i] = vld_r[i] & rdwen_r[i] & disp_i_rdwen & (rdfpu_r[i] == disp_i_rdfpu ) & (rdidx_r[i] == disp_i_rdidx );

      end//}
  endgenerate//}
  // rs1 matches the destination of at least one outstanding entry: RAW dependency
  assign oitfrd_match_disprs1 = |rd_match_rs1idx;
  // rs2 matches the destination of at least one outstanding entry: RAW dependency
  assign oitfrd_match_disprs2 = |rd_match_rs2idx;
  // rs3 matches the destination of at least one outstanding entry: RAW dependency
  assign oitfrd_match_disprs3 = |rd_match_rs3idx;
  // rd matches the destination of at least one outstanding entry: WAW dependency
  assign oitfrd_match_disprd  = |rd_match_rdidx ;

  // Expose the payload of the entry at the retire pointer (the oldest
  // allocated entry, since entries are released in allocation order)
  assign ret_rdidx = rdidx_r[ret_ptr];
  assign ret_pc    = pc_r [ret_ptr];
  assign ret_rdwen = rdwen_r[ret_ptr];
  assign ret_rdfpu = rdfpu_r[ret_ptr];

endmodule


相關文章
相關標籤/搜索