当前位置: 首页 > news >正文

LoongArch 五级流水线实现

在单周期 CPU 的基础上，将其拆分为取指、译码、执行、访存、写回五级流水线。

1. mycpu_top.v（注：下方粘贴的代码实际是 id_stage 模块，与第 3 节 ID_stage 的代码相同，并非 mycpu_top 顶层模块）
`include "mycpu.h"

// ID (decode) stage of the five-stage pipeline.
//
// Latches the {instruction, pc} pair handed over by the IF stage, decodes the
// LoongArch instruction, reads the register file, resolves branches/jumps and
// packs the control/data signals for the EXE stage into ds_to_es_bus.
//
// ds_ready_go is simply ds_valid: no data-hazard stall or forwarding is applied
// to the register-file reads in this stage.
//
// NOTE(review): in the article this listing appears under the heading
// "mycpu_top.v", but the module defined here is id_stage.
module id_stage(
    input                          clk           ,
    input                          reset         ,
    // allowin handshake
    input                          es_allowin    ,
    output                         ds_allowin    ,
    // from fs
    input                          fs_to_ds_valid,
    input  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus  ,
    // to es
    output                         ds_to_es_valid,
    output [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,
    // to fs
    output [`BR_BUS_WD       -1:0] br_bus        ,
    // to rf: for write back
    input  [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus
    // Disabled forwarding / hazard ports kept from the original listing:
    // output [ 4                 :0] WB_dest,EXE_dest,MEM_dest,
    // input                          es_load_op    ,
    // input  [31                 :0] EXE_result,MEM_result,WB_result
);

// wire         br_stall;        // (disabled) added for branch stall
// wire         load_stall;
wire         br_taken;
reg          ds_valid;
wire [31:0]  br_target;
// assign br_bus       = {br_stall,br_taken,br_target};  // (disabled) variant with br_stall

wire         ds_ready_go;

// PC currently presented by the IF stage (low 32 bits of the incoming bus).
wire [31                 :0] fs_pc;
reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;
assign fs_pc = fs_to_ds_bus[31:0];

// Instruction and PC held by this stage.
wire [31:0] ds_inst;
wire [31:0] ds_pc;
assign {ds_inst, ds_pc} = fs_to_ds_bus_r;

// Register-file write port, driven by the WB stage.
wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37
        rf_waddr,  //36:32
        rf_wdata   //31:0
       } = ws_to_rf_bus;

wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        res_from_mem;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire        dst_is_r1;      // explicitly declared (was an implicit net)
wire [ 4:0] dest;
wire [31:0] imm;
wire [31:0] rj_value, rkd_value;
wire [31:0] br_offs, jirl_offs;

wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_ui5;
wire        need_si12;
wire        need_si16;
wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;

wire        rj_eq_rd;       // explicitly declared (replaces the unused rs_eq_rt)

assign br_bus       = {br_taken, br_target};

assign ds_to_es_bus = {alu_op      ,  //149:138
                       load_op     ,  //137:137
                       src1_is_pc  ,  //136:136
                       src2_is_imm ,  //135:135
                       gr_we       ,  //134:134
                       mem_we      ,  //133:133
                       dest        ,  //132:128
                       imm         ,  //127:96
                       rj_value    ,  //95 :64
                       rkd_value   ,  //63 :32
                       ds_pc          //31 :0
                      };

assign ds_ready_go    = ds_valid;
// lab4 (disabled):
// assign ds_ready_go    = ds_valid & ~rs_wait & ~rt_wait;
// lab5 (disabled):
// assign ds_ready_go    = ds_valid  & ~load_stall;
assign ds_allowin     = !ds_valid || ds_ready_go && es_allowin;
assign ds_to_es_valid = ds_valid && ds_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ds_valid <= 1'b0;
    end
    else if (ds_allowin) begin
        // LoongArch has no branch delay slot: when the instruction currently
        // in this stage is a taken branch, the instruction arriving from IF
        // was fetched from the fall-through path and must be squashed.
        ds_valid <= fs_to_ds_valid && !br_taken;
    end
end

always @(posedge clk) begin
    if (fs_to_ds_valid && ds_allowin) begin
        fs_to_ds_bus_r <= fs_to_ds_bus;
    end
end

// Instruction field extraction.
assign op_31_26  = ds_inst[31:26];
assign op_25_22  = ds_inst[25:22];
assign op_21_20  = ds_inst[21:20];
assign op_19_15  = ds_inst[19:15];

assign rd   = ds_inst[ 4: 0];
assign rj   = ds_inst[ 9: 5];
assign rk   = ds_inst[14:10];

assign i12  = ds_inst[21:10];
assign i20  = ds_inst[24: 5];
assign i16  = ds_inst[25:10];
assign i26  = {ds_inst[ 9: 0], ds_inst[25:10]};

decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d ));
decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d ));
decoder_2_4  u_dec2(.in(op_21_20 ), .out(op_21_20_d ));
decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d ));

// Instruction recognition from the decoded opcode fields.
assign inst_add_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl   = op_31_26_d[6'h13];
assign inst_b      = op_31_26_d[6'h14];
assign inst_bl     = op_31_26_d[6'h15];
assign inst_beq    = op_31_26_d[6'h16];
assign inst_bne    = op_31_26_d[6'h17];
assign inst_lu12i_w= op_31_26_d[6'h05] & ~ds_inst[25];

// One-hot ALU operation select.
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

assign need_ui5   =  inst_slli_w | inst_srli_w | inst_srai_w;
assign need_si12  =  inst_addi_w | inst_ld_w | inst_st_w;
assign need_si16  =  inst_jirl | inst_beq | inst_bne;
assign need_si20  =  inst_lu12i_w;
assign need_si26  =  inst_b | inst_bl;
// jirl/bl use the constant 4 as ALU operand 2 (added to the PC, see src1_is_pc).
assign src2_is_4  =  inst_jirl | inst_bl;

assign imm = src2_is_4 ? 32'h4                      :
             need_si20 ? {i20[19:0], 12'b0}         :
/*need_ui5 || need_si12*/{{20{i12[11]}}, i12[11:0]} ;

assign br_offs = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                             {{14{i16[15]}}, i16[15:0], 2'b0} ;
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

assign load_op       = inst_ld_w;
// beq/bne/st.w read rd as their second source register instead of rk.
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;

assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w |
                       inst_srli_w |
                       inst_srai_w |
                       inst_addi_w |
                       inst_ld_w   |
                       inst_st_w   |
                       inst_lu12i_w|
                       inst_jirl   |
                       inst_bl     ;

assign res_from_mem  = inst_ld_w;
assign dst_is_r1     = inst_bl;
// Whether the instruction writes a general-purpose register.
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we        = inst_st_w;
// bl always writes r1; every other instruction writes rd.
assign dest          = dst_is_r1 ? 5'd01 : rd;
// Stall variant (disabled):
// assign dest         = dst_is_r31   ? 5'd31 :
//                       dst_is_rt    ? rt    :
//                       inst_no_dest ? 5'd0  :  rd;

assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;
regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
    );

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;
// lab5 forwarding (disabled):
// assign rs_value = rs_wait ? (rs == EXE_dest? EXE_result:
//                             rs == MEM_dest?MEM_result:WB_result)
//                             : rf_rdata1;
// assign rt_value = rt_wait ? (rt == EXE_dest? EXE_result:
//                             rt == MEM_dest?MEM_result:WB_result)
//                             : rf_rdata2;

// Branch resolution: only a valid instruction in this stage may redirect fetch.
assign rj_eq_rd = (rj_value == rkd_value);
assign br_taken = (   inst_beq  &&  rj_eq_rd
                   || inst_bne  && !rj_eq_rd
                   || inst_jirl
                   || inst_bl
                   || inst_b
                  ) && ds_valid;
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);

// lab4 hazard detection (disabled):
// wire        src1_no_rs;   // rs field is non-zero but rs is not read from the register file
// wire        src2_no_rt;   // rt field is non-zero but rt is not read from the register file
// assign src1_no_rs   = 1'b0;
// assign src2_no_rt   = inst_addiu | load_op|inst_jal|inst_lui;
// wire        rs_wait,rt_wait;
// assign rs_wait      = ~src1_no_rs & (rs!=5'd0)
//                         & ( (rs==EXE_dest) | (rs==MEM_dest) | (rs==WB_dest));
// assign rt_wait      = ~src2_no_rt & (rt!=5'd0)
//                         & ( (rt==EXE_dest) | (rt==MEM_dest) | (rt==WB_dest));
// assign br_stall = br_taken & load_stall & {5{ds_valid}};  // added
// assign load_stall = (rs_wait & (rs == EXE_dest) & es_load_op ) ||
//                 (rt_wait & (rt == EXE_dest) & es_load_op );
// wire        inst_no_dest;
// assign inst_no_dest = inst_beq|inst_bne|inst_jr|inst_sw;

endmodule
2. IF_stage.v
`include "mycpu.h"

// IF (fetch) stage of the five-stage pipeline.
//
// Holds the fetch PC, drives the instruction SRAM with the next PC (branch
// target when br_taken, otherwise fs_pc + 4) and hands {instruction, pc} to
// the ID stage.
//
// NOTE(review): this stage does not clear fs_valid when br_taken is asserted,
// so the instruction already fetched from the fall-through path is still
// offered to ID; confirm the squash is done by the consumer.
module if_stage(
    input                          clk            ,
    input                          reset          ,
    // allowin handshake
    input                          ds_allowin     ,
    // branch bus from ID
    input  [`BR_BUS_WD       -1:0] br_bus         ,
    // to ds
    output                         fs_to_ds_valid ,
    output [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus   ,
    // inst sram interface
    output        inst_sram_en   ,
    output [ 3:0] inst_sram_we   ,
    output [31:0] inst_sram_addr ,
    output [31:0] inst_sram_wdata,
    input  [31:0] inst_sram_rdata
);

reg         fs_valid;     // this stage currently holds a valid instruction
wire        fs_ready_go;
wire        fs_allowin;   // this stage can accept a new fetch
wire        to_fs_valid;  // pre-IF has a valid fetch request for this stage

// Branch-stall variant (disabled):
// wire         pre_fs_ready_go;  // added
// wire         br_stall;         // added
// assign to_fs_valid      = ~reset && pre_fs_ready_go;  // modified
// assign pre_fs_ready_go  = ~br_stall;                  // added

wire [31:0] seq_pc;
wire [31:0] nextpc;

wire        br_taken;
wire [31:0] br_target;
assign {br_taken, br_target} = br_bus;
// assign {br_stall,br_taken,br_target} = br_bus;                 // (disabled) modified
// assign inst_sram_en = to_fs_valid && fs_allowin && ~br_stall;  // (disabled) modified

wire [31:0] fs_inst;
reg  [31:0] fs_pc;
assign fs_to_ds_bus = {fs_inst,
                       fs_pc  };

// pre-IF stage
assign to_fs_valid  = ~reset;
assign seq_pc       = fs_pc + 32'h4;
assign nextpc       = br_taken ? br_target : seq_pc;

// IF stage
assign fs_ready_go    = 1'b1;
assign fs_allowin     = !fs_valid || (fs_ready_go && ds_allowin);
assign fs_to_ds_valid =  fs_valid && fs_ready_go;

always @(posedge clk) begin
    if (reset) begin
        fs_valid <= 1'b0;
    end
    else if (fs_allowin) begin
        fs_valid <= to_fs_valid;
    end

    if (reset) begin
        fs_pc <= 32'h1bfffffc;  // trick: makes nextpc equal 0x1c000000 during reset
    end
    else if (to_fs_valid && fs_allowin) begin
        fs_pc <= nextpc;
    end
end

assign inst_sram_en    = to_fs_valid && fs_allowin;
assign inst_sram_we    = 4'h0;
assign inst_sram_addr  = nextpc;
assign inst_sram_wdata = 32'b0;

assign fs_inst         = inst_sram_rdata;

endmodule
3. ID_stage
`include "mycpu.h"

// ID (decode) stage of the five-stage pipeline.
//
// Latches the {instruction, pc} pair handed over by the IF stage, decodes the
// LoongArch instruction, reads the register file, resolves branches/jumps and
// packs the control/data signals for the EXE stage into ds_to_es_bus.
//
// ds_ready_go is simply ds_valid: no data-hazard stall or forwarding is applied
// to the register-file reads in this stage.
module id_stage(
    input                          clk           ,
    input                          reset         ,
    // allowin handshake
    input                          es_allowin    ,
    output                         ds_allowin    ,
    // from fs
    input                          fs_to_ds_valid,
    input  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus  ,
    // to es
    output                         ds_to_es_valid,
    output [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,
    // to fs
    output [`BR_BUS_WD       -1:0] br_bus        ,
    // to rf: for write back
    input  [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus
    // Disabled forwarding / hazard ports kept from the original listing:
    // output [ 4                 :0] WB_dest,EXE_dest,MEM_dest,
    // input                          es_load_op    ,
    // input  [31                 :0] EXE_result,MEM_result,WB_result
);

// wire         br_stall;        // (disabled) added for branch stall
// wire         load_stall;
wire         br_taken;
reg          ds_valid;
wire [31:0]  br_target;
// assign br_bus       = {br_stall,br_taken,br_target};  // (disabled) variant with br_stall

wire         ds_ready_go;

// PC currently presented by the IF stage (low 32 bits of the incoming bus).
wire [31                 :0] fs_pc;
reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;
assign fs_pc = fs_to_ds_bus[31:0];

// Instruction and PC held by this stage.
wire [31:0] ds_inst;
wire [31:0] ds_pc;
assign {ds_inst, ds_pc} = fs_to_ds_bus_r;

// Register-file write port, driven by the WB stage.
wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37
        rf_waddr,  //36:32
        rf_wdata   //31:0
       } = ws_to_rf_bus;

wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        res_from_mem;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire        dst_is_r1;      // explicitly declared (was an implicit net)
wire [ 4:0] dest;
wire [31:0] imm;
wire [31:0] rj_value, rkd_value;
wire [31:0] br_offs, jirl_offs;

wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_ui5;
wire        need_si12;
wire        need_si16;
wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;

wire        rj_eq_rd;       // explicitly declared (replaces the unused rs_eq_rt)

assign br_bus       = {br_taken, br_target};

assign ds_to_es_bus = {alu_op      ,  //149:138
                       load_op     ,  //137:137
                       src1_is_pc  ,  //136:136
                       src2_is_imm ,  //135:135
                       gr_we       ,  //134:134
                       mem_we      ,  //133:133
                       dest        ,  //132:128
                       imm         ,  //127:96
                       rj_value    ,  //95 :64
                       rkd_value   ,  //63 :32
                       ds_pc          //31 :0
                      };

assign ds_ready_go    = ds_valid;
// lab4 (disabled):
// assign ds_ready_go    = ds_valid & ~rs_wait & ~rt_wait;
// lab5 (disabled):
// assign ds_ready_go    = ds_valid  & ~load_stall;
assign ds_allowin     = !ds_valid || ds_ready_go && es_allowin;
assign ds_to_es_valid = ds_valid && ds_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ds_valid <= 1'b0;
    end
    else if (ds_allowin) begin
        // LoongArch has no branch delay slot: when the instruction currently
        // in this stage is a taken branch, the instruction arriving from IF
        // was fetched from the fall-through path and must be squashed.
        ds_valid <= fs_to_ds_valid && !br_taken;
    end
end

always @(posedge clk) begin
    if (fs_to_ds_valid && ds_allowin) begin
        fs_to_ds_bus_r <= fs_to_ds_bus;
    end
end

// Instruction field extraction.
assign op_31_26  = ds_inst[31:26];
assign op_25_22  = ds_inst[25:22];
assign op_21_20  = ds_inst[21:20];
assign op_19_15  = ds_inst[19:15];

assign rd   = ds_inst[ 4: 0];
assign rj   = ds_inst[ 9: 5];
assign rk   = ds_inst[14:10];

assign i12  = ds_inst[21:10];
assign i20  = ds_inst[24: 5];
assign i16  = ds_inst[25:10];
assign i26  = {ds_inst[ 9: 0], ds_inst[25:10]};

decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d ));
decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d ));
decoder_2_4  u_dec2(.in(op_21_20 ), .out(op_21_20_d ));
decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d ));

// Instruction recognition from the decoded opcode fields.
assign inst_add_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl   = op_31_26_d[6'h13];
assign inst_b      = op_31_26_d[6'h14];
assign inst_bl     = op_31_26_d[6'h15];
assign inst_beq    = op_31_26_d[6'h16];
assign inst_bne    = op_31_26_d[6'h17];
assign inst_lu12i_w= op_31_26_d[6'h05] & ~ds_inst[25];

// One-hot ALU operation select.
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

assign need_ui5   =  inst_slli_w | inst_srli_w | inst_srai_w;
assign need_si12  =  inst_addi_w | inst_ld_w | inst_st_w;
assign need_si16  =  inst_jirl | inst_beq | inst_bne;
assign need_si20  =  inst_lu12i_w;
assign need_si26  =  inst_b | inst_bl;
// jirl/bl use the constant 4 as ALU operand 2 (added to the PC, see src1_is_pc).
assign src2_is_4  =  inst_jirl | inst_bl;

assign imm = src2_is_4 ? 32'h4                      :
             need_si20 ? {i20[19:0], 12'b0}         :
/*need_ui5 || need_si12*/{{20{i12[11]}}, i12[11:0]} ;

assign br_offs = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                             {{14{i16[15]}}, i16[15:0], 2'b0} ;
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

assign load_op       = inst_ld_w;
// beq/bne/st.w read rd as their second source register instead of rk.
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;

assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w |
                       inst_srli_w |
                       inst_srai_w |
                       inst_addi_w |
                       inst_ld_w   |
                       inst_st_w   |
                       inst_lu12i_w|
                       inst_jirl   |
                       inst_bl     ;

assign res_from_mem  = inst_ld_w;
assign dst_is_r1     = inst_bl;
// Whether the instruction writes a general-purpose register.
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we        = inst_st_w;
// bl always writes r1; every other instruction writes rd.
assign dest          = dst_is_r1 ? 5'd01 : rd;
// Stall variant (disabled):
// assign dest         = dst_is_r31   ? 5'd31 :
//                       dst_is_rt    ? rt    :
//                       inst_no_dest ? 5'd0  :  rd;

assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;
regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
    );

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;
// lab5 forwarding (disabled):
// assign rs_value = rs_wait ? (rs == EXE_dest? EXE_result:
//                             rs == MEM_dest?MEM_result:WB_result)
//                             : rf_rdata1;
// assign rt_value = rt_wait ? (rt == EXE_dest? EXE_result:
//                             rt == MEM_dest?MEM_result:WB_result)
//                             : rf_rdata2;

// Branch resolution: only a valid instruction in this stage may redirect fetch.
assign rj_eq_rd = (rj_value == rkd_value);
assign br_taken = (   inst_beq  &&  rj_eq_rd
                   || inst_bne  && !rj_eq_rd
                   || inst_jirl
                   || inst_bl
                   || inst_b
                  ) && ds_valid;
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);

// lab4 hazard detection (disabled):
// wire        src1_no_rs;   // rs field is non-zero but rs is not read from the register file
// wire        src2_no_rt;   // rt field is non-zero but rt is not read from the register file
// assign src1_no_rs   = 1'b0;
// assign src2_no_rt   = inst_addiu | load_op|inst_jal|inst_lui;
// wire        rs_wait,rt_wait;
// assign rs_wait      = ~src1_no_rs & (rs!=5'd0)
//                         & ( (rs==EXE_dest) | (rs==MEM_dest) | (rs==WB_dest));
// assign rt_wait      = ~src2_no_rt & (rt!=5'd0)
//                         & ( (rt==EXE_dest) | (rt==MEM_dest) | (rt==WB_dest));
// assign br_stall = br_taken & load_stall & {5{ds_valid}};  // added
// assign load_stall = (rs_wait & (rs == EXE_dest) & es_load_op ) ||
//                 (rt_wait & (rt == EXE_dest) & es_load_op );
// wire        inst_no_dest;
// assign inst_no_dest = inst_beq|inst_bne|inst_jr|inst_sw;

endmodule
4. EXE_stage
`include "mycpu.h"

// EXE (execute) stage of the five-stage pipeline.
//
// Latches ds_to_es_bus, selects the two ALU operands, runs the ALU, drives the
// data-SRAM request (address = ALU result, write data = rkd value) and forwards
// {res_from_mem, gr_we, dest, alu_result, pc} to the MEM stage.
module exe_stage(
    input                          clk           ,
    input                          reset         ,
    // allowin handshake
    input                          ms_allowin    ,
    output                         es_allowin    ,
    // from ds
    input                          ds_to_es_valid,
    input  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,
    // to ms
    output                         es_to_ms_valid,
    output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus  ,
    // data sram interface
    output        data_sram_en   ,
    output [ 3:0] data_sram_we   ,
    output [31:0] data_sram_addr ,
    output [31:0] data_sram_wdata
    // Disabled forwarding ports kept from the original listing:
    // output [ 4:0] EXE_dest       ,
    // output        es_load_op
    // output [31:0] EXE_result
);

reg         es_valid      ;
wire        es_ready_go   ;

reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op     ;
wire        es_load_op    ;
// wire        es_src1_is_sa ;
wire        es_src1_is_pc ;
wire        es_src2_is_imm;
// wire        es_src2_is_8  ;
wire        es_gr_we      ;
wire        es_mem_we     ;
wire [ 4:0] es_dest       ;
wire [31:0] es_imm        ;
wire [31:0] es_rj_value   ;
wire [31:0] es_rkd_value  ;
wire [31:0] es_pc         ;
assign {es_alu_op      ,  //149:138
        es_load_op     ,  //137:137
        es_src1_is_pc  ,  //136:136
        es_src2_is_imm ,  //135:135
        es_gr_we       ,  //134:134
        es_mem_we      ,  //133:133
        es_dest        ,  //132:128
        es_imm         ,  //127:96
        es_rj_value    ,  //95 :64
        es_rkd_value   ,  //63 :32
        es_pc             //31 :0
       } = ds_to_es_bus_r;

wire [31:0] es_alu_src1   ;
wire [31:0] es_alu_src2   ;
wire [31:0] es_alu_result ;

wire        es_res_from_mem;

assign es_res_from_mem = es_load_op;
assign es_to_ms_bus = {es_res_from_mem,  //70:70
                       es_gr_we       ,  //69:69
                       es_dest        ,  //68:64
                       es_alu_result  ,  //63:32
                       es_pc             //31:0
                      };

assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid =  es_valid && es_ready_go;
always @(posedge clk) begin
    if (reset) begin
        es_valid <= 1'b0;
    end
    else if (es_allowin) begin
        es_valid <= ds_to_es_valid;
    end

    if (ds_to_es_valid && es_allowin) begin
        ds_to_es_bus_r <= ds_to_es_bus;
    end
end

// Earlier operand selection (disabled):
// assign es_alu_src1 = es_src1_is_sa  ? {27'b0, es_imm[10:6]} :
//                      es_src1_is_pc  ? es_pc[31:0] :
//                                       es_rs_value;
// assign es_alu_src2 = es_src2_is_imm ? {{16{es_imm[15]}}, es_imm[15:0]} :
//                      es_src2_is_8   ? 32'd8 :
//                                       es_rt_value;
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm : es_rkd_value;

alu u_alu(
    .alu_op     (es_alu_op    ),
    .alu_src1   (es_alu_src1  ),
    .alu_src2   (es_alu_src2  ),
    .alu_result (es_alu_result)
    );

assign data_sram_en    = 1'b1;
// Write all four byte lanes only for a valid store in this stage.
assign data_sram_we    = es_mem_we && es_valid ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
// assign data_sram_wdata = es_rt_value;   // (disabled) original author noted this may need changing
assign data_sram_wdata = es_rkd_value;

// assign EXE_dest = es_dest & {5{es_valid}};
// assign EXE_result = es_alu_result;
endmodule
5. MEM_stage（注：下方粘贴的代码实际是 exe_stage 模块，与第 4 节 EXE_stage 的代码相同，并非 mem_stage 模块）
`include "mycpu.h"

// EXE (execute) stage of the five-stage pipeline.
//
// Latches ds_to_es_bus, selects the two ALU operands, runs the ALU, drives the
// data-SRAM request (address = ALU result, write data = rkd value) and forwards
// {res_from_mem, gr_we, dest, alu_result, pc} to the MEM stage.
//
// NOTE(review): in the article this listing appears under the heading
// "5. MEM_stage", but the module defined here is exe_stage.
module exe_stage(
    input                          clk           ,
    input                          reset         ,
    // allowin handshake
    input                          ms_allowin    ,
    output                         es_allowin    ,
    // from ds
    input                          ds_to_es_valid,
    input  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,
    // to ms
    output                         es_to_ms_valid,
    output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus  ,
    // data sram interface
    output        data_sram_en   ,
    output [ 3:0] data_sram_we   ,
    output [31:0] data_sram_addr ,
    output [31:0] data_sram_wdata
    // Disabled forwarding ports kept from the original listing:
    // output [ 4:0] EXE_dest       ,
    // output        es_load_op
    // output [31:0] EXE_result
);

reg         es_valid      ;
wire        es_ready_go   ;

reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op     ;
wire        es_load_op    ;
// wire        es_src1_is_sa ;
wire        es_src1_is_pc ;
wire        es_src2_is_imm;
// wire        es_src2_is_8  ;
wire        es_gr_we      ;
wire        es_mem_we     ;
wire [ 4:0] es_dest       ;
wire [31:0] es_imm        ;
wire [31:0] es_rj_value   ;
wire [31:0] es_rkd_value  ;
wire [31:0] es_pc         ;
assign {es_alu_op      ,  //149:138
        es_load_op     ,  //137:137
        es_src1_is_pc  ,  //136:136
        es_src2_is_imm ,  //135:135
        es_gr_we       ,  //134:134
        es_mem_we      ,  //133:133
        es_dest        ,  //132:128
        es_imm         ,  //127:96
        es_rj_value    ,  //95 :64
        es_rkd_value   ,  //63 :32
        es_pc             //31 :0
       } = ds_to_es_bus_r;

wire [31:0] es_alu_src1   ;
wire [31:0] es_alu_src2   ;
wire [31:0] es_alu_result ;

wire        es_res_from_mem;

assign es_res_from_mem = es_load_op;
assign es_to_ms_bus = {es_res_from_mem,  //70:70
                       es_gr_we       ,  //69:69
                       es_dest        ,  //68:64
                       es_alu_result  ,  //63:32
                       es_pc             //31:0
                      };

assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid =  es_valid && es_ready_go;
always @(posedge clk) begin
    if (reset) begin
        es_valid <= 1'b0;
    end
    else if (es_allowin) begin
        es_valid <= ds_to_es_valid;
    end

    if (ds_to_es_valid && es_allowin) begin
        ds_to_es_bus_r <= ds_to_es_bus;
    end
end

// Earlier operand selection (disabled):
// assign es_alu_src1 = es_src1_is_sa  ? {27'b0, es_imm[10:6]} :
//                      es_src1_is_pc  ? es_pc[31:0] :
//                                       es_rs_value;
// assign es_alu_src2 = es_src2_is_imm ? {{16{es_imm[15]}}, es_imm[15:0]} :
//                      es_src2_is_8   ? 32'd8 :
//                                       es_rt_value;
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm : es_rkd_value;

alu u_alu(
    .alu_op     (es_alu_op    ),
    .alu_src1   (es_alu_src1  ),
    .alu_src2   (es_alu_src2  ),
    .alu_result (es_alu_result)
    );

assign data_sram_en    = 1'b1;
// Write all four byte lanes only for a valid store in this stage.
assign data_sram_we    = es_mem_we && es_valid ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
// assign data_sram_wdata = es_rt_value;   // (disabled) original author noted this may need changing
assign data_sram_wdata = es_rkd_value;

// assign EXE_dest = es_dest & {5{es_valid}};
// assign EXE_result = es_alu_result;
endmodule
6. WB_stage
`include "mycpu.h"

// WB (write-back) stage of the five-stage pipeline.
//
// Latches ms_to_ws_bus, drives the register-file write port through
// ws_to_rf_bus (write enable gated by ws_valid) and exposes the same
// information on the debug trace outputs.
module wb_stage(
    input                           clk           ,
    input                           reset         ,
    // allowin handshake
    output                          ws_allowin    ,
    // from ms
    input                           ms_to_ws_valid,
    input  [`MS_TO_WS_BUS_WD -1:0]  ms_to_ws_bus  ,
    // to rf: for write back
    output [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus  ,
    // trace debug interface
    output [31:0] debug_wb_pc      ,
    output [ 3:0] debug_wb_rf_we   ,
    output [ 4:0] debug_wb_rf_wnum ,
    output [31:0] debug_wb_rf_wdata
    // Disabled forwarding ports kept from the original listing:
    // lab4
    // output [ 4:0] WB_dest         ,
    // lab5
    // output [31:0] WB_result
);

reg         ws_valid;
wire        ws_ready_go;

reg [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus_r;
wire        ws_gr_we;
wire [ 4:0] ws_dest;
wire [31:0] ws_final_result;
wire [31:0] ws_pc;
assign {ws_gr_we       ,  //69:69
        ws_dest        ,  //68:64
        ws_final_result,  //63:32
        ws_pc             //31:0
       } = ms_to_ws_bus_r;

wire        rf_we;
wire [4 :0] rf_waddr;
wire [31:0] rf_wdata;
assign ws_to_rf_bus = {rf_we   ,  //37:37
                       rf_waddr,  //36:32
                       rf_wdata   //31:0
                      };

assign ws_ready_go = 1'b1;
assign ws_allowin  = !ws_valid || ws_ready_go;
always @(posedge clk) begin
    if (reset) begin
        ws_valid <= 1'b0;
    end
    else if (ws_allowin) begin
        ws_valid <= ms_to_ws_valid;
    end

    if (ms_to_ws_valid && ws_allowin) begin
        ms_to_ws_bus_r <= ms_to_ws_bus;
    end
end

// Only a valid instruction in this stage may write the register file.
assign rf_we    = ws_gr_we && ws_valid;
assign rf_waddr = ws_dest;
assign rf_wdata = ws_final_result;

// debug info generate
assign debug_wb_pc       = ws_pc;
assign debug_wb_rf_we    = {4{rf_we}};
assign debug_wb_rf_wnum  = ws_dest;
assign debug_wb_rf_wdata = ws_final_result;

// assign WB_dest = ws_dest & {5{ws_valid}};
// assign WB_result = ws_final_result;
endmodule

相关文章:

LoongArch 五级流水线实现

在单周期的基础上进行拆分成取指、译码、执行、访存、写回五级流水线。 mycpu_top.v include "mycpu.h"module id_stage(input clk ,input reset ,//allowininput …...

「Qt中文教程指南」如何创建基于Qt Widget的应用程序(四)

Qt 是目前最先进、最完整的跨平台C++开发工具。它不仅完全实现了一次编写，所有平台无差别运行，更提供了几乎所有开发过程中需要用到的工具。如今，Qt已被运用于超过70个行业、数千家企业，支持数百万设备及应用。 本文描述了如何使用…...

11、SpringCloud -- 利用redis优化查询秒杀商品的数据(就是可以把商品数据先存到redis中)

目录 秒杀商品数据存到redis中并查询需求hash理解代码：RedisService商品数据初始化：查询 测试： 秒杀商品数据存到redis中并查询 需求 利用redis优化查询秒杀商品的数据，就是可以把商品数据先存到redis中，要查的时候先…...

计算节点上iptables安全组分析

计算节点上iptables安全组分析 之前介绍过neutron 安全组基于iptables 和 ct 实现，分析一下计算节点上面的neutron 安全组的iptables，加深一下理解iptables以及安全组的实现。（PS: 如下基于openstack stein) 查看某计算节点上面的iptables …...

香港科技大学广州|可持续能源与环境学域博士招生宣讲会—上海专场!!!(暨全额奖学金政策)

香港科技大学广州｜可持续能源与环境学域博士招生宣讲会—上海专场！！！（暨全额奖学金政策） “面向未来改变游戏规则的——可持续能源与环境学域” 专注于能源环境跨学…...

有没有什么网站可以在线做视频脚本?批量制作视频,批量替换素材混剪?

随着视频内容的普及和需求的不断增长，越来越多的个人和团队开始涉足视频制作领域。为了提高效率并满足大量制作需求，许多在线视频制作工具应运而生，提供了一系列便捷的功能，如在线视频脚本编辑、批量制作视频、批量替换素材和混剪…...

【python数学建模】特征值与特征向量运用

1、求数列通项 （1）转化为求矩阵的幂次问题 例：求斐波那契数列的通项公式 已知斐波那契数列满足： $F_{k+2}=F_{k+1}+F_{k}$ （a） 降阶：将二阶差分方程转化为一阶…...

什么是 CNN? 卷积神经网络? 怎么用 CNN 进行分类?(1)

先看卷积是啥，url: https://www.bilibili.com/video/BV1JX4y1K7Dr/?spm_id_from=333.337.search-card.all.click&vd_source=7a1a0bc74158c6993c7355c5490fc600 下面这个式子就是卷积 看完了，感觉似懂非懂 下一个参考视频：https://www.y…...

java解决修改图片尺寸,压缩图片后出现背景变黑,图片字体模糊问题

将以下数学公式的图片使用Hutool提供的图片工具类改变尺寸 代码如下: package com.jason.common.file.word;import cn.hutool.core.img.ImgUtil; import cn.hutool.core.io.FileUtil;import javax.imageio.ImageIO; import java.awt.*; import java.awt.image.BufferedImage;…...

jq/js检测鼠标指针移动离开页面

通过 mouseout 鼠标事件，判断鼠标去往哪个元素 知识点：relatedTarget 事件属性 定义和用法 relatedTarget 事件属性返回与事件的目标节点相关的节点。 对于 mouseover 事件来说，该属性是鼠标指针移到目标节点上时所离开的那个节点。 对于 …...

ICC2: 如何在显示GUI操作产生的命令

我正在「拾陆楼」和朋友们讨论有趣的话题，你一起来吧？ 拾陆楼知识星球入口 ICC2：自定义快捷键和菜单 VIEW -> Preference -> Global Settings 把display commands in logging console 下面几个都勾上即可。...

内网渗透——macOS上搭建Web服务器

# 公网访问macOS本地web服务器【内网穿透】 文章目录 1. 启动Apache服务器2. 公网访问本地web服务2.1 本地安装配置cpolar2.2 创建隧道2.3 测试访问公网地址3. 配置固定二级子域名3.1 保留一个二级子域名3.2 配置二级子域名4. 测试访问公网固定二级子域名 以macOS自带的Apache…...

Centos下用nodejs实现一个简单的web服务器

WebRTC是音视频直播中最常用的一个框架，在使用的过程中，我们就需要实现一个服务器端。本文以nodejs实现一个服务器为例，讲述一下在centos下如何用nodejs实现一个简单的web服务器。 一、安装nodejs 在linux环境下安装nodejs有多种方式…...

3.10每日一题(三角有理函数积分(三角函数加减乘除))

1、通过类型判别方法>判断出为凑 tanx 2、加项减项拆常用的积分公式 注&#xff1a;tanx的导数是&#xff1a;cosx的平方分之一 cosx的平方分之一 1 tanx arctanx的求导公式要记住...

python练习(猜数字,99乘法表)

python练习(猜数字，99乘法表) 猜数字 import random num1=random.choice(range(1,101))for i in range(11):num2=input("plz input a number:")num2=int(num2)if num1<num2:print("太大了，小一点")elif num1>num2:print("…...

正确部署Baichuan2(Ubuntu20.4) 步骤及可能出现的问题

部署其实是不太复杂的,但实际上也耗费了接近2-3天的时间去不断的设置 1 硬件配置信息 采用esxi 虚拟化的方式将T4 卡穿透给esxi 种的ubuntu20.4虚拟机 CPU给到8 core 内存至少32GB以上 T4卡是16GB 2 预先准备OS环境 这里使用的是ubuntu20.4版本,esxi中需要设置uefI启动方…...

docker 部署prometheus和grafana

1.启动node 容器 docker run -d -p 9100:9100 -v "/proc:/host/proc:ro" -v "/sys:/host/sys:ro" -v "/:/rootfs:ro" --net="bridge" prom/node-exporter 2.访问http://192.168.1.122:9100/metrics 3.创建文件/home/prometheus/ 下…...

在本地模拟C/S,Socket套接字的使用

public class SocketTCP01Server {public static void main(String[] args) throws IOException {/**1.在本机的 9999 端口监听 ，等待连接细节： 要求在本机没有其他服务在监听9999细节：这个ServerSocket 可以通过accept()返回多个Socket[多个客…...

香港科技大学广州|可持续能源与环境学域博士招生宣讲会—东南大学专场!!!(暨全额奖学金政策)

香港科技大学广州｜可持续能源与环境学域博士招生宣讲会—东南大学专场！！！（暨全额奖学金政策） “面向未来改变游戏规则的——可持续能源与环境学域” 专注于能源环境跨学科尖端技术研究 培养可持续能源技术…...

[Leetcode] 0108. 将有序数组转换为二叉搜索树

108. 将有序数组转换为二叉搜索树 题目描述 给你一个整数数组 nums ，其中元素已经按 升序 排列，请你将其转换为一棵 高度平衡 二叉搜索树。 高度平衡 二叉树是一棵满足「每个节点的左右两个子树的高度差的绝对值不超过 1 」的二叉树。 示例 1：…...

Admin.Net中的消息通信SignalR解释

定义集线器接口 IOnlineUserHub public interface IOnlineUserHub {/// 在线用户列表Task OnlineUserList(OnlineUserList context);/// 强制下线Task ForceOffline(object context);/// 发布站内消息Task PublicNotice(SysNotice context);/// 接收消息Task ReceiveMessage(…...

基于服务器使用 apt 安装、配置 Nginx

🧾 一、查看可安装的 Nginx 版本 首先，你可以运行以下命令查看可用版本： apt-cache madison nginx-core输出示例： nginx-core | 1.18.0-6ubuntu14.6 | http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages ng…...

STM32F4基本定时器使用和原理详解

STM32F4基本定时器使用和原理详解 前言如何确定定时器挂载在哪条时钟线上配置及使用方法参数配置PrescalerCounter ModeCounter Periodauto-reload preloadTrigger Event Selection 中断配置生成的代码及使用方法初始化代码基本定时器触发DCA或者ADC的代码讲解中断代码定时启动…...

令牌桶 滑动窗口->限流 分布式信号量->限并发的原理 lua脚本分析介绍

文章目录 前言限流限制并发的实际理解限流令牌桶代码实现结果分析令牌桶lua的模拟实现原理总结： 滑动窗口代码实现结果分析lua脚本原理解析 限并发分布式信号量代码实现结果分析lua脚本实现原理 双注解去实现限流 并发结果分析： 实际业务去理解体会统一注…...

《基于Apache Flink的流处理》笔记

思维导图 1-3 章 4-7章 8-11 章 参考资料 源码： https://github.com/streaming-with-flink 博客 https://flink.apache.org/bloghttps://www.ververica.com/blog 聚会及会议 https://flink-forward.orghttps://www.meetup.com/topics/apache-flink https://n…...

大数据学习(132)-HIve数据分析

​​​​&#x1f34b;&#x1f34b;大数据学习&#x1f34b;&#x1f34b; &#x1f525;系列专栏&#xff1a; &#x1f451;哲学语录: 用力所能及&#xff0c;改变世界。 &#x1f496;如果觉得博主的文章还不错的话&#xff0c;请点赞&#x1f44d;收藏⭐️留言&#x1f4…...

网站指纹识别

网站指纹识别 网站的最基本组成：服务器（操作系统）、中间件（web容器）、脚本语言、数据厍 为什么要了解这些？举个例子：发现了一个文件读取漏洞，我们需要读/etc/passwd，如…...

使用Spring AI和MCP协议构建图片搜索服务

目录 使用Spring AI和MCP协议构建图片搜索服务 引言 技术栈概览 项目架构设计 架构图 服务端开发 1. 创建Spring Boot项目 2. 实现图片搜索工具 3. 配置传输模式 Stdio模式（本地调用） SSE模式（远程调用） 4. 注册工具提…...

Java数值运算常见陷阱与规避方法

整数除法中的舍入问题 问题现象 当开发者预期进行浮点除法却误用整数除法时,会出现小数部分被截断的情况。典型错误模式如下: void process(int value) {double half = value / 2; // 整数除法导致截断// 使用half变量 }此时...

【JVM面试篇】高频八股汇总——类加载和类加载器

目录 1. 讲一下类加载过程？ 2. Java创建对象的过程？ 3. 对象的生命周期？ 4. 类加载器有哪些？ 5. 双亲委派模型的作用（好处）？ 6. 讲一下类的加载和双亲委派原则？ 7. 双亲委派模…...