当前位置: 首页 > news >正文

LoongArch 五级流水线实现

在单周期的基础上进行拆分成取指、译码、执行、访存、写回五级流水线。

mycpu_top.v
`include "mycpu.h"module id_stage(input                          clk           ,input                          reset         ,//allowininput                          es_allowin    ,output                         ds_allowin    ,//from fsinput                          fs_to_ds_valid,input  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus  ,//to esoutput                         ds_to_es_valid,output [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,//to fsoutput [`BR_BUS_WD       -1:0] br_bus        ,//to rf: for write backinput  [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus  //// output [ 4                 :0] WB_dest,EXE_dest,MEM_dest,// input                          es_load_op    ,// input  [31                 :0] EXE_result,MEM_result,WB_result
);
// wire         br_stall;        //增加
// wire         load_stall;
wire         br_taken;
reg          ds_valid   ;
wire [31:0]  br_target;// assign br_bus       = {br_stall,br_taken,br_target};  //修改wire        ds_ready_go;wire [31                 :0] fs_pc;
reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;
assign fs_pc = fs_to_ds_bus[31:0];wire [31:0] ds_inst;
wire [31:0] ds_pc  ;
assign {ds_inst,ds_pc  } = fs_to_ds_bus_r;wire        rf_we   ;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37rf_waddr,  //36:32rf_wdata   //31:0} = ws_to_rf_bus;wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        res_from_mem;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire [ 4:0] dest;
wire [31:0] imm;
// wire [31:0] rs_value;
// wire [31:0] rt_value;
wire [31:0] rj_value,rkd_value;
wire [31:0] br_offs,jirl_offs;wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;wire        need_ui5;
wire        need_si12;
wire        need_si16;
wire        need_si20;
wire        need_si26;
wire        src2_is_4;wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;wire        rs_eq_rt;assign br_bus       = {br_taken,br_target};assign ds_to_es_bus = {alu_op      ,  //149:138load_op     ,  //137:137src1_is_pc  ,  //136:136src2_is_imm ,  //135:135gr_we       ,  //134:134mem_we      ,  //133:133dest        ,  //132:128imm         ,  //127:96rj_value    ,  //95 :64rkd_value   ,  //63 :32ds_pc          //31 :0};assign ds_ready_go    = ds_valid;
//lab4
// assign ds_ready_go    = ds_valid & ~rs_wait & ~rt_wait;
//lab5
// assign ds_ready_go    = ds_valid  & ~load_stall;
assign ds_allowin     = !ds_valid || ds_ready_go && es_allowin;
assign ds_to_es_valid = ds_valid && ds_ready_go;always @(posedge clk ) beginif (reset) beginds_valid <= 1'b0;endelse if (ds_allowin) beginds_valid <= fs_to_ds_valid;end
end
always @(posedge clk) beginif (fs_to_ds_valid && ds_allowin) beginfs_to_ds_bus_r <= fs_to_ds_bus;end
endassign op_31_26  = ds_inst[31:26];
assign op_25_22  = ds_inst[25:22];
assign op_21_20  = ds_inst[21:20];
assign op_19_15  = ds_inst[19:15];assign rd   = ds_inst[ 4: 0];
assign rj   = ds_inst[ 9: 5];
assign rk   = ds_inst[14:10];assign i12  = ds_inst[21:10];
assign i20  = ds_inst[24: 5];
assign i16  = ds_inst[25:10];
assign i26  = {ds_inst[ 9: 0], ds_inst[25:10]};decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d ));
decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d ));
decoder_2_4  u_dec2(.in(op_21_20 ), .out(op_21_20_d ));
decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d ));assign inst_add_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl   = op_31_26_d[6'h13];
assign inst_b      = op_31_26_d[6'h14];
assign inst_bl     = op_31_26_d[6'h15];
assign inst_beq    = op_31_26_d[6'h16];
assign inst_bne    = op_31_26_d[6'h17];
assign inst_lu12i_w= op_31_26_d[6'h05] & ~ds_inst[25];assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w| inst_jirl | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;assign need_ui5   =  inst_slli_w | inst_srli_w | inst_srai_w;
assign need_si12  =  inst_addi_w | inst_ld_w | inst_st_w;
assign need_si16  =  inst_jirl | inst_beq | inst_bne;
assign need_si20  =  inst_lu12i_w;
assign need_si26  =  inst_b | inst_bl;
assign src2_is_4  =  inst_jirl | inst_bl;//看看是不是要加4.assign imm = src2_is_4 ? 32'h4                      :need_si20 ? {i20[19:0], 12'b0}         :
/*need_ui5 || need_si12*/{{20{i12[11]}}, i12[11:0]} ;assign br_offs = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :{{14{i16[15]}}, i16[15:0], 2'b0} ;
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};assign load_op      = inst_ld_w;
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w ;assign src1_is_pc    = inst_jirl | inst_bl;assign src2_is_imm   = inst_slli_w |inst_srli_w |inst_srai_w |inst_addi_w |inst_ld_w   |inst_st_w   |inst_lu12i_w|inst_jirl   |inst_bl     ;assign res_from_mem  = inst_ld_w;
assign dst_is_r1     = inst_bl;
//  是否需要写入通用寄存器
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;assign mem_we        = inst_st_w;
//这里需要更改
assign dest          = dst_is_r1 ? 5'd01 : rd;
//阻塞
// assign dest         = dst_is_r31   ? 5'd31 :
//                       dst_is_rt    ? rt    : 
//                       inst_no_dest ? 5'd0  :  rd;assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd :rk;
regfile u_regfile(.clk    (clk      ),.raddr1 (rf_raddr1),.rdata1 (rf_rdata1),.raddr2 (rf_raddr2),.rdata2 (rf_rdata2),.we     (rf_we    ),.waddr  (rf_waddr ),.wdata  (rf_wdata ));assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;
//lab5
// assign rs_value = rs_wait ? (rs == EXE_dest? EXE_result:
//                             rs == MEM_dest?MEM_result:WB_result)
//                             : rf_rdata1;
// assign rt_value = rt_wait ? (rt == EXE_dest? EXE_result:
//                             rt == MEM_dest?MEM_result:WB_result)
//                             : rf_rdata2;assign rj_eq_rd = (rj_value == rkd_value);
assign br_taken = (   inst_beq  &&  rj_eq_rd|| inst_bne  && !rj_eq_rd|| inst_jirl|| inst_bl|| inst_b) && ds_valid;
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :/*inst_jirl*/ (rj_value + jirl_offs);
//lab4
// wire        src1_no_rs;//指令rs域非0,且不是从寄存器堆读rs
// wire        src2_no_rt;//指令rt域非0,且不是从寄存器堆读rt
// assign src1_no_rs   = 1'b0;
// assign src2_no_rt   = inst_addiu | load_op|inst_jal|inst_lui;// wire        rs_wait,rt_wait;
// assign rs_wait      = ~src1_no_rs & (rs!=5'd0)
//                         & ( (rs==EXE_dest) | (rs==MEM_dest) | (rs==WB_dest));// assign rt_wait      = ~src2_no_rt & (rt!=5'd0)
//                         & ( (rt==EXE_dest) | (rt==MEM_dest) | (rt==WB_dest));// assign br_stall = br_taken & load_stall & {5{ds_valid}};  //增加        
// assign load_stall = (rs_wait & (rs == EXE_dest) & es_load_op ) ||
//                 (rt_wait & (rt == EXE_dest) & es_load_op );  // wire        inst_no_dest;
// assign inst_no_dest = inst_beq|inst_bne|inst_jr|inst_sw;endmodule
2. IF_stage.v
`include "mycpu.h"module if_stage(input                          clk            ,input                          reset          ,//allwoininput                          ds_allowin     ,//brbusinput  [`BR_BUS_WD       -1:0] br_bus         ,//to dsoutput                         fs_to_ds_valid ,output [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus   ,// inst sram interfaceoutput        inst_sram_en   ,output [ 3:0] inst_sram_we  ,output [31:0] inst_sram_addr ,output [31:0] inst_sram_wdata,input  [31:0] inst_sram_rdata
);reg         fs_valid;//表示当前阶段是否有效
wire        fs_ready_go;
wire        fs_allowin; //并确定 IF 阶段是否接受取指令的请求
wire        to_fs_valid;//表示是否可以将指令传递到下一个阶段// wire         pre_fs_ready_go;  //增加
// wire         br_stall;        //增加
// assign to_fs_valid      = ~reset && pre_fs_ready_go;//修改
// assign pre_fs_ready_go  = ~br_stall;  //增加wire [31:0] seq_pc;
wire [31:0] nextpc;wire        br_taken;
wire [31:0] br_target;
assign {br_taken,br_target} = br_bus;
// assign {br_stall,br_taken,br_target} = br_bus; //修改     分支预测
// assign inst_sram_en = to_fs_valid && fs_allowin && ~br_stall; //修改
wire [31:0] fs_inst;
reg  [31:0] fs_pc;
assign fs_to_ds_bus = {fs_inst ,fs_pc   };// pre-IF stage
assign to_fs_valid  = ~reset;
assign seq_pc       = fs_pc + 3'h4;
assign nextpc       = br_taken ? br_target : seq_pc; // IF stage
assign fs_ready_go    = 1'b1;
assign fs_allowin     = !fs_valid || (fs_ready_go && ds_allowin);
assign fs_to_ds_valid =  fs_valid && fs_ready_go;always @(posedge clk) beginif (reset) beginfs_valid <= 1'b0;endelse if (fs_allowin) beginfs_valid <= to_fs_valid;endif (reset) beginfs_pc <= 32'h1bfffffc;  //trick: to make nextpc be 0x1c000000 during reset endelse if (to_fs_valid && fs_allowin) beginfs_pc <= nextpc;end
endassign inst_sram_en    = to_fs_valid && fs_allowin;
assign inst_sram_we   = 4'h0;
assign inst_sram_addr  = nextpc;
assign inst_sram_wdata = 32'b0;assign fs_inst         = inst_sram_rdata;endmodule
3. ID_stage
`include "mycpu.h"module id_stage(input                          clk           ,input                          reset         ,//allowininput                          es_allowin    ,output                         ds_allowin    ,//from fsinput                          fs_to_ds_valid,input  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus  ,//to esoutput                         ds_to_es_valid,output [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,//to fsoutput [`BR_BUS_WD       -1:0] br_bus        ,//to rf: for write backinput  [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus  //// output [ 4                 :0] WB_dest,EXE_dest,MEM_dest,// input                          es_load_op    ,// input  [31                 :0] EXE_result,MEM_result,WB_result
);
// wire         br_stall;        //增加
// wire         load_stall;
wire         br_taken;
reg          ds_valid   ;
wire [31:0]  br_target;// assign br_bus       = {br_stall,br_taken,br_target};  //修改wire        ds_ready_go;wire [31                 :0] fs_pc;
reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;
assign fs_pc = fs_to_ds_bus[31:0];wire [31:0] ds_inst;
wire [31:0] ds_pc  ;
assign {ds_inst,ds_pc  } = fs_to_ds_bus_r;wire        rf_we   ;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37rf_waddr,  //36:32rf_wdata   //31:0} = ws_to_rf_bus;wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        res_from_mem;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire [ 4:0] dest;
wire [31:0] imm;
// wire [31:0] rs_value;
// wire [31:0] rt_value;
wire [31:0] rj_value,rkd_value;
wire [31:0] br_offs,jirl_offs;wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;wire        need_ui5;
wire        need_si12;
wire        need_si16;
wire        need_si20;
wire        need_si26;
wire        src2_is_4;wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;wire        rs_eq_rt;assign br_bus       = {br_taken,br_target};assign ds_to_es_bus = {alu_op      ,  //149:138load_op     ,  //137:137src1_is_pc  ,  //136:136src2_is_imm ,  //135:135gr_we       ,  //134:134mem_we      ,  //133:133dest        ,  //132:128imm         ,  //127:96rj_value    ,  //95 :64rkd_value   ,  //63 :32ds_pc          //31 :0};assign ds_ready_go    = ds_valid;
//lab4
// assign ds_ready_go    = ds_valid & ~rs_wait & ~rt_wait;
//lab5
// assign ds_ready_go    = ds_valid  & ~load_stall;
assign ds_allowin     = !ds_valid || ds_ready_go && es_allowin;
assign ds_to_es_valid = ds_valid && ds_ready_go;always @(posedge clk ) beginif (reset) beginds_valid <= 1'b0;endelse if (ds_allowin) beginds_valid <= fs_to_ds_valid;end
end
always @(posedge clk) beginif (fs_to_ds_valid && ds_allowin) beginfs_to_ds_bus_r <= fs_to_ds_bus;end
endassign op_31_26  = ds_inst[31:26];
assign op_25_22  = ds_inst[25:22];
assign op_21_20  = ds_inst[21:20];
assign op_19_15  = ds_inst[19:15];assign rd   = ds_inst[ 4: 0];
assign rj   = ds_inst[ 9: 5];
assign rk   = ds_inst[14:10];assign i12  = ds_inst[21:10];
assign i20  = ds_inst[24: 5];
assign i16  = ds_inst[25:10];
assign i26  = {ds_inst[ 9: 0], ds_inst[25:10]};decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d ));
decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d ));
decoder_2_4  u_dec2(.in(op_21_20 ), .out(op_21_20_d ));
decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d ));assign inst_add_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl   = op_31_26_d[6'h13];
assign inst_b      = op_31_26_d[6'h14];
assign inst_bl     = op_31_26_d[6'h15];
assign inst_beq    = op_31_26_d[6'h16];
assign inst_bne    = op_31_26_d[6'h17];
assign inst_lu12i_w= op_31_26_d[6'h05] & ~ds_inst[25];assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w| inst_jirl | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;assign need_ui5   =  inst_slli_w | inst_srli_w | inst_srai_w;
assign need_si12  =  inst_addi_w | inst_ld_w | inst_st_w;
assign need_si16  =  inst_jirl | inst_beq | inst_bne;
assign need_si20  =  inst_lu12i_w;
assign need_si26  =  inst_b | inst_bl;
assign src2_is_4  =  inst_jirl | inst_bl;//看看是不是要加4.assign imm = src2_is_4 ? 32'h4                      :need_si20 ? {i20[19:0], 12'b0}         :
/*need_ui5 || need_si12*/{{20{i12[11]}}, i12[11:0]} ;assign br_offs = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :{{14{i16[15]}}, i16[15:0], 2'b0} ;
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};assign load_op      = inst_ld_w;
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w ;assign src1_is_pc    = inst_jirl | inst_bl;assign src2_is_imm   = inst_slli_w |inst_srli_w |inst_srai_w |inst_addi_w |inst_ld_w   |inst_st_w   |inst_lu12i_w|inst_jirl   |inst_bl     ;assign res_from_mem  = inst_ld_w;
assign dst_is_r1     = inst_bl;
//  是否需要写入通用寄存器
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;assign mem_we        = inst_st_w;
//这里需要更改
assign dest          = dst_is_r1 ? 5'd01 : rd;
//阻塞
// assign dest         = dst_is_r31   ? 5'd31 :
//                       dst_is_rt    ? rt    : 
//                       inst_no_dest ? 5'd0  :  rd;assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd :rk;
regfile u_regfile(.clk    (clk      ),.raddr1 (rf_raddr1),.rdata1 (rf_rdata1),.raddr2 (rf_raddr2),.rdata2 (rf_rdata2),.we     (rf_we    ),.waddr  (rf_waddr ),.wdata  (rf_wdata ));assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;
//lab5
// assign rs_value = rs_wait ? (rs == EXE_dest? EXE_result:
//                             rs == MEM_dest?MEM_result:WB_result)
//                             : rf_rdata1;
// assign rt_value = rt_wait ? (rt == EXE_dest? EXE_result:
//                             rt == MEM_dest?MEM_result:WB_result)
//                             : rf_rdata2;assign rj_eq_rd = (rj_value == rkd_value);
assign br_taken = (   inst_beq  &&  rj_eq_rd|| inst_bne  && !rj_eq_rd|| inst_jirl|| inst_bl|| inst_b) && ds_valid;
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :/*inst_jirl*/ (rj_value + jirl_offs);
//lab4
// wire        src1_no_rs;//指令rs域非0,且不是从寄存器堆读rs
// wire        src2_no_rt;//指令rt域非0,且不是从寄存器堆读rt
// assign src1_no_rs   = 1'b0;
// assign src2_no_rt   = inst_addiu | load_op|inst_jal|inst_lui;// wire        rs_wait,rt_wait;
// assign rs_wait      = ~src1_no_rs & (rs!=5'd0)
//                         & ( (rs==EXE_dest) | (rs==MEM_dest) | (rs==WB_dest));// assign rt_wait      = ~src2_no_rt & (rt!=5'd0)
//                         & ( (rt==EXE_dest) | (rt==MEM_dest) | (rt==WB_dest));// assign br_stall = br_taken & load_stall & {5{ds_valid}};  //增加        
// assign load_stall = (rs_wait & (rs == EXE_dest) & es_load_op ) ||
//                 (rt_wait & (rt == EXE_dest) & es_load_op );  // wire        inst_no_dest;
// assign inst_no_dest = inst_beq|inst_bne|inst_jr|inst_sw;endmodule
4. EXE_stage
`include "mycpu.h"module exe_stage(input                          clk           ,input                          reset         ,//allowininput                          ms_allowin    ,output                         es_allowin    ,//from dsinput                          ds_to_es_valid,input  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,//to msoutput                         es_to_ms_valid,output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus  ,// data sram interfaceoutput        data_sram_en   ,output [ 3:0] data_sram_we  ,output [31:0] data_sram_addr ,output [31:0] data_sram_wdata // output [ 4:0] EXE_dest       ,// output        es_load_op     // output [31:0] EXE_result
);reg         es_valid      ;
wire        es_ready_go   ;reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op     ;
wire        es_load_op    ;
// wire        es_src1_is_sa ;  
wire        es_src1_is_pc ;
wire        es_src2_is_imm; 
// wire        es_src2_is_8  ;
wire        es_gr_we      ;
wire        es_mem_we     ;
wire [ 4:0] es_dest       ;
wire [31:0] es_imm        ;
wire [31:0] es_rj_value   ;
wire [31:0] es_rkd_value   ;
wire [31:0] es_pc         ;
assign {es_alu_op      ,  //149:138es_load_op     ,  //137:137es_src1_is_pc  ,  //136:136es_src2_is_imm ,  //135:135es_gr_we       ,  //134:134es_mem_we      ,  //133:133es_dest        ,  //132:128es_imm         ,  //127:96es_rj_value    ,  //95 :64es_rkd_value   ,  //63 :32es_pc          //31 :0} = ds_to_es_bus_r;wire [31:0] es_alu_src1   ;
wire [31:0] es_alu_src2   ;
wire [31:0] es_alu_result ;wire        es_res_from_mem;assign es_res_from_mem = es_load_op;
assign es_to_ms_bus = {es_res_from_mem,  //70:70es_gr_we       ,  //69:69es_dest        ,  //68:64es_alu_result  ,  //63:32es_pc             //31:0};assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid =  es_valid && es_ready_go;
always @(posedge clk) beginif (reset) begines_valid <= 1'b0;endelse if (es_allowin) begines_valid <= ds_to_es_valid;endif (ds_to_es_valid && es_allowin) beginds_to_es_bus_r <= ds_to_es_bus;end
end// assign es_alu_src1 = es_src1_is_sa  ? {27'b0, es_imm[10:6]} : 
//                      es_src1_is_pc  ? es_pc[31:0] :
//                                       es_rs_value;
// assign es_alu_src2 = es_src2_is_imm ? {{16{es_imm[15]}}, es_imm[15:0]} : 
//                      es_src2_is_8   ? 32'd8 :
//                                       es_rt_value;
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm : es_rkd_value;alu u_alu(.alu_op     (es_alu_op    ),.alu_src1   (es_alu_src1  ),.alu_src2   (es_alu_src2  ),.alu_result (es_alu_result));assign data_sram_en    = 1'b1;
assign data_sram_we   = es_mem_we&&es_valid ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
// assign data_sram_wdata = es_rt_value;可能需要改
assign data_sram_wdata = es_rkd_value;// assign EXE_dest = es_dest & {5{es_valid}}; 
// assign EXE_result = es_alu_result;
endmodule
5. MEM_stage
`include "mycpu.h"module exe_stage(input                          clk           ,input                          reset         ,//allowininput                          ms_allowin    ,output                         es_allowin    ,//from dsinput                          ds_to_es_valid,input  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,//to msoutput                         es_to_ms_valid,output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus  ,// data sram interfaceoutput        data_sram_en   ,output [ 3:0] data_sram_we  ,output [31:0] data_sram_addr ,output [31:0] data_sram_wdata // output [ 4:0] EXE_dest       ,// output        es_load_op     // output [31:0] EXE_result
);reg         es_valid      ;
wire        es_ready_go   ;reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op     ;
wire        es_load_op    ;
// wire        es_src1_is_sa ;  
wire        es_src1_is_pc ;
wire        es_src2_is_imm; 
// wire        es_src2_is_8  ;
wire        es_gr_we      ;
wire        es_mem_we     ;
wire [ 4:0] es_dest       ;
wire [31:0] es_imm        ;
wire [31:0] es_rj_value   ;
wire [31:0] es_rkd_value   ;
wire [31:0] es_pc         ;
assign {es_alu_op      ,  //149:138es_load_op     ,  //137:137es_src1_is_pc  ,  //136:136es_src2_is_imm ,  //135:135es_gr_we       ,  //134:134es_mem_we      ,  //133:133es_dest        ,  //132:128es_imm         ,  //127:96es_rj_value    ,  //95 :64es_rkd_value   ,  //63 :32es_pc          //31 :0} = ds_to_es_bus_r;wire [31:0] es_alu_src1   ;
wire [31:0] es_alu_src2   ;
wire [31:0] es_alu_result ;wire        es_res_from_mem;assign es_res_from_mem = es_load_op;
assign es_to_ms_bus = {es_res_from_mem,  //70:70es_gr_we       ,  //69:69es_dest        ,  //68:64es_alu_result  ,  //63:32es_pc             //31:0};assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid =  es_valid && es_ready_go;
always @(posedge clk) beginif (reset) begines_valid <= 1'b0;endelse if (es_allowin) begines_valid <= ds_to_es_valid;endif (ds_to_es_valid && es_allowin) beginds_to_es_bus_r <= ds_to_es_bus;end
end// assign es_alu_src1 = es_src1_is_sa  ? {27'b0, es_imm[10:6]} : 
//                      es_src1_is_pc  ? es_pc[31:0] :
//                                       es_rs_value;
// assign es_alu_src2 = es_src2_is_imm ? {{16{es_imm[15]}}, es_imm[15:0]} : 
//                      es_src2_is_8   ? 32'd8 :
//                                       es_rt_value;
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm : es_rkd_value;alu u_alu(.alu_op     (es_alu_op    ),.alu_src1   (es_alu_src1  ),.alu_src2   (es_alu_src2  ),.alu_result (es_alu_result));assign data_sram_en    = 1'b1;
assign data_sram_we   = es_mem_we&&es_valid ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
// assign data_sram_wdata = es_rt_value;可能需要改
assign data_sram_wdata = es_rkd_value;// assign EXE_dest = es_dest & {5{es_valid}}; 
// assign EXE_result = es_alu_result;
endmodule
6. WB_stage
`include "mycpu.h"module wb_stage(input                           clk           ,input                           reset         ,//allowinoutput                          ws_allowin    ,//from msinput                           ms_to_ws_valid,input  [`MS_TO_WS_BUS_WD -1:0]  ms_to_ws_bus  ,//to rf: for write backoutput [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus  ,//trace debug interfaceoutput [31:0] debug_wb_pc     ,output [ 3:0] debug_wb_rf_we ,output [ 4:0] debug_wb_rf_wnum,output [31:0] debug_wb_rf_wdata // lab4// output [ 4:0] WB_dest         ,//lab5// output [31:0] WB_result
);
reg         ws_valid;
wire        ws_ready_go;reg [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus_r;
wire        ws_gr_we;
wire [ 4:0] ws_dest;
wire [31:0] ws_final_result;
wire [31:0] ws_pc;
assign {ws_gr_we       ,  //69:69ws_dest        ,  //68:64ws_final_result,  //63:32ws_pc             //31:0} = ms_to_ws_bus_r;wire        rf_we;
wire [4 :0] rf_waddr;
wire [31:0] rf_wdata;
assign ws_to_rf_bus = {rf_we   ,  //37:37rf_waddr,  //36:32rf_wdata   //31:0};assign ws_ready_go = 1'b1;
assign ws_allowin  = !ws_valid || ws_ready_go;
always @(posedge clk) beginif (reset) beginws_valid <= 1'b0;endelse if (ws_allowin) beginws_valid <= ms_to_ws_valid;endif (ms_to_ws_valid && ws_allowin) beginms_to_ws_bus_r <= ms_to_ws_bus;end
endassign rf_we    = ws_gr_we&&ws_valid;
assign rf_waddr = ws_dest;
assign rf_wdata = ws_final_result;// debug info generate
assign debug_wb_pc       = ws_pc;
assign debug_wb_rf_we   = {4{rf_we}};
assign debug_wb_rf_wnum  = ws_dest;
assign debug_wb_rf_wdata = ws_final_result;// assign WB_dest = ws_dest & {5{ws_valid}};
// assign WB_result = ws_final_result;
endmodule

相关文章:

LoongArch 五级流水线实现

在单周期的基础上进行拆分成取指、译码、执行、访存、写回五级流水线。 mycpu_top.v include "mycpu.h"module id_stage(input clk ,input reset ,//allowininput …...

「Qt中文教程指南」如何创建基于Qt Widget的应用程序(四)

Qt 是目前最先进、最完整的跨平台C开发工具。它不仅完全实现了一次编写&#xff0c;所有平台无差别运行&#xff0c;更提供了几乎所有开发过程中需要用到的工具。如今&#xff0c;Qt已被运用于超过70个行业、数千家企业&#xff0c;支持数百万设备及应用。 本文描述了如何使用…...

11、SpringCloud -- 利用redis优化查询秒杀商品的数据(就是可以把商品数据先存到redis中)

目录 秒杀商品数据存到redis中并查询需求hash理解代码&#xff1a;RedisService商品数据初始化&#xff1a;查询 测试&#xff1a; 秒杀商品数据存到redis中并查询 需求 利用redis优化查询秒杀商品的数据&#xff0c;就是可以把商品数据先存到redis中&#xff0c;要查的时候先…...

计算节点上iptables安全组分析

计算节点上iptables安全组分析 之前介绍过neutron 安全组基于iptables 和 ct 实现&#xff0c;分析一下计算节点上面的neutron 安全组的iptables&#xff0c;加深一下理解iptables以及安全组的实现。&#xff08;PS: 如下基于openstack stein) 查看某计算节点上面的iptables …...

香港科技大学广州|可持续能源与环境学域博士招生宣讲会—上海专场!!!(暨全额奖学金政策)

香港科技大学广州&#xff5c;可持续能源与环境学域博士招生宣讲会—上海专场&#xff01;&#xff01;&#xff01;&#xff08;暨全额奖学金政策&#xff09; “面向未来改变游戏规则的——可持续能源与环境学域” &#xfffd;&#xfffd;&#xfffd;专注于能源环境跨学…...

有没有什么网站可以在线做视频脚本?批量制作视频,批量替换素材混剪?

随着视频内容的普及和需求的不断增长&#xff0c;越来越多的个人和团队开始涉足视频制作领域。为了提高效率并满足大量制作需求&#xff0c;许多在线视频制作工具应运而生&#xff0c;提供了一系列便捷的功能&#xff0c;如在线视频脚本编辑、批量制作视频、批量替换素材和混剪…...

【python数学建模】特征值与特征向量运用

1、求数列通项 &#xff08;1&#xff09;转化为求矩阵的幂次问题 例&#xff1a;求斐波那契数列的通项公式 已知斐波那契数列满足&#xff1a; F k 2 F k 1 F k F_{k2}F_{k1}F_{k} Fk2​Fk1​Fk​ &#xff08;a&#xff09; 降阶&#xff1a;将二阶差分方程转化为一阶…...

什么是 CNN? 卷积神经网络? 怎么用 CNN 进行分类?(1)

先看卷积是啥&#xff0c;url: https://www.bilibili.com/video/BV1JX4y1K7Dr/?spm_id_from333.337.search-card.all.click&vd_source7a1a0bc74158c6993c7355c5490fc600 下面这个式子就是卷积 看完了&#xff0c;感觉似懂非懂 下一个参考视频&#xff1a;https://www.y…...

java解决修改图片尺寸,压缩图片后出现背景变黑,图片字体模糊问题

将以下数学公式的图片使用Hutool提供的图片工具类改变尺寸 代码如下: package com.jason.common.file.word;import cn.hutool.core.img.ImgUtil; import cn.hutool.core.io.FileUtil;import javax.imageio.ImageIO; import java.awt.*; import java.awt.image.BufferedImage;…...

jq/js检测鼠标指针移动离开页面

通过 mouseout 鼠标事件&#xff0c;判断鼠标去往哪个元素 知识点&#xff1a;relatedTarget 事件属性 定义和用法 relatedTarget 事件属性返回与事件的目标节点相关的节点。 对于 mouseover 事件来说&#xff0c;该属性是鼠标指针移到目标节点上时所离开的那个节点。 对于 …...

ICC2: 如何在显示GUI操作产生的命令

我正在「拾陆楼」和朋友们讨论有趣的话题&#xff0c;你⼀起来吧&#xff1f; 拾陆楼知识星球入口 ICC2&#xff1a;自定义快捷键和菜单 VIEW -> Perference -> Global Settings 把display commands in logging console 下面几个都勾上即可。...

内网渗透——macOS上搭建Web服务器

# 公网访问macOS本地web服务器【内网穿透】 文章目录 1. 启动Apache服务器2. 公网访问本地web服务2.1 本地安装配置cpolar2.2 创建隧道2.3 测试访问公网地址3. 配置固定二级子域名3.1 保留一个二级子域名3.2 配置二级子域名4. 测试访问公网固定二级子域名 以macOS自带的Apache…...

Centos下用nodejs实现一个简单的web服务器

WebRTC是音视频直播中最常用的一个框架&#xff0c;在使用的过程中&#xff0c;我们就需要实现一个服务器端。本文以nodejs实现一个服务器为例&#xff0c;讲述一下在centos下如何用nodejs实现一个简单的web服务器。 一、安装nodejs 在linux环境下安装nodejs有多重方式&#x…...

3.10每日一题(三角有理函数积分(三角函数加减乘除))

1、通过类型判别方法>判断出为凑 tanx 2、加项减项拆常用的积分公式 注&#xff1a;tanx的导数是&#xff1a;cosx的平方分之一 cosx的平方分之一 1 tanx arctanx的求导公式要记住...

python练习(猜数字,99乘法表)

python练习(猜数字&#xff0c;99乘法表) 猜数字 import random num1random.choice(range(1,101))for i in range(11):num2input("plz input a number:")num2int(num2)if num1<num2:print("太大了&#xff0c;小一点")elif num1>num2:print("…...

正确部署Baichuan2(Ubuntu20.4) 步骤及可能出现的问题

部署其实是不太复杂的,但实际上也耗费了接近2-3天的时间去不断的设置 1 硬件配置信息 采用esxi 虚拟化的方式将T4 卡穿透给esxi 种的ubuntu20.4虚拟机 CPU给到8 core 内存至少32GB以上 T4卡是16GB 2 预先准备OS环境 这里使用的是ubuntu20.4版本,esxi中需要设置uefI启动方…...

docker 部署prometheus和grafana

1.启动node 容器 docker run -d -p 9100:9100 -v "/proc:/host/proc:ro" -v "/sys:/host/sys:ro" -v "/:/rootfs:ro" --net"bridge" prom/node-exporter 2.访问http://192.168.1.122:9100/metrics 3.创建文件/home/prometheus/ 下…...

在本地模拟C/S,Socket套接字的使用

public class SocketTCP01Server {public static void main(String[] args) throws IOException {/**1.在本机的 9999 端口监听 &#xff0c;等待连接细节&#xff1a; 要求在本机没有其他服务在监听999细节&#xff1a;这个ServerSocket 可以通过accept()返回多个Socket[多个客…...

香港科技大学广州|可持续能源与环境学域博士招生宣讲会—东南大学专场!!!(暨全额奖学金政策)

香港科技大学广州&#xff5c;可持续能源与环境学域博士招生宣讲会—东南大学专场&#xff01;&#xff01;&#xff01;&#xff08;暨全额奖学金政策&#xff09; “面向未来改变游戏规则的——可持续能源与环境学域” 专注于能源环境跨学科尖端技术研究 培养可持续能源技术…...

[Leetcode] 0108. 将有序数组转换为二叉搜索树

108. 将有序数组转换为二叉搜索树 题目描述 给你一个整数数组 nums &#xff0c;其中元素已经按 升序 排列&#xff0c;请你将其转换为一棵 高度平衡 二叉搜索树。 高度平衡 二叉树是一棵满足「每个节点的左右两个子树的高度差的绝对值不超过 1 」的二叉树。 示例 1&#xff1a…...

【Python】 -- 趣味代码 - 小恐龙游戏

文章目录 文章目录 00 小恐龙游戏程序设计框架代码结构和功能游戏流程总结01 小恐龙游戏程序设计02 百度网盘地址00 小恐龙游戏程序设计框架 这段代码是一个基于 Pygame 的简易跑酷游戏的完整实现,玩家控制一个角色(龙)躲避障碍物(仙人掌和乌鸦)。以下是代码的详细介绍:…...

现代密码学 | 椭圆曲线密码学—附py代码

Elliptic Curve Cryptography 椭圆曲线密码学&#xff08;ECC&#xff09;是一种基于有限域上椭圆曲线数学特性的公钥加密技术。其核心原理涉及椭圆曲线的代数性质、离散对数问题以及有限域上的运算。 椭圆曲线密码学是多种数字签名算法的基础&#xff0c;例如椭圆曲线数字签…...

SpringTask-03.入门案例

一.入门案例 启动类&#xff1a; package com.sky;import lombok.extern.slf4j.Slf4j; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.cache.annotation.EnableCach…...

如何在网页里填写 PDF 表格?

有时候&#xff0c;你可能希望用户能在你的网站上填写 PDF 表单。然而&#xff0c;这件事并不简单&#xff0c;因为 PDF 并不是一种原生的网页格式。虽然浏览器可以显示 PDF 文件&#xff0c;但原生并不支持编辑或填写它们。更糟的是&#xff0c;如果你想收集表单数据&#xff…...

微软PowerBI考试 PL300-在 Power BI 中清理、转换和加载数据

微软PowerBI考试 PL300-在 Power BI 中清理、转换和加载数据 Power Query 具有大量专门帮助您清理和准备数据以供分析的功能。 您将了解如何简化复杂模型、更改数据类型、重命名对象和透视数据。 您还将了解如何分析列&#xff0c;以便知晓哪些列包含有价值的数据&#xff0c;…...

html css js网页制作成品——HTML+CSS榴莲商城网页设计(4页)附源码

目录 一、&#x1f468;‍&#x1f393;网站题目 二、✍️网站描述 三、&#x1f4da;网站介绍 四、&#x1f310;网站效果 五、&#x1fa93; 代码实现 &#x1f9f1;HTML 六、&#x1f947; 如何让学习不再盲目 七、&#x1f381;更多干货 一、&#x1f468;‍&#x1f…...

Xen Server服务器释放磁盘空间

disk.sh #!/bin/bashcd /run/sr-mount/e54f0646-ae11-0457-b64f-eba4673b824c # 全部虚拟机物理磁盘文件存储 a$(ls -l | awk {print $NF} | cut -d. -f1) # 使用中的虚拟机物理磁盘文件 b$(xe vm-disk-list --multiple | grep uuid | awk {print $NF})printf "%s\n"…...

逻辑回归暴力训练预测金融欺诈

简述 「使用逻辑回归暴力预测金融欺诈&#xff0c;并不断增加特征维度持续测试」的做法&#xff0c;体现了一种逐步建模与迭代验证的实验思路&#xff0c;在金融欺诈检测中非常有价值&#xff0c;本文作为一篇回顾性记录了早年间公司给某行做反欺诈预测用到的技术和思路。百度…...

MacOS下Homebrew国内镜像加速指南(2025最新国内镜像加速)

macos brew国内镜像加速方法 brew install 加速formula.jws.json下载慢加速 &#x1f37a; 最新版brew安装慢到怀疑人生&#xff1f;别怕&#xff0c;教你轻松起飞&#xff01; 最近Homebrew更新至最新版&#xff0c;每次执行 brew 命令时都会自动从官方地址 https://formulae.…...

Linux中INADDR_ANY详解

在Linux网络编程中&#xff0c;INADDR_ANY 是一个特殊的IPv4地址常量&#xff08;定义在 <netinet/in.h> 头文件中&#xff09;&#xff0c;用于表示绑定到所有可用网络接口的地址。它是服务器程序中的常见用法&#xff0c;允许套接字监听所有本地IP地址上的连接请求。 关…...