LoongArch 五级流水线实现
在单周期的基础上进行拆分成取指、译码、执行、访存、写回五级流水线。
1. mycpu_top.v(注:下方贴出的代码实际为 id_stage 模块,与第 3 节内容相同)
`include "mycpu.h"

// Decode (ID) stage.
//
// Latches {inst, pc} from the fetch stage, decodes the instruction into ALU
// and memory control signals, reads the register file, resolves branches and
// packs everything the execute stage needs into ds_to_es_bus.
//
// Ports:
//   clk, reset        - clock and synchronous reset (clears ds_valid)
//   es_allowin        - execute stage can accept a new instruction
//   ds_allowin        - this stage can accept a new instruction
//   fs_to_ds_valid    - fetch stage presents a valid instruction
//   fs_to_ds_bus      - {inst[31:0], pc[31:0]} from the fetch stage
//   ds_to_es_valid    - this stage presents a valid instruction
//   ds_to_es_bus      - decoded control/data bundle for the execute stage
//   br_bus            - {br_taken, br_target[31:0]} back to the fetch stage
//   ws_to_rf_bus      - {we, waddr[4:0], wdata[31:0]} register write-back
module id_stage(
    input                           clk            ,
    input                           reset          ,
    // allowin
    input                           es_allowin     ,
    output                          ds_allowin     ,
    // from fs
    input                           fs_to_ds_valid ,
    input  [`FS_TO_DS_BUS_WD -1:0]  fs_to_ds_bus   ,
    // to es
    output                          ds_to_es_valid ,
    output [`DS_TO_ES_BUS_WD -1:0]  ds_to_es_bus   ,
    // to fs
    output [`BR_BUS_WD       -1:0]  br_bus         ,
    // to rf: for write back
    input  [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus
    // Disabled lab4/lab5 forwarding ports:
    // output [ 4 :0] WB_dest,EXE_dest,MEM_dest,
    // input          es_load_op ,
    // input  [31 :0] EXE_result,MEM_result,WB_result
);

// wire br_stall;   // added (disabled)
// wire load_stall;
wire        br_taken;
reg         ds_valid;
wire [31:0] br_target;
// assign br_bus = {br_stall,br_taken,br_target}; // modified (disabled)
wire        ds_ready_go;

wire [31:0] fs_pc;   // pc currently presented by the fetch stage; not used below
reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;
assign fs_pc = fs_to_ds_bus[31:0];

wire [31:0] ds_inst;
wire [31:0] ds_pc  ;
assign {ds_inst,
        ds_pc  } = fs_to_ds_bus_r;

wire        rf_we   ;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37
        rf_waddr,  //36:32
        rf_wdata   //31:0
       } = ws_to_rf_bus;

wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        res_from_mem;
wire        dst_is_r1;      // was an implicit net; declared explicitly
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire [ 4:0] dest;
wire [31:0] imm;
// wire [31:0] rs_value;
// wire [31:0] rt_value;
wire [31:0] rj_value, rkd_value;
wire [31:0] br_offs, jirl_offs;

wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_ui5;
wire        need_si12;
wire        need_si16;
wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;

wire        rj_eq_rd;       // was an implicit net (an unused rs_eq_rt was declared instead)

assign br_bus = {br_taken, br_target};

assign ds_to_es_bus = {alu_op      ,  //149:138
                       load_op     ,  //137:137
                       src1_is_pc  ,  //136:136
                       src2_is_imm ,  //135:135
                       gr_we       ,  //134:134
                       mem_we      ,  //133:133
                       dest        ,  //132:128
                       imm         ,  //127:96
                       rj_value    ,  //95 :64
                       rkd_value   ,  //63 :32
                       ds_pc          //31 :0
                      };

// No hazard stall is applied here: a valid instruction is always ready to go.
assign ds_ready_go    = ds_valid;
//lab4
// assign ds_ready_go = ds_valid & ~rs_wait & ~rt_wait;
//lab5
// assign ds_ready_go = ds_valid & ~load_stall;
assign ds_allowin     = !ds_valid || ds_ready_go && es_allowin;
assign ds_to_es_valid = ds_valid && ds_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ds_valid <= 1'b0;
    end
    else if (ds_allowin) begin
        ds_valid <= fs_to_ds_valid;
    end
end

// Capture the fetch bundle only on a successful handshake.
always @(posedge clk) begin
    if (fs_to_ds_valid && ds_allowin) begin
        fs_to_ds_bus_r <= fs_to_ds_bus;
    end
end

// Instruction field extraction
assign op_31_26 = ds_inst[31:26];
assign op_25_22 = ds_inst[25:22];
assign op_21_20 = ds_inst[21:20];
assign op_19_15 = ds_inst[19:15];

assign rd = ds_inst[ 4: 0];
assign rj = ds_inst[ 9: 5];
assign rk = ds_inst[14:10];

assign i12 = ds_inst[21:10];
assign i20 = ds_inst[24: 5];
assign i16 = ds_inst[25:10];
assign i26 = {ds_inst[ 9: 0], ds_inst[25:10]};

decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d ));
decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d ));
decoder_2_4  u_dec2(.in(op_21_20 ), .out(op_21_20_d ));
decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d ));

// Opcode match for each supported instruction
assign inst_add_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl   = op_31_26_d[6'h13];
assign inst_b      = op_31_26_d[6'h14];
assign inst_bl     = op_31_26_d[6'h15];
assign inst_beq    = op_31_26_d[6'h16];
assign inst_bne    = op_31_26_d[6'h17];
assign inst_lu12i_w= op_31_26_d[6'h05] & ~ds_inst[25];

// One-hot ALU operation select
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

assign need_ui5   = inst_slli_w | inst_srli_w | inst_srai_w;
assign need_si12  = inst_addi_w | inst_ld_w | inst_st_w;
assign need_si16  = inst_jirl | inst_beq | inst_bne;
assign need_si20  = inst_lu12i_w;
assign need_si26  = inst_b | inst_bl;
assign src2_is_4  = inst_jirl | inst_bl;   // author's note: check whether 4 should be added

// Immediate operand: constant 4 for link instructions, i20 << 12 for lu12i.w,
// otherwise the sign-extended i12 field.
assign imm = src2_is_4 ? 32'h4                      :
             need_si20 ? {i20[19:0], 12'b0}         :
/*need_ui5 || need_si12*/{{20{i12[11]}}, i12[11:0]} ;

assign br_offs = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                             {{14{i16[15]}}, i16[15:0], 2'b0} ;
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

assign load_op       = inst_ld_w;
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;

assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w |
                       inst_srli_w |
                       inst_srai_w |
                       inst_addi_w |
                       inst_ld_w   |
                       inst_st_w   |
                       inst_lu12i_w|
                       inst_jirl   |
                       inst_bl     ;

assign res_from_mem  = inst_ld_w;
assign dst_is_r1     = inst_bl;
// Whether the instruction writes a general-purpose register: everything
// except st.w, beq, bne and b.
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we        = inst_st_w;
// Author's note: this needs to change.
assign dest          = dst_is_r1 ? 5'd01 : rd;
// stall (disabled)
// assign dest = dst_is_r31 ? 5'd31 :
//               dst_is_rt  ? rt    :
//               inst_no_dest ? 5'd0 : rd;

assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;
regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
    );

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;
//lab5
// assign rs_value = rs_wait ? (rs == EXE_dest? EXE_result:
//                              rs == MEM_dest?MEM_result:WB_result)
//                           : rf_rdata1;
// assign rt_value = rt_wait ? (rt == EXE_dest? EXE_result:
//                              rt == MEM_dest?MEM_result:WB_result)
//                           : rf_rdata2;

assign rj_eq_rd = (rj_value == rkd_value);
// NOTE(review): nothing visible in this module invalidates the instruction
// that was fetched right behind a taken branch - confirm whether that is
// handled elsewhere or is still to be added.
assign br_taken = (   inst_beq  &&  rj_eq_rd
                   || inst_bne  && !rj_eq_rd
                   || inst_jirl
                   || inst_bl
                   || inst_b
                  ) && ds_valid;
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);
//lab4
// wire src1_no_rs; // rs field is non-zero and rs is not read from the register file
// wire src2_no_rt; // rt field is non-zero and rt is not read from the register file
// assign src1_no_rs = 1'b0;
// assign src2_no_rt = inst_addiu | load_op|inst_jal|inst_lui;

// wire rs_wait,rt_wait;
// assign rs_wait = ~src1_no_rs & (rs!=5'd0)
//                & ( (rs==EXE_dest) | (rs==MEM_dest) | (rs==WB_dest));

// assign rt_wait = ~src2_no_rt & (rt!=5'd0)
//                & ( (rt==EXE_dest) | (rt==MEM_dest) | (rt==WB_dest));

// assign br_stall = br_taken & load_stall & {5{ds_valid}}; // added
// assign load_stall = (rs_wait & (rs == EXE_dest) & es_load_op ) ||
//                     (rt_wait & (rt == EXE_dest) & es_load_op );

// wire inst_no_dest;
// assign inst_no_dest = inst_beq|inst_bne|inst_jr|inst_sw;

endmodule
2. IF_stage.v
`include "mycpu.h"

// Fetch (IF) stage.
//
// Holds the program counter, drives the instruction SRAM with the next PC
// and hands {inst, pc} to the decode stage.
//
// Ports:
//   clk, reset        - clock and synchronous reset
//   ds_allowin        - decode stage can accept a new instruction
//   br_bus            - {br_taken, br_target[31:0]} from the decode stage
//   fs_to_ds_valid    - this stage presents a valid instruction
//   fs_to_ds_bus      - {inst[31:0], pc[31:0]} to the decode stage
//   inst_sram_*       - instruction SRAM interface (never written)
module if_stage(
    input                          clk            ,
    input                          reset          ,
    // allowin
    input                          ds_allowin     ,
    // br bus
    input  [`BR_BUS_WD       -1:0] br_bus         ,
    // to ds
    output                         fs_to_ds_valid ,
    output [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus   ,
    // inst sram interface
    output        inst_sram_en   ,
    output [ 3:0] inst_sram_we   ,
    output [31:0] inst_sram_addr ,
    output [31:0] inst_sram_wdata,
    input  [31:0] inst_sram_rdata
);

reg         fs_valid;       // this stage currently holds a valid instruction
wire        fs_ready_go;
wire        fs_allowin;     // this stage can accept a new fetch request
wire        to_fs_valid;    // pre-IF has a request to hand over (high whenever not in reset)

// wire pre_fs_ready_go; // added (disabled)
// wire br_stall;        // added (disabled)
// assign to_fs_valid = ~reset && pre_fs_ready_go; // modified (disabled)
// assign pre_fs_ready_go = ~br_stall;             // added (disabled)

wire [31:0] seq_pc;
wire [31:0] nextpc;

wire        br_taken;
wire [31:0] br_target;
assign {br_taken, br_target} = br_bus;
// assign {br_stall,br_taken,br_target} = br_bus;             // modified: branch prediction (disabled)
// assign inst_sram_en = to_fs_valid && fs_allowin && ~br_stall; // modified (disabled)

wire [31:0] fs_inst;
reg  [31:0] fs_pc;
assign fs_to_ds_bus = {fs_inst,
                       fs_pc  };

// pre-IF stage
assign to_fs_valid = ~reset;
assign seq_pc      = fs_pc + 3'h4;
assign nextpc      = br_taken ? br_target : seq_pc;

// IF stage
assign fs_ready_go    = 1'b1;
assign fs_allowin     = !fs_valid || (fs_ready_go && ds_allowin);
assign fs_to_ds_valid = fs_valid && fs_ready_go;

always @(posedge clk) begin
    if (reset) begin
        fs_valid <= 1'b0;
    end
    else if (fs_allowin) begin
        fs_valid <= to_fs_valid;
    end

    if (reset) begin
        fs_pc <= 32'h1bfffffc;  //trick: to make nextpc be 0x1c000000 during reset
    end
    else if (to_fs_valid && fs_allowin) begin
        fs_pc <= nextpc;
    end
end

// The SRAM is addressed with nextpc, and its read data is used directly as
// the instruction for this stage.
assign inst_sram_en    = to_fs_valid && fs_allowin;
assign inst_sram_we    = 4'h0;
assign inst_sram_addr  = nextpc;
assign inst_sram_wdata = 32'b0;

assign fs_inst         = inst_sram_rdata;

endmodule
3. ID_stage
`include "mycpu.h"

// Decode (ID) stage.
//
// Latches {inst, pc} from the fetch stage, decodes the instruction into ALU
// and memory control signals, reads the register file, resolves branches and
// packs everything the execute stage needs into ds_to_es_bus.
//
// Ports:
//   clk, reset        - clock and synchronous reset (clears ds_valid)
//   es_allowin        - execute stage can accept a new instruction
//   ds_allowin        - this stage can accept a new instruction
//   fs_to_ds_valid    - fetch stage presents a valid instruction
//   fs_to_ds_bus      - {inst[31:0], pc[31:0]} from the fetch stage
//   ds_to_es_valid    - this stage presents a valid instruction
//   ds_to_es_bus      - decoded control/data bundle for the execute stage
//   br_bus            - {br_taken, br_target[31:0]} back to the fetch stage
//   ws_to_rf_bus      - {we, waddr[4:0], wdata[31:0]} register write-back
module id_stage(
    input                           clk            ,
    input                           reset          ,
    // allowin
    input                           es_allowin     ,
    output                          ds_allowin     ,
    // from fs
    input                           fs_to_ds_valid ,
    input  [`FS_TO_DS_BUS_WD -1:0]  fs_to_ds_bus   ,
    // to es
    output                          ds_to_es_valid ,
    output [`DS_TO_ES_BUS_WD -1:0]  ds_to_es_bus   ,
    // to fs
    output [`BR_BUS_WD       -1:0]  br_bus         ,
    // to rf: for write back
    input  [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus
    // Disabled lab4/lab5 forwarding ports:
    // output [ 4 :0] WB_dest,EXE_dest,MEM_dest,
    // input          es_load_op ,
    // input  [31 :0] EXE_result,MEM_result,WB_result
);

// wire br_stall;   // added (disabled)
// wire load_stall;
wire        br_taken;
reg         ds_valid;
wire [31:0] br_target;
// assign br_bus = {br_stall,br_taken,br_target}; // modified (disabled)
wire        ds_ready_go;

wire [31:0] fs_pc;   // pc currently presented by the fetch stage; not used below
reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;
assign fs_pc = fs_to_ds_bus[31:0];

wire [31:0] ds_inst;
wire [31:0] ds_pc  ;
assign {ds_inst,
        ds_pc  } = fs_to_ds_bus_r;

wire        rf_we   ;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37
        rf_waddr,  //36:32
        rf_wdata   //31:0
       } = ws_to_rf_bus;

wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        res_from_mem;
wire        dst_is_r1;      // was an implicit net; declared explicitly
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire [ 4:0] dest;
wire [31:0] imm;
// wire [31:0] rs_value;
// wire [31:0] rt_value;
wire [31:0] rj_value, rkd_value;
wire [31:0] br_offs, jirl_offs;

wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_ui5;
wire        need_si12;
wire        need_si16;
wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;

wire        rj_eq_rd;       // was an implicit net (an unused rs_eq_rt was declared instead)

assign br_bus = {br_taken, br_target};

assign ds_to_es_bus = {alu_op      ,  //149:138
                       load_op     ,  //137:137
                       src1_is_pc  ,  //136:136
                       src2_is_imm ,  //135:135
                       gr_we       ,  //134:134
                       mem_we      ,  //133:133
                       dest        ,  //132:128
                       imm         ,  //127:96
                       rj_value    ,  //95 :64
                       rkd_value   ,  //63 :32
                       ds_pc          //31 :0
                      };

// No hazard stall is applied here: a valid instruction is always ready to go.
assign ds_ready_go    = ds_valid;
//lab4
// assign ds_ready_go = ds_valid & ~rs_wait & ~rt_wait;
//lab5
// assign ds_ready_go = ds_valid & ~load_stall;
assign ds_allowin     = !ds_valid || ds_ready_go && es_allowin;
assign ds_to_es_valid = ds_valid && ds_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ds_valid <= 1'b0;
    end
    else if (ds_allowin) begin
        ds_valid <= fs_to_ds_valid;
    end
end

// Capture the fetch bundle only on a successful handshake.
always @(posedge clk) begin
    if (fs_to_ds_valid && ds_allowin) begin
        fs_to_ds_bus_r <= fs_to_ds_bus;
    end
end

// Instruction field extraction
assign op_31_26 = ds_inst[31:26];
assign op_25_22 = ds_inst[25:22];
assign op_21_20 = ds_inst[21:20];
assign op_19_15 = ds_inst[19:15];

assign rd = ds_inst[ 4: 0];
assign rj = ds_inst[ 9: 5];
assign rk = ds_inst[14:10];

assign i12 = ds_inst[21:10];
assign i20 = ds_inst[24: 5];
assign i16 = ds_inst[25:10];
assign i26 = {ds_inst[ 9: 0], ds_inst[25:10]};

decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d ));
decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d ));
decoder_2_4  u_dec2(.in(op_21_20 ), .out(op_21_20_d ));
decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d ));

// Opcode match for each supported instruction
assign inst_add_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl   = op_31_26_d[6'h13];
assign inst_b      = op_31_26_d[6'h14];
assign inst_bl     = op_31_26_d[6'h15];
assign inst_beq    = op_31_26_d[6'h16];
assign inst_bne    = op_31_26_d[6'h17];
assign inst_lu12i_w= op_31_26_d[6'h05] & ~ds_inst[25];

// One-hot ALU operation select
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

assign need_ui5   = inst_slli_w | inst_srli_w | inst_srai_w;
assign need_si12  = inst_addi_w | inst_ld_w | inst_st_w;
assign need_si16  = inst_jirl | inst_beq | inst_bne;
assign need_si20  = inst_lu12i_w;
assign need_si26  = inst_b | inst_bl;
assign src2_is_4  = inst_jirl | inst_bl;   // author's note: check whether 4 should be added

// Immediate operand: constant 4 for link instructions, i20 << 12 for lu12i.w,
// otherwise the sign-extended i12 field.
assign imm = src2_is_4 ? 32'h4                      :
             need_si20 ? {i20[19:0], 12'b0}         :
/*need_ui5 || need_si12*/{{20{i12[11]}}, i12[11:0]} ;

assign br_offs = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                             {{14{i16[15]}}, i16[15:0], 2'b0} ;
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

assign load_op       = inst_ld_w;
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;

assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w |
                       inst_srli_w |
                       inst_srai_w |
                       inst_addi_w |
                       inst_ld_w   |
                       inst_st_w   |
                       inst_lu12i_w|
                       inst_jirl   |
                       inst_bl     ;

assign res_from_mem  = inst_ld_w;
assign dst_is_r1     = inst_bl;
// Whether the instruction writes a general-purpose register: everything
// except st.w, beq, bne and b.
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we        = inst_st_w;
// Author's note: this needs to change.
assign dest          = dst_is_r1 ? 5'd01 : rd;
// stall (disabled)
// assign dest = dst_is_r31 ? 5'd31 :
//               dst_is_rt  ? rt    :
//               inst_no_dest ? 5'd0 : rd;

assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;
regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
    );

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;
//lab5
// assign rs_value = rs_wait ? (rs == EXE_dest? EXE_result:
//                              rs == MEM_dest?MEM_result:WB_result)
//                           : rf_rdata1;
// assign rt_value = rt_wait ? (rt == EXE_dest? EXE_result:
//                              rt == MEM_dest?MEM_result:WB_result)
//                           : rf_rdata2;

assign rj_eq_rd = (rj_value == rkd_value);
// NOTE(review): nothing visible in this module invalidates the instruction
// that was fetched right behind a taken branch - confirm whether that is
// handled elsewhere or is still to be added.
assign br_taken = (   inst_beq  &&  rj_eq_rd
                   || inst_bne  && !rj_eq_rd
                   || inst_jirl
                   || inst_bl
                   || inst_b
                  ) && ds_valid;
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);
//lab4
// wire src1_no_rs; // rs field is non-zero and rs is not read from the register file
// wire src2_no_rt; // rt field is non-zero and rt is not read from the register file
// assign src1_no_rs = 1'b0;
// assign src2_no_rt = inst_addiu | load_op|inst_jal|inst_lui;

// wire rs_wait,rt_wait;
// assign rs_wait = ~src1_no_rs & (rs!=5'd0)
//                & ( (rs==EXE_dest) | (rs==MEM_dest) | (rs==WB_dest));

// assign rt_wait = ~src2_no_rt & (rt!=5'd0)
//                & ( (rt==EXE_dest) | (rt==MEM_dest) | (rt==WB_dest));

// assign br_stall = br_taken & load_stall & {5{ds_valid}}; // added
// assign load_stall = (rs_wait & (rs == EXE_dest) & es_load_op ) ||
//                     (rt_wait & (rt == EXE_dest) & es_load_op );

// wire inst_no_dest;
// assign inst_no_dest = inst_beq|inst_bne|inst_jr|inst_sw;

endmodule
4. EXE_stage
`include "mycpu.h"

// Execute (EXE) stage.
//
// Latches the decoded bundle from the decode stage, selects the two ALU
// operands, runs the ALU, and drives the data SRAM request (address, write
// enable, write data). The ALU result is forwarded to the memory stage.
//
// Ports:
//   clk, reset        - clock and synchronous reset (clears es_valid)
//   ms_allowin        - memory stage can accept a new instruction
//   es_allowin        - this stage can accept a new instruction
//   ds_to_es_valid    - decode stage presents a valid instruction
//   ds_to_es_bus      - decoded control/data bundle from the decode stage
//   es_to_ms_valid    - this stage presents a valid instruction
//   es_to_ms_bus      - {res_from_mem, gr_we, dest, alu_result, pc}
//   data_sram_*       - data SRAM request interface
module exe_stage(
    input                          clk            ,
    input                          reset          ,
    // allowin
    input                          ms_allowin     ,
    output                         es_allowin     ,
    // from ds
    input                          ds_to_es_valid ,
    input  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus   ,
    // to ms
    output                         es_to_ms_valid ,
    output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus   ,
    // data sram interface
    output        data_sram_en   ,
    output [ 3:0] data_sram_we   ,
    output [31:0] data_sram_addr ,
    output [31:0] data_sram_wdata
    // Disabled forwarding ports:
    // output [ 4:0] EXE_dest ,
    // output        es_load_op
    // output [31:0] EXE_result
);

reg         es_valid      ;
wire        es_ready_go   ;

reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op     ;
wire        es_load_op    ;
// wire        es_src1_is_sa ;
wire        es_src1_is_pc ;
wire        es_src2_is_imm;
// wire        es_src2_is_8  ;
wire        es_gr_we      ;
wire        es_mem_we     ;
wire [ 4:0] es_dest       ;
wire [31:0] es_imm        ;
wire [31:0] es_rj_value   ;
wire [31:0] es_rkd_value  ;
wire [31:0] es_pc         ;
assign {es_alu_op      ,  //149:138
        es_load_op     ,  //137:137
        es_src1_is_pc  ,  //136:136
        es_src2_is_imm ,  //135:135
        es_gr_we       ,  //134:134
        es_mem_we      ,  //133:133
        es_dest        ,  //132:128
        es_imm         ,  //127:96
        es_rj_value    ,  //95 :64
        es_rkd_value   ,  //63 :32
        es_pc             //31 :0
       } = ds_to_es_bus_r;

wire [31:0] es_alu_src1   ;
wire [31:0] es_alu_src2   ;
wire [31:0] es_alu_result ;

wire        es_res_from_mem;

assign es_res_from_mem = es_load_op;
assign es_to_ms_bus = {es_res_from_mem,  //70:70
                       es_gr_we       ,  //69:69
                       es_dest        ,  //68:64
                       es_alu_result  ,  //63:32
                       es_pc             //31:0
                      };

assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid = es_valid && es_ready_go;
always @(posedge clk) begin
    if (reset) begin
        es_valid <= 1'b0;
    end
    else if (es_allowin) begin
        es_valid <= ds_to_es_valid;
    end

    // Capture the decode bundle only on a successful handshake.
    if (ds_to_es_valid && es_allowin) begin
        ds_to_es_bus_r <= ds_to_es_bus;
    end
end

// assign es_alu_src1 = es_src1_is_sa  ? {27'b0, es_imm[10:6]} :
//                      es_src1_is_pc  ? es_pc[31:0] :
//                                       es_rs_value;
// assign es_alu_src2 = es_src2_is_imm ? {{16{es_imm[15]}}, es_imm[15:0]} :
//                      es_src2_is_8   ? 32'd8 :
//                                       es_rt_value;
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm      : es_rkd_value;

alu u_alu(
    .alu_op     (es_alu_op    ),
    .alu_src1   (es_alu_src1  ),
    .alu_src2   (es_alu_src2  ),
    .alu_result (es_alu_result)
    );

// All four byte lanes are written together, and only for a valid store.
assign data_sram_en    = 1'b1;
assign data_sram_we    = es_mem_we&&es_valid ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
// assign data_sram_wdata = es_rt_value;   (author's note: may need changing)
assign data_sram_wdata = es_rkd_value;

// assign EXE_dest = es_dest & {5{es_valid}};
// assign EXE_result = es_alu_result;
endmodule
5. MEM_stage(注:下方贴出的代码实际为 exe_stage 模块,与第 4 节内容相同)
`include "mycpu.h"

// Execute (EXE) stage.
//
// Latches the decoded bundle from the decode stage, selects the two ALU
// operands, runs the ALU, and drives the data SRAM request (address, write
// enable, write data). The ALU result is forwarded to the memory stage.
//
// Ports:
//   clk, reset        - clock and synchronous reset (clears es_valid)
//   ms_allowin        - memory stage can accept a new instruction
//   es_allowin        - this stage can accept a new instruction
//   ds_to_es_valid    - decode stage presents a valid instruction
//   ds_to_es_bus      - decoded control/data bundle from the decode stage
//   es_to_ms_valid    - this stage presents a valid instruction
//   es_to_ms_bus      - {res_from_mem, gr_we, dest, alu_result, pc}
//   data_sram_*       - data SRAM request interface
module exe_stage(
    input                          clk            ,
    input                          reset          ,
    // allowin
    input                          ms_allowin     ,
    output                         es_allowin     ,
    // from ds
    input                          ds_to_es_valid ,
    input  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus   ,
    // to ms
    output                         es_to_ms_valid ,
    output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus   ,
    // data sram interface
    output        data_sram_en   ,
    output [ 3:0] data_sram_we   ,
    output [31:0] data_sram_addr ,
    output [31:0] data_sram_wdata
    // Disabled forwarding ports:
    // output [ 4:0] EXE_dest ,
    // output        es_load_op
    // output [31:0] EXE_result
);

reg         es_valid      ;
wire        es_ready_go   ;

reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op     ;
wire        es_load_op    ;
// wire        es_src1_is_sa ;
wire        es_src1_is_pc ;
wire        es_src2_is_imm;
// wire        es_src2_is_8  ;
wire        es_gr_we      ;
wire        es_mem_we     ;
wire [ 4:0] es_dest       ;
wire [31:0] es_imm        ;
wire [31:0] es_rj_value   ;
wire [31:0] es_rkd_value  ;
wire [31:0] es_pc         ;
assign {es_alu_op      ,  //149:138
        es_load_op     ,  //137:137
        es_src1_is_pc  ,  //136:136
        es_src2_is_imm ,  //135:135
        es_gr_we       ,  //134:134
        es_mem_we      ,  //133:133
        es_dest        ,  //132:128
        es_imm         ,  //127:96
        es_rj_value    ,  //95 :64
        es_rkd_value   ,  //63 :32
        es_pc             //31 :0
       } = ds_to_es_bus_r;

wire [31:0] es_alu_src1   ;
wire [31:0] es_alu_src2   ;
wire [31:0] es_alu_result ;

wire        es_res_from_mem;

assign es_res_from_mem = es_load_op;
assign es_to_ms_bus = {es_res_from_mem,  //70:70
                       es_gr_we       ,  //69:69
                       es_dest        ,  //68:64
                       es_alu_result  ,  //63:32
                       es_pc             //31:0
                      };

assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid = es_valid && es_ready_go;
always @(posedge clk) begin
    if (reset) begin
        es_valid <= 1'b0;
    end
    else if (es_allowin) begin
        es_valid <= ds_to_es_valid;
    end

    // Capture the decode bundle only on a successful handshake.
    if (ds_to_es_valid && es_allowin) begin
        ds_to_es_bus_r <= ds_to_es_bus;
    end
end

// assign es_alu_src1 = es_src1_is_sa  ? {27'b0, es_imm[10:6]} :
//                      es_src1_is_pc  ? es_pc[31:0] :
//                                       es_rs_value;
// assign es_alu_src2 = es_src2_is_imm ? {{16{es_imm[15]}}, es_imm[15:0]} :
//                      es_src2_is_8   ? 32'd8 :
//                                       es_rt_value;
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm      : es_rkd_value;

alu u_alu(
    .alu_op     (es_alu_op    ),
    .alu_src1   (es_alu_src1  ),
    .alu_src2   (es_alu_src2  ),
    .alu_result (es_alu_result)
    );

// All four byte lanes are written together, and only for a valid store.
assign data_sram_en    = 1'b1;
assign data_sram_we    = es_mem_we&&es_valid ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
// assign data_sram_wdata = es_rt_value;   (author's note: may need changing)
assign data_sram_wdata = es_rkd_value;

// assign EXE_dest = es_dest & {5{es_valid}};
// assign EXE_result = es_alu_result;
endmodule
6. WB_stage
`include "mycpu.h"

// Write-back (WB) stage.
//
// Latches the result bundle from the memory stage, drives the register-file
// write port through ws_to_rf_bus, and exposes the trace/debug signals.
//
// Ports:
//   clk, reset        - clock and synchronous reset (clears ws_valid)
//   ws_allowin        - this stage can accept a new instruction
//   ms_to_ws_valid    - memory stage presents a valid instruction
//   ms_to_ws_bus      - {gr_we, dest[4:0], final_result[31:0], pc[31:0]}
//   ws_to_rf_bus      - {we, waddr[4:0], wdata[31:0]} to the register file
//   debug_wb_*        - trace debug interface
module wb_stage(
    input                           clk           ,
    input                           reset         ,
    // allowin
    output                          ws_allowin    ,
    // from ms
    input                           ms_to_ws_valid,
    input  [`MS_TO_WS_BUS_WD -1:0]  ms_to_ws_bus  ,
    // to rf: for write back
    output [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus  ,
    // trace debug interface
    output [31:0] debug_wb_pc      ,
    output [ 3:0] debug_wb_rf_we   ,
    output [ 4:0] debug_wb_rf_wnum ,
    output [31:0] debug_wb_rf_wdata
    // lab4 (disabled)
    // output [ 4:0] WB_dest ,
    // lab5 (disabled)
    // output [31:0] WB_result
);

reg         ws_valid;
wire        ws_ready_go;

reg [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus_r;
wire        ws_gr_we;
wire [ 4:0] ws_dest;
wire [31:0] ws_final_result;
wire [31:0] ws_pc;
assign {ws_gr_we       ,  //69:69
        ws_dest        ,  //68:64
        ws_final_result,  //63:32
        ws_pc             //31:0
       } = ms_to_ws_bus_r;

wire        rf_we;
wire [4 :0] rf_waddr;
wire [31:0] rf_wdata;
assign ws_to_rf_bus = {rf_we   ,  //37:37
                       rf_waddr,  //36:32
                       rf_wdata   //31:0
                      };

assign ws_ready_go = 1'b1;
assign ws_allowin  = !ws_valid || ws_ready_go;
always @(posedge clk) begin
    if (reset) begin
        ws_valid <= 1'b0;
    end
    else if (ws_allowin) begin
        ws_valid <= ms_to_ws_valid;
    end

    // Capture the memory-stage bundle only on a successful handshake.
    if (ms_to_ws_valid && ws_allowin) begin
        ms_to_ws_bus_r <= ms_to_ws_bus;
    end
end

// The register file is written only when this stage holds a valid instruction.
assign rf_we    = ws_gr_we&&ws_valid;
assign rf_waddr = ws_dest;
assign rf_wdata = ws_final_result;

// debug info generate
assign debug_wb_pc       = ws_pc;
assign debug_wb_rf_we    = {4{rf_we}};
assign debug_wb_rf_wnum  = ws_dest;
assign debug_wb_rf_wdata = ws_final_result;

// assign WB_dest = ws_dest & {5{ws_valid}};
// assign WB_result = ws_final_result;
endmodule
相关文章:
LoongArch 五级流水线实现
在单周期的基础上进行拆分成取指、译码、执行、访存、写回五级流水线。 mycpu_top.v include "mycpu.h"module id_stage(input clk ,input reset ,//allowininput …...
「Qt中文教程指南」如何创建基于Qt Widget的应用程序(四)
Qt 是目前最先进、最完整的跨平台C开发工具。它不仅完全实现了一次编写,所有平台无差别运行,更提供了几乎所有开发过程中需要用到的工具。如今,Qt已被运用于超过70个行业、数千家企业,支持数百万设备及应用。 本文描述了如何使用…...
11、SpringCloud -- 利用redis优化查询秒杀商品的数据(就是可以把商品数据先存到redis中)
目录 秒杀商品数据存到redis中并查询需求hash理解代码:RedisService商品数据初始化:查询 测试: 秒杀商品数据存到redis中并查询 需求 利用redis优化查询秒杀商品的数据,就是可以把商品数据先存到redis中,要查的时候先…...
计算节点上iptables安全组分析
计算节点上iptables安全组分析 之前介绍过neutron 安全组基于iptables 和 ct 实现,分析一下计算节点上面的neutron 安全组的iptables,加深一下理解iptables以及安全组的实现。(PS: 如下基于openstack stein) 查看某计算节点上面的iptables …...
香港科技大学广州|可持续能源与环境学域博士招生宣讲会—上海专场!!!(暨全额奖学金政策)
香港科技大学广州|可持续能源与环境学域博士招生宣讲会—上海专场!!!(暨全额奖学金政策) “面向未来改变游戏规则的——可持续能源与环境学域” 专注于能源环境跨学…...
有没有什么网站可以在线做视频脚本?批量制作视频,批量替换素材混剪?
随着视频内容的普及和需求的不断增长,越来越多的个人和团队开始涉足视频制作领域。为了提高效率并满足大量制作需求,许多在线视频制作工具应运而生,提供了一系列便捷的功能,如在线视频脚本编辑、批量制作视频、批量替换素材和混剪…...
【python数学建模】特征值与特征向量运用
1、求数列通项 (1)转化为求矩阵的幂次问题 例:求斐波那契数列的通项公式 已知斐波那契数列满足: $F_{k+2}=F_{k+1}+F_{k}$ (a) 降阶:将二阶差分方程转化为一阶…...
什么是 CNN? 卷积神经网络? 怎么用 CNN 进行分类?(1)
先看卷积是啥,url: https://www.bilibili.com/video/BV1JX4y1K7Dr/?spm_id_from333.337.search-card.all.click&vd_source7a1a0bc74158c6993c7355c5490fc600 下面这个式子就是卷积 看完了,感觉似懂非懂 下一个参考视频:https://www.y…...
java解决修改图片尺寸,压缩图片后出现背景变黑,图片字体模糊问题
将以下数学公式的图片使用Hutool提供的图片工具类改变尺寸 代码如下: package com.jason.common.file.word;import cn.hutool.core.img.ImgUtil; import cn.hutool.core.io.FileUtil;import javax.imageio.ImageIO; import java.awt.*; import java.awt.image.BufferedImage;…...
jq/js检测鼠标指针移动离开页面
通过 mouseout 鼠标事件,判断鼠标去往哪个元素 知识点:relatedTarget 事件属性 定义和用法 relatedTarget 事件属性返回与事件的目标节点相关的节点。 对于 mouseover 事件来说,该属性是鼠标指针移到目标节点上时所离开的那个节点。 对于 …...
ICC2: 如何在显示GUI操作产生的命令
我正在「拾陆楼」和朋友们讨论有趣的话题,你⼀起来吧? 拾陆楼知识星球入口 ICC2:自定义快捷键和菜单 VIEW -> Perference -> Global Settings 把display commands in logging console 下面几个都勾上即可。...
内网渗透——macOS上搭建Web服务器
# 公网访问macOS本地web服务器【内网穿透】 文章目录 1. 启动Apache服务器2. 公网访问本地web服务2.1 本地安装配置cpolar2.2 创建隧道2.3 测试访问公网地址3. 配置固定二级子域名3.1 保留一个二级子域名3.2 配置二级子域名4. 测试访问公网固定二级子域名 以macOS自带的Apache…...
Centos下用nodejs实现一个简单的web服务器
WebRTC是音视频直播中最常用的一个框架,在使用的过程中,我们就需要实现一个服务器端。本文以nodejs实现一个服务器为例,讲述一下在centos下如何用nodejs实现一个简单的web服务器。 一、安装nodejs 在linux环境下安装nodejs有多重方式&#x…...
3.10每日一题(三角有理函数积分(三角函数加减乘除))
1、通过类型判别方法>判断出为凑 tanx 2、加项减项拆常用的积分公式 注:tanx的导数是:cosx的平方分之一 cosx的平方分之一 1 tanx arctanx的求导公式要记住...
python练习(猜数字,99乘法表)
python练习(猜数字,99乘法表) 猜数字 import random num1random.choice(range(1,101))for i in range(11):num2input("plz input a number:")num2int(num2)if num1<num2:print("太大了,小一点")elif num1>num2:print("…...
正确部署Baichuan2(Ubuntu20.4) 步骤及可能出现的问题
部署其实是不太复杂的,但实际上也耗费了接近2-3天的时间去不断的设置 1 硬件配置信息 采用esxi 虚拟化的方式将T4 卡穿透给esxi 种的ubuntu20.4虚拟机 CPU给到8 core 内存至少32GB以上 T4卡是16GB 2 预先准备OS环境 这里使用的是ubuntu20.4版本,esxi中需要设置uefI启动方…...
docker 部署prometheus和grafana
1.启动node 容器 docker run -d -p 9100:9100 -v "/proc:/host/proc:ro" -v "/sys:/host/sys:ro" -v "/:/rootfs:ro" --net="bridge" prom/node-exporter 2.访问http://192.168.1.122:9100/metrics 3.创建文件/home/prometheus/ 下…...
在本地模拟C/S,Socket套接字的使用
public class SocketTCP01Server {public static void main(String[] args) throws IOException {/**1.在本机的 9999 端口监听 ,等待连接细节: 要求在本机没有其他服务在监听999细节:这个ServerSocket 可以通过accept()返回多个Socket[多个客…...
香港科技大学广州|可持续能源与环境学域博士招生宣讲会—东南大学专场!!!(暨全额奖学金政策)
香港科技大学广州|可持续能源与环境学域博士招生宣讲会—东南大学专场!!!(暨全额奖学金政策) “面向未来改变游戏规则的——可持续能源与环境学域” 专注于能源环境跨学科尖端技术研究 培养可持续能源技术…...
[Leetcode] 0108. 将有序数组转换为二叉搜索树
108. 将有序数组转换为二叉搜索树 题目描述 给你一个整数数组 nums ,其中元素已经按 升序 排列,请你将其转换为一棵 高度平衡 二叉搜索树。 高度平衡 二叉树是一棵满足「每个节点的左右两个子树的高度差的绝对值不超过 1 」的二叉树。 示例 1:…...
7.4.分块查找
一.分块查找的算法思想: 1.实例: 以上述图片的顺序表为例, 该顺序表的数据元素从整体来看是乱序的,但如果把这些数据元素分成一块一块的小区间, 第一个区间[0,1]索引上的数据元素都是小于等于10的, 第二…...
多模态2025:技术路线“神仙打架”,视频生成冲上云霄
文|魏琳华 编|王一粟 一场大会,聚集了中国多模态大模型的“半壁江山”。 智源大会2025为期两天的论坛中,汇集了学界、创业公司和大厂等三方的热门选手,关于多模态的集中讨论达到了前所未有的热度。其中,…...
srs linux
下载编译运行 git clone https://github.com/ossrs/srs.git ./configure --h265=on make 编译完成后即可启动SRS # 启动 ./objs/srs -c conf/srs.conf # 查看日志 tail -n 30 -f ./objs/srs.log 开放端口 默认RTMP接收推流端口是1935,SRS管理页面端口是8080,可…...
【算法训练营Day07】字符串part1
文章目录 反转字符串反转字符串II替换数字 反转字符串 题目链接:344. 反转字符串 双指针法,两个指针的元素直接调转即可 class Solution {public void reverseString(char[] s) {int head 0;int end s.length - 1;while(head < end) {char temp …...
BCS 2025|百度副总裁陈洋:智能体在安全领域的应用实践
6月5日,2025全球数字经济大会数字安全主论坛暨北京网络安全大会在国家会议中心隆重开幕。百度副总裁陈洋受邀出席,并作《智能体在安全领域的应用实践》主题演讲,分享了在智能体在安全领域的突破性实践。他指出,百度通过将安全能力…...
sipsak:SIP瑞士军刀!全参数详细教程!Kali Linux教程!
简介 sipsak 是一个面向会话初始协议 (SIP) 应用程序开发人员和管理员的小型命令行工具。它可以用于对 SIP 应用程序和设备进行一些简单的测试。 sipsak 是一款 SIP 压力和诊断实用程序。它通过 sip-uri 向服务器发送 SIP 请求,并检查收到的响应。它以以下模式之一…...
管理学院权限管理系统开发总结
文章目录 🎓 管理学院权限管理系统开发总结 - 现代化Web应用实践之路📝 项目概述🏗️ 技术架构设计后端技术栈前端技术栈 💡 核心功能特性1. 用户管理模块2. 权限管理系统3. 统计报表功能4. 用户体验优化 🗄️ 数据库设…...
七、数据库的完整性
七、数据库的完整性 主要内容 7.1 数据库的完整性概述 7.2 实体完整性 7.3 参照完整性 7.4 用户定义的完整性 7.5 触发器 7.6 SQL Server中数据库完整性的实现 7.7 小结 7.1 数据库的完整性概述 数据库完整性的含义 正确性 指数据的合法性 有效性 指数据是否属于所定…...
20个超级好用的 CSS 动画库
分享 20 个最佳 CSS 动画库。 它们中的大多数将生成纯 CSS 代码,而不需要任何外部库。 1.Animate.css 一个开箱即用型的跨浏览器动画库,可供你在项目中使用。 2.Magic Animations CSS3 一组简单的动画,可以包含在你的网页或应用项目中。 3.An…...
Bean 作用域有哪些?如何答出技术深度?
导语: Spring 面试绕不开 Bean 的作用域问题,这是面试官考察候选人对 Spring 框架理解深度的常见方式。本文将围绕“Spring 中的 Bean 作用域”展开,结合典型面试题及实战场景,帮你厘清重点,打破模板式回答,…...
