/*
 *  PicoRV32 -- A Small RISC-V (RV32I) Processor Core
 *
 *  Copyright (C) 2015 Claire Xenia Wolf
 *
 *  Permission to use, copy, modify, and/or distribute this software for any
 *  purpose with or without fee is hereby granted, provided that the above
 *  copyright notice and this permission notice appear in all copies.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

/* verilator lint_off WIDTH */
/* verilator lint_off PINMISSING */
/* verilator lint_off CASEOVERLAP */
/* verilator lint_off CASEINCOMPLETE */

`timescale 1 ns / 1 ps
// `default_nettype none
// `define DEBUG

// `debug(x) expands to x only when DEBUG is defined; otherwise to nothing.
`ifdef DEBUG
  `define debug(debug_command) debug_command
`else
  `define debug(debug_command)
`endif

// `assert(x) expands to a call of the no-op task empty_statement (declared
// inside module picorv32), so assertion sites remain valid statements.
`define assert(assert_expr) empty_statement
`define FORMAL_KEEP

/***************************************************************
 * picorv32
 *
 * Multi-cycle RV32I core with a simple valid/ready memory port,
 * a look-ahead view of the next memory request, and two built-in
 * PCPI co-processors (multiply and divide).
 *
 * Parameters:
 *   PROGADDR_RESET  value loaded into reg_pc / reg_next_pc on reset.
 *   STACKADDR       if not all-ones, register x2 is written with this
 *                   value as part of the reset sequence.
 *
 * NOTE(review): the external PCPI result inputs (pcpi_wr, pcpi_rd,
 * pcpi_wait, pcpi_ready) are not read anywhere in this module; only the
 * internal mul/div cores feed pcpi_int_*.
 ***************************************************************/
module picorv32 #(
    parameter [31:0] PROGADDR_RESET = 32'h0000_0000,
    parameter [31:0] STACKADDR = 32'hffff_ffff
) (
    input clk, resetn,
    output reg trap,

    output reg        mem_valid,
    output reg        mem_instr,
    input             mem_ready,

    output reg [31:0] mem_addr,
    output reg [31:0] mem_wdata,
    output reg [ 3:0] mem_wstrb,
    input      [31:0] mem_rdata,

    // Look-Ahead Interface
    output            mem_la_read,
    output            mem_la_write,
    output     [31:0] mem_la_addr,
    output reg [31:0] mem_la_wdata,
    output reg [ 3:0] mem_la_wstrb,

    // Pico Co-Processor Interface (PCPI)
    output reg        pcpi_valid,
    output reg [31:0] pcpi_insn,
    output     [31:0] pcpi_rs1,
    output     [31:0] pcpi_rs2,
    input             pcpi_wr,
    input      [31:0] pcpi_rd,
    input             pcpi_wait,
    input             pcpi_ready,

    // IF DEBUG
    output reg        fetch_next,
    output reg [31:0] dbg_insn_opcode,
    output reg [31:0] dbg_insn_addr,
    output reg [63:0] dbg_ascii_instr
);
    localparam integer regfile_size = 32;
    localparam integer regindex_bits = 5;

    reg [63:0] count_cycle, count_instr;
    reg [31:0] reg_pc, reg_next_pc, reg_op1, reg_op2, reg_out;
    reg [4:0] reg_sh;

    reg [31:0] next_insn_opcode;

    // Debug aliases of the memory port signals.
    wire dbg_mem_valid = mem_valid;
    wire dbg_mem_instr = mem_instr;
    wire dbg_mem_ready = mem_ready;
    wire [31:0] dbg_mem_addr  = mem_addr;
    wire [31:0] dbg_mem_wdata = mem_wdata;
    wire [3:0] dbg_mem_wstrb = mem_wstrb;
    wire [31:0] dbg_mem_rdata = mem_rdata;

    // The co-processors always see the current ALU operand registers.
    assign pcpi_rs1 = reg_op1;
    assign pcpi_rs2 = reg_op2;

    wire [31:0] next_pc;

    task empty_statement;
        // This task is used by the `assert directive in non-formal mode to
        // avoid empty statement (which are unsupported by plain Verilog syntax).
        begin end
    endtask

    // Internal PCPI Cores

    wire        pcpi_mul_wr;
    wire [31:0] pcpi_mul_rd;
    wire        pcpi_mul_wait;
    wire        pcpi_mul_ready;

    wire        pcpi_div_wr;
    wire [31:0] pcpi_div_rd;
    wire        pcpi_div_wait;
    wire        pcpi_div_ready;

    reg        pcpi_int_wr;
    reg [31:0] pcpi_int_rd;
    reg        pcpi_int_wait;
    reg        pcpi_int_ready;

    picorv32_pcpi_mul pcpi_mul (
        .clk       (clk),
        .resetn    (resetn),
        .pcpi_valid(pcpi_valid),
        .pcpi_insn (pcpi_insn),
        .pcpi_rs1  (pcpi_rs1),
        .pcpi_rs2  (pcpi_rs2),
        .pcpi_wr   (pcpi_mul_wr),
        .pcpi_rd   (pcpi_mul_rd),
        .pcpi_wait (pcpi_mul_wait),
        .pcpi_ready(pcpi_mul_ready)
    );

    picorv32_pcpi_div pcpi_div (
        .clk       (clk),
        .resetn    (resetn),
        .pcpi_valid(pcpi_valid),
        .pcpi_insn (pcpi_insn),
        .pcpi_rs1  (pcpi_rs1),
        .pcpi_rs2  (pcpi_rs2),
        .pcpi_wr   (pcpi_div_wr),
        .pcpi_rd   (pcpi_div_rd),
        .pcpi_wait (pcpi_div_wait),
        .pcpi_ready(pcpi_div_ready)
    );

    // Merge the two internal co-processor result channels: wait/ready are
    // OR-ed, and wr/rd are taken from whichever core signals ready.
    always @* begin
        pcpi_int_wr = 0;
        pcpi_int_rd = 32'bx;
        pcpi_int_wait  = |{pcpi_mul_wait,  pcpi_div_wait};
        pcpi_int_ready = |{pcpi_mul_ready, pcpi_div_ready};

        (* parallel_case *)
        case (1'b1)
            // Constant-0 item: can never equal the 1'b1 selector.
            0: begin
                pcpi_int_wr = 0;
                pcpi_int_rd = 0;
            end
            pcpi_mul_ready: begin
                pcpi_int_wr = pcpi_mul_wr;
                pcpi_int_rd = pcpi_mul_rd;
            end
            pcpi_div_ready: begin
                pcpi_int_wr = pcpi_div_wr;
                pcpi_int_rd = pcpi_div_rd;
            end
        endcase
    end


    // Memory Interface
    //
    // mem_state, as used by the state machine below:
    //   0  idle, a new request may be issued
    //   1  read (instruction, prefetch or data) in flight
    //   2  write in flight
    //   3  prefetch completed; waiting for mem_do_rinst to consume it

    reg [1:0] mem_state;
    reg [1:0] mem_wordsize;
    reg [31:0] mem_rdata_word;
    reg [31:0] mem_rdata_q;
    reg mem_do_prefetch;
    reg mem_do_rinst;
    reg mem_do_rdata;
    reg mem_do_wdata;

    wire mem_xfer;
    reg last_mem_valid;

    reg [15:0] mem_16bit_buffer;

    wire [31:0] mem_rdata_latched_noshuffle;
    wire [31:0] mem_rdata_latched;

    // A transfer completes in the cycle where valid and ready are both high.
    assign mem_xfer = mem_valid && mem_ready;

    wire mem_busy = |{mem_do_prefetch, mem_do_rinst, mem_do_rdata, mem_do_wdata};
    // Done when a requested transfer completes, or when an already-prefetched
    // word (state 3) is claimed by an instruction fetch.
    wire mem_done = resetn && ((mem_xfer && |mem_state && (mem_do_rinst || mem_do_rdata || mem_do_wdata)) || (&mem_state && mem_do_rinst));

    assign mem_la_write = resetn && !mem_state && mem_do_wdata;
    assign mem_la_read = resetn && ((!mem_state && (mem_do_rinst || mem_do_prefetch || mem_do_rdata)));
    // Word-aligned address: next PC for fetches, reg_op1 for data accesses.
    assign mem_la_addr = (mem_do_prefetch || mem_do_rinst) ? {next_pc[31:2], 2'b00} : {reg_op1[31:2], 2'b00};

    // Use live read data during the transfer cycle, the registered copy after.
    assign mem_rdata_latched_noshuffle = mem_xfer ? mem_rdata : mem_rdata_q;

    assign mem_rdata_latched = mem_rdata_latched_noshuffle;

    always @(posedge clk) begin
        if (!resetn) begin
            last_mem_valid <= 0;
        end else begin
            // Updated every cycle. The previous form only assigned while the
            // flag was low, so once set it stayed high until the next reset.
            last_mem_valid <= mem_valid && !mem_ready;
        end
    end

    // Byte-lane steering: replicate store data across the word and pick the
    // write strobes / read lanes from the low address bits in reg_op1.
    // mem_wordsize: 0 = word, 1 = halfword, 2 = byte.
    always @* begin
        (* full_case *)
        case (mem_wordsize)
            0: begin
                mem_la_wdata = reg_op2;
                mem_la_wstrb = 4'b1111;
                mem_rdata_word = mem_rdata;
            end
            1: begin
                mem_la_wdata = {2{reg_op2[15:0]}};
                mem_la_wstrb = reg_op1[1] ? 4'b1100 : 4'b0011;
                case (reg_op1[1])
                    1'b0: mem_rdata_word = {16'b0, mem_rdata[15: 0]};
                    1'b1: mem_rdata_word = {16'b0, mem_rdata[31:16]};
                endcase
            end
            2: begin
                mem_la_wdata = {4{reg_op2[7:0]}};
                mem_la_wstrb = 4'b0001 << reg_op1[1:0];
                case (reg_op1[1:0])
                    2'b00: mem_rdata_word = {24'b0, mem_rdata[ 7: 0]};
                    2'b01: mem_rdata_word = {24'b0, mem_rdata[15: 8]};
                    2'b10: mem_rdata_word = {24'b0, mem_rdata[23:16]};
                    2'b11: mem_rdata_word = {24'b0, mem_rdata[31:24]};
                endcase
            end
        endcase
    end

    // Capture read data on every completed transfer.
    always @(posedge clk) begin
        if (mem_xfer) begin
            mem_rdata_q <= mem_rdata;
            next_insn_opcode <= mem_rdata;
        end
    end

    // Assertion sites: the mem_do_* request flags are expected to be
    // mutually exclusive between reads and writes. With the `assert macro
    // defined above these expand to calls of the no-op task.
    always @(posedge clk) begin
        if (resetn && !trap) begin
            if (mem_do_prefetch || mem_do_rinst || mem_do_rdata)
                `assert(!mem_do_wdata);

            if (mem_do_prefetch || mem_do_rinst)
                `assert(!mem_do_rdata);

            if (mem_do_rdata)
                `assert(!mem_do_prefetch && !mem_do_rinst);

            if (mem_do_wdata)
                `assert(!(mem_do_prefetch || mem_do_rinst || mem_do_rdata));

            if (mem_state == 2 || mem_state == 3)
                `assert(mem_valid || mem_do_prefetch);
        end
    end

    // Memory request state machine (see mem_state encoding above).
    always @(posedge clk) begin
        if (!resetn || trap) begin
            if (!resetn)
                mem_state <= 0;
            // On trap, let an in-flight request finish before dropping valid.
            if (!resetn || mem_ready)
                mem_valid <= 0;
        end else begin
            if (mem_la_read || mem_la_write) begin
                mem_addr <= mem_la_addr;
                mem_wstrb <= mem_la_wstrb & {4{mem_la_write}};
            end
            if (mem_la_write) begin
                mem_wdata <= mem_la_wdata;
            end
            case (mem_state)
                0: begin
                    if (mem_do_prefetch || mem_do_rinst || mem_do_rdata) begin
                        mem_valid <= 1;
                        mem_instr <= mem_do_prefetch || mem_do_rinst;
                        mem_wstrb <= 0;
                        mem_state <= 1;
                    end
                    if (mem_do_wdata) begin
                        mem_valid <= 1;
                        mem_instr <= 0;
                        mem_state <= 2;
                    end
                end
                1: begin
                    `assert(mem_wstrb == 0);
                    `assert(mem_do_prefetch || mem_do_rinst || mem_do_rdata);
                    `assert(mem_valid == 1);
                    `assert(mem_instr == (mem_do_prefetch || mem_do_rinst));
                    if (mem_xfer) begin
                        mem_valid <= 0;
                        // A pure prefetch parks in state 3 until claimed.
                        mem_state <= mem_do_rinst || mem_do_rdata ? 0 : 3;
                    end
                end
                2: begin
                    `assert(mem_wstrb != 0);
                    `assert(mem_do_wdata);
                    if (mem_xfer) begin
                        mem_valid <= 0;
                        mem_state <= 0;
                    end
                end
                3: begin
                    `assert(mem_wstrb == 0);
                    `assert(mem_do_prefetch);
                    if (mem_do_rinst) begin
                        mem_state <= 0;
                    end
                end
            endcase
        end
    end


    // Instruction Decoder
    //
    // Decoding is done in two steps: when the fetch completes
    // (mem_do_rinst && mem_done) the opcode class and register indices are
    // latched from mem_rdata_latched; one cycle later (decoder_trigger) the
    // individual instr_* flags and the immediate are derived from
    // mem_rdata_q.

    reg instr_lui, instr_auipc, instr_jal, instr_jalr;
    reg instr_beq, instr_bne, instr_blt, instr_bge, instr_bltu, instr_bgeu;
    reg instr_lb, instr_lh, instr_lw, instr_lbu, instr_lhu, instr_sb, instr_sh, instr_sw;
    reg instr_addi, instr_slti, instr_sltiu, instr_xori, instr_ori;
    reg instr_andi, instr_slli, instr_srli, instr_srai;
    reg instr_add, instr_sub, instr_sll, instr_slt, instr_sltu;
    reg instr_xor, instr_srl, instr_sra, instr_or, instr_and;
    reg instr_rdcycle, instr_rdcycleh, instr_rdinstr, instr_rdinstrh, instr_ecall_ebreak;
    wire instr_trap;

    reg [regindex_bits-1:0] decoded_rd, decoded_rs1, decoded_rs2;
    reg [31:0] decoded_imm, decoded_imm_j;
    reg decoder_trigger;
    reg decoder_trigger_q;
    reg decoder_pseudo_trigger;
    reg decoder_pseudo_trigger_q;

    reg is_lui_auipc_jal;
    reg is_lb_lh_lw_lbu_lhu;
    reg is_slli_srli_srai;
    reg is_jalr_addi_slti_sltiu_xori_ori_andi;
    reg is_sb_sh_sw;
    reg is_sll_srl_sra;
    reg is_lui_auipc_jal_jalr_addi_add_sub;
    reg is_slti_blt_slt;
    reg is_sltiu_bltu_sltu;
    reg is_beq_bne_blt_bge_bltu_bgeu;
    reg is_lbu_lhu_lw;
    reg is_alu_reg_imm;
    reg is_alu_reg_reg;
    reg is_compare;

    // High when none of the natively decoded instructions matched; the main
    // state machine then offers the instruction to the PCPI cores.
    assign instr_trap = !{instr_lui, instr_auipc, instr_jal, instr_jalr,
            instr_beq, instr_bne, instr_blt, instr_bge, instr_bltu, instr_bgeu,
            instr_lb, instr_lh, instr_lw, instr_lbu, instr_lhu, instr_sb, instr_sh, instr_sw,
            instr_addi, instr_slti, instr_sltiu, instr_xori, instr_ori, instr_andi, instr_slli, instr_srli, instr_srai,
            instr_add, instr_sub, instr_sll, instr_slt, instr_sltu, instr_xor, instr_srl, instr_sra, instr_or, instr_and,
            instr_rdcycle, instr_rdcycleh, instr_rdinstr, instr_rdinstrh};

    wire is_rdcycle_rdcycleh_rdinstr_rdinstrh;
    assign is_rdcycle_rdcycleh_rdinstr_rdinstrh = |{instr_rdcycle, instr_rdcycleh, instr_rdinstr, instr_rdinstrh};

    reg [63:0] new_ascii_instr;

    reg [31:0] dbg_insn_imm;
    reg [4:0] dbg_insn_rs1;
    reg [4:0] dbg_insn_rs2;
    reg [4:0] dbg_insn_rd;
    reg [31:0] dbg_rs1val;
    reg [31:0] dbg_rs2val;
    reg dbg_rs1val_valid;
    reg dbg_rs2val_valid;
    reg [127:0] dbg_ascii_state;

    // Debug aid: mnemonic of the currently decoded instruction as ASCII.
    always @* begin
        new_ascii_instr = "";

        if (instr_lui)      new_ascii_instr = "lui";
        if (instr_auipc)    new_ascii_instr = "auipc";
        if (instr_jal)      new_ascii_instr = "jal";
        if (instr_jalr)     new_ascii_instr = "jalr";

        if (instr_beq)      new_ascii_instr = "beq";
        if (instr_bne)      new_ascii_instr = "bne";
        if (instr_blt)      new_ascii_instr = "blt";
        if (instr_bge)      new_ascii_instr = "bge";
        if (instr_bltu)     new_ascii_instr = "bltu";
        if (instr_bgeu)     new_ascii_instr = "bgeu";

        if (instr_lb)       new_ascii_instr = "lb";
        if (instr_lh)       new_ascii_instr = "lh";
        if (instr_lw)       new_ascii_instr = "lw";
        if (instr_lbu)      new_ascii_instr = "lbu";
        if (instr_lhu)      new_ascii_instr = "lhu";
        if (instr_sb)       new_ascii_instr = "sb";
        if (instr_sh)       new_ascii_instr = "sh";
        if (instr_sw)       new_ascii_instr = "sw";

        if (instr_addi)     new_ascii_instr = "addi";
        if (instr_slti)     new_ascii_instr = "slti";
        if (instr_sltiu)    new_ascii_instr = "sltiu";
        if (instr_xori)     new_ascii_instr = "xori";
        if (instr_ori)      new_ascii_instr = "ori";
        if (instr_andi)     new_ascii_instr = "andi";
        if (instr_slli)     new_ascii_instr = "slli";
        if (instr_srli)     new_ascii_instr = "srli";
        if (instr_srai)     new_ascii_instr = "srai";

        if (instr_add)      new_ascii_instr = "add";
        if (instr_sub)      new_ascii_instr = "sub";
        if (instr_sll)      new_ascii_instr = "sll";
        if (instr_slt)      new_ascii_instr = "slt";
        if (instr_sltu)     new_ascii_instr = "sltu";
        if (instr_xor)      new_ascii_instr = "xor";
        if (instr_srl)      new_ascii_instr = "srl";
        if (instr_sra)      new_ascii_instr = "sra";
        if (instr_or)       new_ascii_instr = "or";
        if (instr_and)      new_ascii_instr = "and";

        if (instr_rdcycle)  new_ascii_instr = "rdcycle";
        if (instr_rdcycleh) new_ascii_instr = "rdcycleh";
        if (instr_rdinstr)  new_ascii_instr = "rdinstr";
        if (instr_rdinstrh) new_ascii_instr = "rdinstrh";
    end

    reg [63:0] q_ascii_instr;
    reg [31:0] q_insn_imm;
    reg [31:0] q_insn_opcode;
    reg [4:0] q_insn_rs1;
    reg [4:0] q_insn_rs2;
    reg [4:0] q_insn_rd;
    wire launch_next_insn;
    reg dbg_valid_insn;

    reg [63:0] cached_ascii_instr;
    reg [31:0] cached_insn_imm;
    reg [31:0] cached_insn_opcode;
    reg [4:0] cached_insn_rs1;
    reg [4:0] cached_insn_rs2;
    reg [4:0] cached_insn_rd;

    // Debug bookkeeping: q_* hold the previous cycle's dbg_* values and
    // cached_* hold the decode captured one cycle after decoder_trigger.
    always @(posedge clk) begin
        q_ascii_instr <= dbg_ascii_instr;
        q_insn_imm <= dbg_insn_imm;
        q_insn_opcode <= dbg_insn_opcode;
        q_insn_rs1 <= dbg_insn_rs1;
        q_insn_rs2 <= dbg_insn_rs2;
        q_insn_rd <= dbg_insn_rd;
        fetch_next <= launch_next_insn;

        if (!resetn || trap)
            dbg_valid_insn <= 0;
        else if (launch_next_insn)
            dbg_valid_insn <= 1;

        if (decoder_trigger_q) begin
            cached_ascii_instr <= new_ascii_instr;
            cached_insn_imm <= decoded_imm;
            // Opcodes whose two low bits are not both 1 are reported as
            // 16-bit values.
            if (&next_insn_opcode[1:0])
                cached_insn_opcode <= next_insn_opcode;
            else
                cached_insn_opcode <= {16'b0, next_insn_opcode[15:0]};
            cached_insn_rs1 <= decoded_rs1;
            cached_insn_rs2 <= decoded_rs2;
            cached_insn_rd <= decoded_rd;
        end

        if (launch_next_insn) begin
            dbg_insn_addr <= next_pc;
        end
    end

    // dbg_* outputs hold their previous value except in the cycle after an
    // instruction launch, where they take either the cached decode (after a
    // pseudo trigger) or the fresh decode.
    always @* begin
        dbg_ascii_instr = q_ascii_instr;
        dbg_insn_imm = q_insn_imm;
        dbg_insn_opcode = q_insn_opcode;
        dbg_insn_rs1 = q_insn_rs1;
        dbg_insn_rs2 = q_insn_rs2;
        dbg_insn_rd = q_insn_rd;

        if (fetch_next) begin
            if (decoder_pseudo_trigger_q) begin
                dbg_ascii_instr = cached_ascii_instr;
                dbg_insn_imm = cached_insn_imm;
                dbg_insn_opcode = cached_insn_opcode;
                dbg_insn_rs1 = cached_insn_rs1;
                dbg_insn_rs2 = cached_insn_rs2;
                dbg_insn_rd = cached_insn_rd;
            end else begin
                dbg_ascii_instr = new_ascii_instr;
                if (&next_insn_opcode[1:0])
                    dbg_insn_opcode = next_insn_opcode;
                else
                    dbg_insn_opcode = {16'b0, next_insn_opcode[15:0]};
                dbg_insn_imm = decoded_imm;
                dbg_insn_rs1 = decoded_rs1;
                dbg_insn_rs2 = decoded_rs2;
                dbg_insn_rd = decoded_rd;
            end
        end
    end

    always @(posedge clk) begin
        // Grouping flags are re-derived every cycle from the instr_* flags.
        is_lui_auipc_jal <= |{instr_lui, instr_auipc, instr_jal};
        is_lui_auipc_jal_jalr_addi_add_sub <= |{instr_lui, instr_auipc, instr_jal, instr_jalr, instr_addi, instr_add, instr_sub};
        is_slti_blt_slt <= |{instr_slti, instr_blt, instr_slt};
        is_sltiu_bltu_sltu <= |{instr_sltiu, instr_bltu, instr_sltu};
        is_lbu_lhu_lw <= |{instr_lbu, instr_lhu, instr_lw};
        is_compare <= |{is_beq_bne_blt_bge_bltu_bgeu, instr_slti, instr_slt, instr_sltiu, instr_sltu};

        // Step 1: fetch completed -- classify by major opcode and latch the
        // register indices and the J-type immediate.
        if (mem_do_rinst && mem_done) begin
            instr_lui     <= mem_rdata_latched[6:0] == 7'b0110111;
            instr_auipc   <= mem_rdata_latched[6:0] == 7'b0010111;
            instr_jal     <= mem_rdata_latched[6:0] == 7'b1101111;
            instr_jalr    <= mem_rdata_latched[6:0] == 7'b1100111 && mem_rdata_latched[14:12] == 3'b000;

            is_beq_bne_blt_bge_bltu_bgeu <= mem_rdata_latched[6:0] == 7'b1100011;
            is_lb_lh_lw_lbu_lhu          <= mem_rdata_latched[6:0] == 7'b0000011;
            is_sb_sh_sw                  <= mem_rdata_latched[6:0] == 7'b0100011;
            is_alu_reg_imm               <= mem_rdata_latched[6:0] == 7'b0010011;
            is_alu_reg_reg               <= mem_rdata_latched[6:0] == 7'b0110011;

            { decoded_imm_j[31:20], decoded_imm_j[10:1], decoded_imm_j[11], decoded_imm_j[19:12], decoded_imm_j[0] } <= $signed({mem_rdata_latched[31:12], 1'b0});

            decoded_rd <= mem_rdata_latched[11:7];
            decoded_rs1 <= mem_rdata_latched[19:15];
            decoded_rs2 <= mem_rdata_latched[24:20];
        end

        // Step 2: one cycle later -- resolve individual instructions from
        // funct3/funct7 and build the immediate. Skipped for pseudo triggers
        // (raised by the load/store states to re-enter fetch).
        if (decoder_trigger && !decoder_pseudo_trigger) begin
            pcpi_insn <= mem_rdata_q;

            instr_beq   <= is_beq_bne_blt_bge_bltu_bgeu && mem_rdata_q[14:12] == 3'b000;
            instr_bne   <= is_beq_bne_blt_bge_bltu_bgeu && mem_rdata_q[14:12] == 3'b001;
            instr_blt   <= is_beq_bne_blt_bge_bltu_bgeu && mem_rdata_q[14:12] == 3'b100;
            instr_bge   <= is_beq_bne_blt_bge_bltu_bgeu && mem_rdata_q[14:12] == 3'b101;
            instr_bltu  <= is_beq_bne_blt_bge_bltu_bgeu && mem_rdata_q[14:12] == 3'b110;
            instr_bgeu  <= is_beq_bne_blt_bge_bltu_bgeu && mem_rdata_q[14:12] == 3'b111;

            instr_lb    <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b000;
            instr_lh    <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b001;
            instr_lw    <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b010;
            instr_lbu   <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b100;
            instr_lhu   <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b101;

            instr_sb    <= is_sb_sh_sw && mem_rdata_q[14:12] == 3'b000;
            instr_sh    <= is_sb_sh_sw && mem_rdata_q[14:12] == 3'b001;
            instr_sw    <= is_sb_sh_sw && mem_rdata_q[14:12] == 3'b010;

            instr_addi  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b000;
            instr_slti  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b010;
            instr_sltiu <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b011;
            instr_xori  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b100;
            instr_ori   <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b110;
            instr_andi  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b111;

            instr_slli  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b001 && mem_rdata_q[31:25] == 7'b0000000;
            instr_srli  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0000000;
            instr_srai  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0100000;

            instr_add   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b000 && mem_rdata_q[31:25] == 7'b0000000;
            instr_sub   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b000 && mem_rdata_q[31:25] == 7'b0100000;
            instr_sll   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b001 && mem_rdata_q[31:25] == 7'b0000000;
            instr_slt   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b010 && mem_rdata_q[31:25] == 7'b0000000;
            instr_sltu  <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b011 && mem_rdata_q[31:25] == 7'b0000000;
            instr_xor   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b100 && mem_rdata_q[31:25] == 7'b0000000;
            instr_srl   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0000000;
            instr_sra   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0100000;
            instr_or    <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b110 && mem_rdata_q[31:25] == 7'b0000000;
            instr_and   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b111 && mem_rdata_q[31:25] == 7'b0000000;

            instr_rdcycle  <= ((mem_rdata_q[6:0] == 7'b1110011 && mem_rdata_q[31:12] == 'b11000000000000000010) ||
                               (mem_rdata_q[6:0] == 7'b1110011 && mem_rdata_q[31:12] == 'b11000000000100000010));
            instr_rdcycleh <= ((mem_rdata_q[6:0] == 7'b1110011 && mem_rdata_q[31:12] == 'b11001000000000000010) ||
                               (mem_rdata_q[6:0] == 7'b1110011 && mem_rdata_q[31:12] == 'b11001000000100000010));
            instr_rdinstr  <=  (mem_rdata_q[6:0] == 7'b1110011 && mem_rdata_q[31:12] == 'b11000000001000000010);
            instr_rdinstrh <=  (mem_rdata_q[6:0] == 7'b1110011 && mem_rdata_q[31:12] == 'b11001000001000000010);

            instr_ecall_ebreak <= (mem_rdata_q[6:0] == 7'b1110011 && !mem_rdata_q[31:21] && !mem_rdata_q[19:7]);

            is_slli_srli_srai <= is_alu_reg_imm && |{
                mem_rdata_q[14:12] == 3'b001 && mem_rdata_q[31:25] == 7'b0000000,
                mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0000000,
                mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0100000
            };

            is_jalr_addi_slti_sltiu_xori_ori_andi <= instr_jalr || is_alu_reg_imm && |{
                mem_rdata_q[14:12] == 3'b000,
                mem_rdata_q[14:12] == 3'b010,
                mem_rdata_q[14:12] == 3'b011,
                mem_rdata_q[14:12] == 3'b100,
                mem_rdata_q[14:12] == 3'b110,
                mem_rdata_q[14:12] == 3'b111
            };

            is_sll_srl_sra <= is_alu_reg_reg && |{
                mem_rdata_q[14:12] == 3'b001 && mem_rdata_q[31:25] == 7'b0000000,
                mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0000000,
                mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0100000
            };

            // The instr_* flags these depend on are only being written in
            // this same cycle, so clear them until the next cycle's
            // re-derivation at the top of this block.
            is_lui_auipc_jal_jalr_addi_add_sub <= 0;
            is_compare <= 0;

            // Immediate, selected by instruction format.
            (* parallel_case *)
            case (1'b1)
                instr_jal:
                    decoded_imm <= decoded_imm_j;
                |{instr_lui, instr_auipc}:
                    decoded_imm <= mem_rdata_q[31:12] << 12;
                |{instr_jalr, is_lb_lh_lw_lbu_lhu, is_alu_reg_imm}:
                    decoded_imm <= $signed(mem_rdata_q[31:20]);
                is_beq_bne_blt_bge_bltu_bgeu:
                    decoded_imm <= $signed({mem_rdata_q[31], mem_rdata_q[7], mem_rdata_q[30:25], mem_rdata_q[11:8], 1'b0});
                is_sb_sh_sw:
                    decoded_imm <= $signed({mem_rdata_q[31:25], mem_rdata_q[11:7]});
                default:
                    decoded_imm <= 1'bx;
            endcase
        end

        // Reset has the last word (later non-blocking assignments win).
        if (!resetn) begin
            is_beq_bne_blt_bge_bltu_bgeu <= 0;
            is_compare <= 0;

            instr_beq   <= 0;
            instr_bne   <= 0;
            instr_blt   <= 0;
            instr_bge   <= 0;
            instr_bltu  <= 0;
            instr_bgeu  <= 0;

            instr_addi  <= 0;
            instr_slti  <= 0;
            instr_sltiu <= 0;
            instr_xori  <= 0;
            instr_ori   <= 0;
            instr_andi  <= 0;

            instr_add   <= 0;
            instr_sub   <= 0;
            instr_sll   <= 0;
            instr_slt   <= 0;
            instr_sltu  <= 0;
            instr_xor   <= 0;
            instr_srl   <= 0;
            instr_sra   <= 0;
            instr_or    <= 0;
            instr_and   <= 0;
        end
    end


    // Main State Machine

    // One-hot state encoding.
    localparam cpu_state_trap   = 8'b10000000;
    localparam cpu_state_fetch  = 8'b01000000;
    localparam cpu_state_ld_rs1 = 8'b00100000;
    localparam cpu_state_ld_rs2 = 8'b00010000;
    localparam cpu_state_exec   = 8'b00001000;
    localparam cpu_state_shift  = 8'b00000100;
    localparam cpu_state_stmem  = 8'b00000010;
    localparam cpu_state_ldmem  = 8'b00000001;

    reg [7:0] cpu_state;

    // Debug aid: current state name as ASCII.
    always @* begin
        dbg_ascii_state = "";
        if (cpu_state == cpu_state_trap)   dbg_ascii_state = "trap";
        if (cpu_state == cpu_state_fetch)  dbg_ascii_state = "fetch";
        if (cpu_state == cpu_state_ld_rs1) dbg_ascii_state = "ld_rs1";
        if (cpu_state == cpu_state_ld_rs2) dbg_ascii_state = "ld_rs2";
        if (cpu_state == cpu_state_exec)   dbg_ascii_state = "exec";
        if (cpu_state == cpu_state_shift)  dbg_ascii_state = "shift";
        if (cpu_state == cpu_state_stmem)  dbg_ascii_state = "stmem";
        if (cpu_state == cpu_state_ldmem)  dbg_ascii_state = "ldmem";
    end

    reg set_mem_do_rinst;
    reg set_mem_do_rdata;
    reg set_mem_do_wdata;

    reg latched_store;
    reg latched_stalu;
    reg latched_branch;
    reg latched_is_lu;
    reg latched_is_lh;
    reg latched_is_lb;
    reg [regindex_bits-1:0] latched_rd;

    reg [31:0] current_pc;
    // A taken branch/jump redirects the fetch address to reg_out with bit 0
    // cleared; otherwise fetch continues at reg_next_pc.
    assign next_pc = latched_store && latched_branch ? reg_out & ~1 : reg_next_pc;

    reg [3:0] pcpi_timeout_counter;
    reg pcpi_timeout;

    reg [31:0] alu_out, alu_out_q;
    reg alu_out_0, alu_out_0_q;

    reg [31:0] alu_add_sub;
    reg [31:0] alu_shl, alu_shr;
    reg alu_eq, alu_ltu, alu_lts;

    // ALU primitives.
    always @* begin
        alu_add_sub = instr_sub ? reg_op1 - reg_op2 : reg_op1 + reg_op2;
        alu_eq = reg_op1 == reg_op2;
        alu_lts = $signed(reg_op1) < $signed(reg_op2);
        alu_ltu = reg_op1 < reg_op2;
        alu_shl = reg_op1 << reg_op2[4:0];
        // 33-bit operand: the extra top bit carries the sign for arithmetic
        // shifts and is zero for logical ones.
        alu_shr = $signed({instr_sra || instr_srai ? reg_op1[31] : 1'b0, reg_op1}) >>> reg_op2[4:0];
    end

    // ALU result selection: alu_out_0 is the 1-bit compare/branch result,
    // alu_out the 32-bit result.
    always @* begin
        alu_out_0 = 'bx;
        (* parallel_case, full_case *)
        case (1'b1)
            instr_beq:
                alu_out_0 = alu_eq;
            instr_bne:
                alu_out_0 = !alu_eq;
            instr_bge:
                alu_out_0 = !alu_lts;
            instr_bgeu:
                alu_out_0 = !alu_ltu;
            is_slti_blt_slt:
                alu_out_0 = alu_lts;
            is_sltiu_bltu_sltu:
                alu_out_0 = alu_ltu;
        endcase

        alu_out = 'bx;
        (* parallel_case, full_case *)
        case (1'b1)
            is_lui_auipc_jal_jalr_addi_add_sub:
                alu_out = alu_add_sub;
            is_compare:
                alu_out = alu_out_0;
            instr_xori || instr_xor:
                alu_out = reg_op1 ^ reg_op2;
            instr_ori || instr_or:
                alu_out = reg_op1 | reg_op2;
            instr_andi || instr_and:
                alu_out = reg_op1 & reg_op2;
            // Constant-0 items never match: shifts are performed
            // iteratively in cpu_state_shift instead.
            0:
                alu_out = alu_shl;
            0:
                alu_out = alu_shr;
        endcase
    end

    reg cpuregs_write;
    reg [31:0] cpuregs_wrdata;
    reg [31:0] cpuregs_rs1;
    reg [31:0] cpuregs_rs2;
    reg [regindex_bits-1:0] decoded_rs;

    // Register write-back is performed while in the fetch state, using the
    // latched_* flags left behind by the previous instruction.
    always @* begin
        cpuregs_write = 0;
        cpuregs_wrdata = 'bx;

        if (cpu_state == cpu_state_fetch) begin
            (* parallel_case *)
            case (1'b1)
                latched_branch: begin
                    // Link value for jumps.
                    cpuregs_wrdata = reg_pc + 4;
                    cpuregs_write = 1;
                end
                latched_store && !latched_branch: begin
                    cpuregs_wrdata = latched_stalu ? alu_out_q : reg_out;
                    cpuregs_write = 1;
                end
            endcase
        end
    end

    wire [31:0] cpuregs_rdata1;
    wire [31:0] cpuregs_rdata2;

    // Address wires sized to the 5-bit register index they carry and to the
    // 5-bit ports of picorv32_regs (previously declared 6 bits wide).
    wire [regindex_bits-1:0] cpuregs_waddr = latched_rd;
    wire [regindex_bits-1:0] cpuregs_raddr1 = decoded_rs1;
    wire [regindex_bits-1:0] cpuregs_raddr2 = decoded_rs2;

    picorv32_regs cpuregs (
        .clk(clk),
        // Writes to register 0 are suppressed (latched_rd must be non-zero).
        .wen(resetn && cpuregs_write && latched_rd),
        .waddr(cpuregs_waddr),
        .raddr1(cpuregs_raddr1),
        .raddr2(cpuregs_raddr2),
        .wdata(cpuregs_wrdata),
        .rdata1(cpuregs_rdata1),
        .rdata2(cpuregs_rdata2)
    );

    // Reads of register 0 are forced to zero.
    always @* begin
        decoded_rs = 'bx;
        cpuregs_rs1 = decoded_rs1 ? cpuregs_rdata1 : 0;
        cpuregs_rs2 = decoded_rs2 ? cpuregs_rdata2 : 0;
    end

    assign launch_next_insn = cpu_state == cpu_state_fetch && decoder_trigger;

    // Main sequential process. set_mem_do_* and current_pc are written with
    // blocking assignments and used as same-cycle temporaries; statement
    // order inside this block therefore matters.
    always @(posedge clk) begin
        trap <= 0;
        reg_sh <= 'bx;
        reg_out <= 'bx;
        set_mem_do_rinst = 0;
        set_mem_do_rdata = 0;
        set_mem_do_wdata = 0;

        alu_out_0_q <= alu_out_0;
        alu_out_q <= alu_out;

        if (launch_next_insn) begin
            dbg_rs1val <= 'bx;
            dbg_rs2val <= 'bx;
            dbg_rs1val_valid <= 0;
            dbg_rs2val_valid <= 0;
        end

        // PCPI timeout: counts down from all-ones while an instruction is
        // offered and no co-processor asserts wait; reloaded otherwise.
        if (resetn && pcpi_valid && !pcpi_int_wait) begin
            if (pcpi_timeout_counter)
                pcpi_timeout_counter <= pcpi_timeout_counter - 1;
        end else
            pcpi_timeout_counter <= ~0;
        pcpi_timeout <= !pcpi_timeout_counter;

        count_cycle <= resetn ? count_cycle + 1 : 0;

        decoder_trigger <= mem_do_rinst && mem_done;
        decoder_trigger_q <= decoder_trigger;
        decoder_pseudo_trigger <= 0;
        decoder_pseudo_trigger_q <= decoder_pseudo_trigger;

        if (!resetn) begin
            reg_pc <= PROGADDR_RESET;
            reg_next_pc <= PROGADDR_RESET;
            count_instr <= 0;
            latched_store <= 0;
            latched_stalu <= 0;
            latched_branch <= 0;
            latched_is_lu <= 0;
            latched_is_lh <= 0;
            latched_is_lb <= 0;
            pcpi_valid <= 0;
            pcpi_timeout <= 0;

            // Unless STACKADDR is all-ones, schedule a write of STACKADDR
            // to register 2 for the first fetch cycle.
            if (~STACKADDR) begin
                latched_store <= 1;
                latched_rd <= 2;
                reg_out <= STACKADDR;
            end
            cpu_state <= cpu_state_fetch;
        end else
        (* parallel_case, full_case *)
        case (cpu_state)
            cpu_state_trap: begin
                trap <= 1;
            end

            cpu_state_fetch: begin
                mem_do_rinst <= !decoder_trigger;
                mem_wordsize <= 0;

                current_pc = reg_next_pc;

                (* parallel_case *)
                case (1'b1)
                    latched_branch: begin
                        current_pc = latched_store ? (latched_stalu ? alu_out_q : reg_out) & ~1 : reg_next_pc;
                        `debug($display("ST_RD:  %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + 4, current_pc);)
                    end
                    latched_store && !latched_branch: begin
                        `debug($display("ST_RD:  %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);)
                    end
                endcase

                reg_pc <= current_pc;
                reg_next_pc <= current_pc;

                latched_store <= 0;
                latched_stalu <= 0;
                latched_branch <= 0;
                latched_is_lu <= 0;
                latched_is_lh <= 0;
                latched_is_lb <= 0;
                latched_rd <= decoded_rd;

                if (decoder_trigger) begin
                    `debug($display("-- %-0t", $time);)
                    reg_next_pc <= current_pc + 4;
                    count_instr <= count_instr + 1;
                    if (instr_jal) begin
                        // JAL is resolved here: fetch straight from the target.
                        mem_do_rinst <= 1;
                        reg_next_pc <= current_pc + decoded_imm_j;
                        latched_branch <= 1;
                    end else begin
                        mem_do_rinst <= 0;
                        mem_do_prefetch <= !instr_jalr;
                        cpu_state <= cpu_state_ld_rs1;
                    end
                end
            end

            cpu_state_ld_rs1: begin
                reg_op1 <= 'bx;
                reg_op2 <= 'bx;

                (* parallel_case *)
                case (1'b1)
                    instr_trap: begin
                        // Not a native instruction: offer it to the PCPI
                        // cores with both operands loaded.
                        `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
                        reg_op1 <= cpuregs_rs1;
                        dbg_rs1val <= cpuregs_rs1;
                        dbg_rs1val_valid <= 1;
                        pcpi_valid <= 1;
                        `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
                        reg_sh <= cpuregs_rs2;
                        reg_op2 <= cpuregs_rs2;
                        dbg_rs2val <= cpuregs_rs2;
                        dbg_rs2val_valid <= 1;
                        if (pcpi_int_ready) begin
                            mem_do_rinst <= 1;
                            pcpi_valid <= 0;
                            reg_out <= pcpi_int_rd;
                            latched_store <= pcpi_int_wr;
                            cpu_state <= cpu_state_fetch;
                        end else
                        if (pcpi_timeout || instr_ecall_ebreak) begin
                            pcpi_valid <= 0;
                            `debug($display("EBREAK OR UNSUPPORTED INSN AT 0x%08x", reg_pc);)
                            cpu_state <= cpu_state_trap;
                        end
                    end
                    is_rdcycle_rdcycleh_rdinstr_rdinstrh: begin
                        (* parallel_case, full_case *)
                        case (1'b1)
                            instr_rdcycle:
                                reg_out <= count_cycle[31:0];
                            instr_rdcycleh:
                                reg_out <= count_cycle[63:32];
                            instr_rdinstr:
                                reg_out <= count_instr[31:0];
                            instr_rdinstrh:
                                reg_out <= count_instr[63:32];
                        endcase
                        latched_store <= 1;
                        cpu_state <= cpu_state_fetch;
                    end
                    is_lui_auipc_jal: begin
                        reg_op1 <= instr_lui ? 0 : reg_pc;
                        reg_op2 <= decoded_imm;
                        mem_do_rinst <= mem_do_prefetch;
                        cpu_state <= cpu_state_exec;
                    end
                    is_lb_lh_lw_lbu_lhu && !instr_trap: begin
                        `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
                        reg_op1 <= cpuregs_rs1;
                        dbg_rs1val <= cpuregs_rs1;
                        dbg_rs1val_valid <= 1;
                        cpu_state <= cpu_state_ldmem;
                        mem_do_rinst <= 1;
                    end
                    is_slli_srli_srai: begin
                        `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
                        reg_op1 <= cpuregs_rs1;
                        dbg_rs1val <= cpuregs_rs1;
                        dbg_rs1val_valid <= 1;
                        // Shift amount is the rs2 field of the instruction.
                        reg_sh <= decoded_rs2;
                        cpu_state <= cpu_state_shift;
                    end
                    is_jalr_addi_slti_sltiu_xori_ori_andi: begin
                        `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
                        reg_op1 <= cpuregs_rs1;
                        dbg_rs1val <= cpuregs_rs1;
                        dbg_rs1val_valid <= 1;
                        reg_op2 <= decoded_imm;
                        mem_do_rinst <= mem_do_prefetch;
                        cpu_state <= cpu_state_exec;
                    end
                    default: begin
                        // Two-register instructions: load both operands in
                        // this cycle.
                        `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
                        reg_op1 <= cpuregs_rs1;
                        dbg_rs1val <= cpuregs_rs1;
                        dbg_rs1val_valid <= 1;
                        `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
                        reg_sh <= cpuregs_rs2;
                        reg_op2 <= cpuregs_rs2;
                        dbg_rs2val <= cpuregs_rs2;
                        dbg_rs2val_valid <= 1;
                        (* parallel_case *)
                        case (1'b1)
                            is_sb_sh_sw: begin
                                cpu_state <= cpu_state_stmem;
                                mem_do_rinst <= 1;
                            end
                            is_sll_srl_sra: begin
                                cpu_state <= cpu_state_shift;
                            end
                            default: begin
                                mem_do_rinst <= mem_do_prefetch;
                                cpu_state <= cpu_state_exec;
                            end
                        endcase
                    end
                endcase
            end

            // NOTE(review): no transition into cpu_state_ld_rs2 is visible
            // in this module; the state is kept as-is.
            cpu_state_ld_rs2: begin
                `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
                reg_sh <= cpuregs_rs2;
                reg_op2 <= cpuregs_rs2;
                dbg_rs2val <= cpuregs_rs2;
                dbg_rs2val_valid <= 1;

                (* parallel_case *)
                case (1'b1)
                    instr_trap: begin
                        pcpi_valid <= 1;
                        if (pcpi_int_ready) begin
                            mem_do_rinst <= 1;
                            pcpi_valid <= 0;
                            reg_out <= pcpi_int_rd;
                            latched_store <= pcpi_int_wr;
                            cpu_state <= cpu_state_fetch;
                        end else
                        if (pcpi_timeout || instr_ecall_ebreak) begin
                            pcpi_valid <= 0;
                            `debug($display("EBREAK OR UNSUPPORTED INSN AT 0x%08x", reg_pc);)
                            cpu_state <= cpu_state_trap;
                        end
                    end
                    is_sb_sh_sw: begin
                        cpu_state <= cpu_state_stmem;
                        mem_do_rinst <= 1;
                    end
                    is_sll_srl_sra: begin
                        cpu_state <= cpu_state_shift;
                    end
                    default: begin
                        mem_do_rinst <= mem_do_prefetch;
                        cpu_state <= cpu_state_exec;
                    end
                endcase
            end

            cpu_state_exec: begin
                // Branch target; only used when the branch is taken.
                reg_out <= reg_pc + decoded_imm;
                if (is_beq_bne_blt_bge_bltu_bgeu) begin
                    latched_rd <= 0;
                    latched_store <= alu_out_0;
                    latched_branch <= alu_out_0;
                    if (mem_done)
                        cpu_state <= cpu_state_fetch;
                    if (alu_out_0) begin
                        // Taken: discard the pending decode and request a
                        // fetch from the branch target.
                        decoder_trigger <= 0;
                        set_mem_do_rinst = 1;
                    end
                end else begin
                    latched_branch <= instr_jalr;
                    latched_store <= 1;
                    latched_stalu <= 1;
                    cpu_state <= cpu_state_fetch;
                end
            end

            cpu_state_shift: begin
                // Iterative shifter: 4 bits per cycle while at least 4
                // remain, then 1 bit per cycle.
                latched_store <= 1;
                if (reg_sh == 0) begin
                    reg_out <= reg_op1;
                    mem_do_rinst <= mem_do_prefetch;
                    cpu_state <= cpu_state_fetch;
                end else if (reg_sh >= 4) begin
                    (* parallel_case, full_case *)
                    case (1'b1)
                        instr_slli || instr_sll: reg_op1 <= reg_op1 << 4;
                        instr_srli || instr_srl: reg_op1 <= reg_op1 >> 4;
                        instr_srai || instr_sra: reg_op1 <= $signed(reg_op1) >>> 4;
                    endcase
                    reg_sh <= reg_sh - 4;
                end else begin
                    (* parallel_case, full_case *)
                    case (1'b1)
                        instr_slli || instr_sll: reg_op1 <= reg_op1 << 1;
                        instr_srli || instr_srl: reg_op1 <= reg_op1 >> 1;
                        instr_srai || instr_sra: reg_op1 <= $signed(reg_op1) >>> 1;
                    endcase
                    reg_sh <= reg_sh - 1;
                end
            end

            cpu_state_stmem: begin
                if (!mem_do_prefetch || mem_done) begin
                    if (!mem_do_wdata) begin
                        (* parallel_case, full_case *)
                        case (1'b1)
                            instr_sb: mem_wordsize <= 2;
                            instr_sh: mem_wordsize <= 1;
                            instr_sw: mem_wordsize <= 0;
                        endcase
                        // reg_op1 becomes the effective address.
                        reg_op1 <= reg_op1 + decoded_imm;
                        set_mem_do_wdata = 1;
                    end
                    if (!mem_do_prefetch && mem_done) begin
                        cpu_state <= cpu_state_fetch;
                        decoder_trigger <= 1;
                        decoder_pseudo_trigger <= 1;
                    end
                end
            end

            cpu_state_ldmem: begin
                latched_store <= 1;
                if (!mem_do_prefetch || mem_done) begin
                    if (!mem_do_rdata) begin
                        (* parallel_case, full_case *)
                        case (1'b1)
                            instr_lb || instr_lbu: mem_wordsize <= 2;
                            instr_lh || instr_lhu: mem_wordsize <= 1;
                            instr_lw: mem_wordsize <= 0;
                        endcase
                        latched_is_lu <= is_lbu_lhu_lw;
                        latched_is_lh <= instr_lh;
                        latched_is_lb <= instr_lb;
                        // reg_op1 becomes the effective address.
                        reg_op1 <= reg_op1 + decoded_imm;
                        set_mem_do_rdata = 1;
                    end
                    if (!mem_do_prefetch && mem_done) begin
                        // Zero-extended result as-is; LH/LB sign-extend.
                        (* parallel_case, full_case *)
                        case (1'b1)
                            latched_is_lu: reg_out <= mem_rdata_word;
                            latched_is_lh: reg_out <= $signed(mem_rdata_word[15:0]);
                            latched_is_lb: reg_out <= $signed(mem_rdata_word[7:0]);
                        endcase
                        decoder_trigger <= 1;
                        decoder_pseudo_trigger <= 1;
                        cpu_state <= cpu_state_fetch;
                    end
                end
            end
        endcase

        // Misaligned data access -> trap.
        if (resetn && (mem_do_rdata || mem_do_wdata)) begin
            if (mem_wordsize == 0 && reg_op1[1:0] != 0) begin
                `debug($display("MISALIGNED WORD: 0x%08x", reg_op1);)
                cpu_state <= cpu_state_trap;
            end
            if (mem_wordsize == 1 && reg_op1[0] != 0) begin
                `debug($display("MISALIGNED HALFWORD: 0x%08x", reg_op1);)
                cpu_state <= cpu_state_trap;
            end
        end
        // Misaligned instruction fetch -> trap.
        if (resetn && mem_do_rinst && (|reg_pc[1:0])) begin
            `debug($display("MISALIGNED INSTRUCTION: 0x%08x", reg_pc);)
            cpu_state <= cpu_state_trap;
        end

        // Clear request flags on completion; the set_* temporaries below
        // override the clear when a new request was raised this cycle.
        if (!resetn || mem_done) begin
            mem_do_prefetch <= 0;
            mem_do_rinst <= 0;
            mem_do_rdata <= 0;
            mem_do_wdata <= 0;
        end

        if (set_mem_do_rinst)
            mem_do_rinst <= 1;
        if (set_mem_do_rdata)
            mem_do_rdata <= 1;
        if (set_mem_do_wdata)
            mem_do_wdata <= 1;

        current_pc = 'bx;
    end

endmodule

/***************************************************************
 * picorv32_regs
 *
 * 32 x 32-bit register file with one clocked write port and two
 * combinational read ports.
 ***************************************************************/
module picorv32_regs (
    input clk, wen,
    input [4:0] waddr,
    input [4:0] raddr1,
    input [4:0] raddr2,
    input [31:0] wdata,
    output [31:0] rdata1,
    output [31:0] rdata2
);
    reg [31:0] regs [0:31];

    always @(posedge clk)
        if (wen) regs[waddr[4:0]] <= wdata;

    assign rdata1 = regs[raddr1];
    assign rdata2 = regs[raddr2];
endmodule


/***************************************************************
 * picorv32_pcpi_mul
 *
 * PCPI multiplier. Recognises opcode 0110011 with funct7 0000001
 * and funct3 000..011 (flagged below as mul, mulh, mulhsu, mulhu)
 * and computes the product with a shift-and-add carry-save
 * accumulator, STEPS_AT_ONCE multiplier bits per clock.
 *
 * Parameters:
 *   STEPS_AT_ONCE  multiplier bits consumed per clock cycle.
 *   CARRY_CHAIN    width of each ripple-carry segment in the
 *                  accumulator; 0 selects a pure carry-save form.
 ***************************************************************/
module picorv32_pcpi_mul #(
    parameter STEPS_AT_ONCE = 1,
    parameter CARRY_CHAIN = 4
) (
    input clk, resetn,

    input             pcpi_valid,
    input      [31:0] pcpi_insn,
    input      [31:0] pcpi_rs1,
    input      [31:0] pcpi_rs2,
    output reg        pcpi_wr,
    output reg [31:0] pcpi_rd,
    output reg        pcpi_wait,
    output reg        pcpi_ready
);
    reg instr_mul, instr_mulh, instr_mulhsu, instr_mulhu;
    wire instr_any_mul = |{instr_mul, instr_mulh, instr_mulhsu, instr_mulhu};
    wire instr_any_mulh = |{instr_mulh, instr_mulhsu, instr_mulhu};
    wire instr_rs1_signed = |{instr_mulh, instr_mulhsu};
    wire instr_rs2_signed = |{instr_mulh};

    reg pcpi_wait_q;
    // Rising edge of pcpi_wait starts a multiplication.
    wire mul_start = pcpi_wait && !pcpi_wait_q;

    // Instruction decode; pcpi_wait follows one cycle behind the decode.
    always @(posedge clk) begin
        instr_mul <= 0;
        instr_mulh <= 0;
        instr_mulhsu <= 0;
        instr_mulhu <= 0;

        if (resetn && pcpi_valid && pcpi_insn[6:0] == 7'b0110011 && pcpi_insn[31:25] == 7'b0000001) begin
            case (pcpi_insn[14:12])
                3'b000: instr_mul <= 1;
                3'b001: instr_mulh <= 1;
                3'b010: instr_mulhsu <= 1;
                3'b011: instr_mulhu <= 1;
            endcase
        end

        pcpi_wait <= instr_any_mul;
        pcpi_wait_q <= pcpi_wait;
    end

    reg [63:0] rs1, rs2, rd, rdx;
    reg [63:0] next_rs1, next_rs2, this_rs2;
    reg [63:0] next_rd, next_rdx, next_rdt;
    reg [6:0] mul_counter;
    reg mul_waiting;
    reg mul_finish;
    integer i, j;

    // carry save accumulator
    //
    // Each step conditionally adds rs2 (when the current LSB of rs1 is set)
    // into the rd/rdx pair, then shifts rs1 right and rs2 left.
    always @* begin
        next_rd = rd;
        next_rdx = rdx;
        next_rs1 = rs1;
        next_rs2 = rs2;

        for (i = 0; i < STEPS_AT_ONCE; i=i+1) begin
            this_rs2 = next_rs1[0] ? next_rs2 : 0;
            if (CARRY_CHAIN == 0) begin
                next_rdt = next_rd ^ next_rdx ^ this_rs2;
                next_rdx = ((next_rd & next_rdx) | (next_rd & this_rs2) | (next_rdx & this_rs2)) << 1;
                next_rd = next_rdt;
            end else begin
                next_rdt = 0;
                // Add in CARRY_CHAIN-bit segments; each segment's carry-out
                // is parked in next_rdt and re-injected via next_rdx.
                for (j = 0; j < 64; j = j + CARRY_CHAIN)
                    {next_rdt[j+CARRY_CHAIN-1], next_rd[j +: CARRY_CHAIN]} =
                            next_rd[j +: CARRY_CHAIN] + next_rdx[j +: CARRY_CHAIN] + this_rs2[j +: CARRY_CHAIN];
                next_rdx = next_rdt << 1;
            end
            next_rs1 = next_rs1 >> 1;
            next_rs2 = next_rs2 << 1;
        end
    end

    // Sequencer: while waiting, keep loading (sign- or zero-extended)
    // operands; once started, iterate until mul_counter underflows
    // (bit 6 set), then pulse mul_finish.
    always @(posedge clk) begin
        mul_finish <= 0;
        if (!resetn) begin
            mul_waiting <= 1;
        end else
        if (mul_waiting) begin
            if (instr_rs1_signed)
                rs1 <= $signed(pcpi_rs1);
            else
                rs1 <= $unsigned(pcpi_rs1);

            if (instr_rs2_signed)
                rs2 <= $signed(pcpi_rs2);
            else
                rs2 <= $unsigned(pcpi_rs2);

            rd <= 0;
            rdx <= 0;
            mul_counter <= (instr_any_mulh ? 63 - STEPS_AT_ONCE : 31 - STEPS_AT_ONCE);
            mul_waiting <= !mul_start;
        end else begin
            rd <= next_rd;
            rdx <= next_rdx;
            rs1 <= next_rs1;
            rs2 <= next_rs2;

            mul_counter <= mul_counter - STEPS_AT_ONCE;
            if (mul_counter[6]) begin
                mul_finish <= 1;
                mul_waiting <= 1;
            end
        end
    end

    // Result stage: upper 32 bits for the mulh* variants, lower otherwise.
    always @(posedge clk) begin
        pcpi_wr <= 0;
        pcpi_ready <= 0;
        if (mul_finish && resetn) begin
            pcpi_wr <= 1;
            pcpi_ready <= 1;
            pcpi_rd <= instr_any_mulh ? rd >> 32 : rd;
        end
    end
endmodule


/***************************************************************
 * picorv32_pcpi_div
 ***************************************************************/

module picorv32_pcpi_div (
    input clk, resetn,

    input             pcpi_valid,
    input      [31:0] pcpi_insn,
    input      [31:0] pcpi_rs1,
    input      [31:0] pcpi_rs2,
    output reg        pcpi_wr,
    output reg [31:0] pcpi_rd,
    output reg        pcpi_wait,
    output reg        pcpi_ready
);
    reg instr_div, instr_divu, instr_rem, instr_remu;
    wire instr_any_div_rem = |{instr_div, instr_divu, instr_rem, instr_remu};

    reg pcpi_wait_q;
    wire start = pcpi_wait && !pcpi_wait_q;

    always @(posedge clk) begin
        instr_div <= 0;
        instr_divu <= 0;
        instr_rem <= 0;
        instr_remu <= 0;

        if (resetn && pcpi_valid && !pcpi_ready && pcpi_insn[6:0] == 7'b0110011 && pcpi_insn[31:25] == 7'b0000001) begin
            case (pcpi_insn[14:12])
                3'b100: instr_div <= 1;
                3'b101: instr_divu <= 1;
                3'b110: instr_rem <= 1;
                3'b111: instr_remu <= 1;
            endcase
        end

        pcpi_wait <= instr_any_div_rem && resetn;
        pcpi_wait_q <= pcpi_wait && resetn;
    end

    reg [31:0] dividend;
    reg [62:0] divisor;
    reg [31:0] quotient;
    reg [31:0] quotient_msk;
    reg running;
    reg outsign;

    always @(posedge clk) begin
        pcpi_ready <= 0;
        pcpi_wr <= 0;
        pcpi_rd <= 'bx;

        if (!resetn) begin
            running <= 0;
        end else
        if (start) begin
            running <= 1;
            dividend <= (instr_div || instr_rem) && pcpi_rs1[31] ? -pcpi_rs1 : pcpi_rs1;
            divisor <= ((instr_div || instr_rem) && pcpi_rs2[31] ?
-pcpi_rs2 : pcpi_rs2) << 31; outsign <= (instr_div && (pcpi_rs1[31] != pcpi_rs2[31]) && |pcpi_rs2) || (instr_rem && pcpi_rs1[31]); quotient <= 0; quotient_msk <= 1 << 31; end else if (!quotient_msk && running) begin running <= 0; pcpi_ready <= 1; pcpi_wr <= 1; if (instr_div || instr_divu) pcpi_rd <= outsign ? -quotient : quotient; else pcpi_rd <= outsign ? -dividend : dividend; end else begin if (divisor <= dividend) begin dividend <= dividend - divisor; quotient <= quotient | quotient_msk; end divisor <= divisor >> 1; quotient_msk <= quotient_msk >> 1; end end endmodule