From 2ae2463b97b799f4f5f1bfb95aaba133e8dec378 Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Sun, 28 Aug 2022 19:48:50 +0100 Subject: [PATCH] First stab at adding wake/sleep state machine --- hdl/arith/hazard3_alu.v | 9 +- hdl/hazard3.f | 7 +- hdl/hazard3_config.vh | 13 +- hdl/hazard3_config_inst.vh | 3 +- hdl/hazard3_core.v | 89 ++++++++-- hdl/hazard3_cpu_1port.v | 15 ++ hdl/hazard3_cpu_2port.v | 15 ++ hdl/hazard3_csr.v | 71 ++++++-- hdl/hazard3_decode.v | 16 +- hdl/hazard3_frontend.v | 9 +- hdl/hazard3_power_ctrl.v | 159 ++++++++++++++++++ .../wfi_loop_deepsleep_powerdown.c | 65 +++++++ test/sim/tb_cxxrtl/tb.v | 13 ++ 13 files changed, 438 insertions(+), 46 deletions(-) create mode 100644 hdl/hazard3_power_ctrl.v create mode 100644 test/sim/sw_testcases/wfi_loop_deepsleep_powerdown.c diff --git a/hdl/arith/hazard3_alu.v b/hdl/arith/hazard3_alu.v index c92b8c7..48789ca 100644 --- a/hdl/arith/hazard3_alu.v +++ b/hdl/arith/hazard3_alu.v @@ -55,9 +55,9 @@ assign cmp = aluop == ALUOP_SUB ? 
|op_xor : lt; wire [W_DATA-1:0] shift_dout; wire shift_right_nleft = aluop == ALUOP_SRL || aluop == ALUOP_SRA || - |EXTENSION_ZBB && aluop == ALUOP_ROR || - |EXTENSION_ZBS && aluop == ALUOP_BEXT || - |EXTENSION_XH3B && aluop == ALUOP_BEXTM; + |EXTENSION_ZBB && aluop == ALUOP_ROR || + |EXTENSION_ZBS && aluop == ALUOP_BEXT || + |EXTENSION_XH3BEXTM && aluop == ALUOP_BEXTM; wire shift_arith = aluop == ALUOP_SRA; wire shift_rotate = |EXTENSION_ZBB & (aluop == ALUOP_ROR || aluop == ALUOP_ROL); @@ -152,7 +152,8 @@ end wire [W_DATA-1:0] zbs_mask = {{W_DATA-1{1'b0}}, 1'b1} << op_b[W_SHAMT-1:0]; always @ (*) begin - casez ({|EXTENSION_A, |EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, |EXTENSION_ZBS, |EXTENSION_ZBKB, |EXTENSION_XH3B, aluop}) + casez ({|EXTENSION_A, |EXTENSION_ZBA, |EXTENSION_ZBB, |EXTENSION_ZBC, + |EXTENSION_ZBS, |EXTENSION_ZBKB, |EXTENSION_XH3BEXTM, aluop}) // Base ISA {7'bzzzzzzz, ALUOP_ADD }: result = sum; {7'bzzzzzzz, ALUOP_SUB }: result = sum; diff --git a/hdl/hazard3.f b/hdl/hazard3.f index 431d3ef..186e0f0 100644 --- a/hdl/hazard3.f +++ b/hdl/hazard3.f @@ -10,11 +10,12 @@ file arith/hazard3_onehot_priority.v file arith/hazard3_onehot_priority_dynamic.v file arith/hazard3_priority_encode.v file arith/hazard3_shift_barrel.v +file hazard3_csr.v +file hazard3_decode.v file hazard3_frontend.v file hazard3_instr_decompress.v -file hazard3_decode.v -file hazard3_csr.v -file hazard3_regfile_1w2r.v file hazard3_pmp.v +file hazard3_power_ctrl.v +file hazard3_regfile_1w2r.v file hazard3_triggers.v include . diff --git a/hdl/hazard3_config.vh b/hdl/hazard3_config.vh index 8b8a36c..667afdd 100644 --- a/hdl/hazard3_config.vh +++ b/hdl/hazard3_config.vh @@ -61,19 +61,22 @@ parameter EXTENSION_ZBS = 1, // Requires: Zbb. (This flag enables instructions in Zbkb which aren't in Zbb.) 
parameter EXTENSION_ZBKB = 1, -// EXTENSION_XH3B: Custom bit manipulation instructions for Hazard3 -parameter EXTENSION_XH3B = 1, - // EXTENSION_ZIFENCEI: Support for the fence.i instruction // Optional, since a plain branch/jump will also flush the prefetch queue. parameter EXTENSION_ZIFENCEI = 1, -// Note the Zicsr extension is implied by any of CSR_M_MANDATORY, CSR_M_TRAP, -// CSR_COUNTER. +// EXTENSION_XH3BEXTM: Custom bit-extract-multiple instructions for Hazard3 +parameter EXTENSION_XH3BEXTM = 1, + +// EXTENSION_XH3POWER: Custom power management controls for Hazard3 +parameter EXTENSION_XH3POWER = 1, // ---------------------------------------------------------------------------- // CSR support +// Note the Zicsr extension is implied by any of CSR_M_MANDATORY, CSR_M_TRAP, +// CSR_COUNTER. + // CSR_M_MANDATORY: Bare minimum CSR support e.g. misa. Spec says must = 1 if // CSRs are present, but I won't tell anyone. parameter CSR_M_MANDATORY = 1, diff --git a/hdl/hazard3_config_inst.vh b/hdl/hazard3_config_inst.vh index 127619c..f5d2769 100644 --- a/hdl/hazard3_config_inst.vh +++ b/hdl/hazard3_config_inst.vh @@ -22,8 +22,9 @@ .EXTENSION_ZBC (EXTENSION_ZBC), .EXTENSION_ZBS (EXTENSION_ZBS), .EXTENSION_ZBKB (EXTENSION_ZBKB), -.EXTENSION_XH3B (EXTENSION_XH3B), .EXTENSION_ZIFENCEI (EXTENSION_ZIFENCEI), +.EXTENSION_XH3BEXTM (EXTENSION_XH3BEXTM), +.EXTENSION_XH3POWER (EXTENSION_XH3POWER), .CSR_M_MANDATORY (CSR_M_MANDATORY), .CSR_M_TRAP (CSR_M_TRAP), .CSR_COUNTER (CSR_COUNTER), diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v index a66c427..1a29456 100644 --- a/hdl/hazard3_core.v +++ b/hdl/hazard3_core.v @@ -12,8 +12,16 @@ module hazard3_core #( ) ( // Global signals input wire clk, + input wire clk_always_on, input wire rst_n, + // Power control signals + output wire pwrup_req, + input wire pwrup_ack, + output wire clk_en, + output wire unblock_out, + input wire unblock_in, + `ifdef RISCV_FORMAL `RVFI_OUTPUTS , `endif @@ -90,6 +98,8 @@ wire f_jump_priv; wire f_jump_rdy;
wire f_jump_now = f_jump_req && f_jump_rdy; +wire f_frontend_pwrdown_ok; + // Predecoded register numbers, for register file access wire [W_REGADDR-1:0] f_rs1_coarse; wire [W_REGADDR-1:0] f_rs2_coarse; @@ -151,6 +161,8 @@ hazard3_frontend #( .cir_use (df_cir_use), .cir_flush_behind (df_cir_flush_behind), + .pwrdown_ok (f_frontend_pwrdown_ok), + .predecode_rs1_coarse (f_rs1_coarse), .predecode_rs2_coarse (f_rs2_coarse), .predecode_rs1_fine (f_rs1_fine), @@ -199,7 +211,9 @@ wire [W_ADDR-1:0] d_addr_offs; wire d_addr_is_regoffs; wire [W_ADDR-1:0] d_pc; wire [W_EXCEPT-1:0] d_except; -wire d_wfi; +wire d_sleep_wfi; +wire d_sleep_block; +wire d_sleep_unblock; wire d_fence_i; wire d_csr_ren; wire d_csr_wen; @@ -254,7 +268,9 @@ hazard3_decode #( .d_addr_offs (d_addr_offs), .d_addr_is_regoffs (d_addr_is_regoffs), .d_except (d_except), - .d_wfi (d_wfi), + .d_sleep_wfi (d_sleep_wfi), + .d_sleep_block (d_sleep_block), + .d_sleep_unblock (d_sleep_unblock), .d_fence_i (d_fence_i) ); @@ -297,8 +313,9 @@ reg [1:0] xm_addr_align; reg [W_MEMOP-1:0] xm_memop; reg [W_EXCEPT-1:0] xm_except; reg xm_except_to_d_mode; -reg xm_wfi; -reg xm_delay_irq_entry; +reg xm_sleep_wfi; +reg xm_sleep_block; +reg xm_delay_irq_entry_on_ls_dphase; // ---------------------------------------------------------------------------- // Stall logic @@ -376,7 +393,7 @@ assign x_stall = bus_aph_req_d && !bus_aph_ready_d || x_jump_req && !f_jump_rdy; -wire m_wfi_stall_clear; +wire m_sleep_stall_release; wire x_loadstore_pmp_fail; wire x_exec_pmp_fail; @@ -614,7 +631,7 @@ always @ (*) begin x_trig_break || x_unaligned_addr || m_trap_enter_soon || - (xm_wfi && !m_wfi_stall_clear) // FIXME will cause a timing issue, better to stall til *after* clear + ((xm_sleep_wfi || xm_sleep_block) && !m_sleep_stall_release) ); end @@ -867,7 +884,7 @@ reg prev_instr_was_32_bit; always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin - xm_delay_irq_entry <= 1'b0; + xm_delay_irq_entry_on_ls_dphase <= 1'b0; 
prev_instr_was_32_bit <= 1'b0; end else begin // Must hold off IRQ if we are in the second cycle of an address phase or @@ -879,7 +896,7 @@ always @ (posedge clk or negedge rst_n) begin // Also hold off on AMOs, unless the AMO is transitioning to an address // phase or completing. ("completing" excludes transitions to error phase.) - xm_delay_irq_entry <= bus_aph_req_d && !bus_aph_ready_d || + xm_delay_irq_entry_on_ls_dphase <= bus_aph_req_d && !bus_aph_ready_d || d_memop_is_amo && !( x_amo_phase == 3'h3 && bus_dph_ready_d && !bus_dph_err_d || // Read reservation failure failure also generates error @@ -916,11 +933,19 @@ wire x_except_counts_as_retire = wire x_instr_ret = |df_cir_use && (x_except == EXCEPT_NONE || x_except_counts_as_retire); wire m_dphase_in_flight = xm_memop != MEMOP_NONE && xm_memop != MEMOP_AMO; +wire m_delay_irq_entry = xm_delay_irq_entry_on_ls_dphase || ((xm_sleep_wfi || xm_sleep_block) && !m_sleep_stall_release); + +wire m_allow_sleep; +wire m_allow_power_down; +wire m_allow_sleep_on_block; +wire m_wfi_wakeup_req; + hazard3_csr #( .XLEN (W_DATA), `include "hazard3_config_inst.vh" ) csr_u ( .clk (clk), + .clk_always_on (clk_always_on), .rst_n (rst_n), // Debugger signalling @@ -958,14 +983,18 @@ hazard3_csr #( .trap_enter_rdy (m_trap_enter_rdy), .loadstore_dphase_pending (m_dphase_in_flight), .mepc_in (m_exception_return_addr), - .wfi_stall_clear (m_wfi_stall_clear), + + .pwr_allow_sleep (m_allow_sleep), + .pwr_allow_power_down (m_allow_power_down), + .pwr_allow_sleep_on_block (m_allow_sleep_on_block), + .pwr_wfi_wakeup_req (m_wfi_wakeup_req), .m_mode_execution (x_mmode_execution), .m_mode_loadstore (x_mmode_loadstore), .m_mode_trap_entry (m_mmode_trap_entry), // IRQ and exception requests - .delay_irq_entry (xm_delay_irq_entry), + .delay_irq_entry (m_delay_irq_entry), .irq (irq), .irq_software (soft_irq), .irq_timer (timer_irq), @@ -995,7 +1024,8 @@ always @ (posedge clk or negedge rst_n) begin xm_memop <= MEMOP_NONE; xm_except <= 
EXCEPT_NONE; xm_except_to_d_mode <= 1'b0; - xm_wfi <= 1'b0; + xm_sleep_wfi <= 1'b0; + xm_sleep_block <= 1'b0; {xm_rs1, xm_rs2, xm_rd} <= {3 * W_REGADDR{1'b0}}; end else begin if (!m_stall) begin @@ -1004,7 +1034,8 @@ always @ (posedge clk or negedge rst_n) begin xm_memop <= x_unaligned_addr || x_exec_pmp_fail || x_loadstore_pmp_fail ? MEMOP_NONE : d_memop; xm_except <= x_except; xm_except_to_d_mode <= x_trig_break_d_mode; - xm_wfi <= d_wfi && !x_exec_pmp_fail; + xm_sleep_wfi <= d_sleep_wfi && !x_exec_pmp_fail; + xm_sleep_block <= d_sleep_block && !x_exec_pmp_fail; // Note the d_starved term is required because it is possible // (e.g. PMP X permission fail) to except when the frontend is // starved, and we get a bad mepc if we let this jump ahead: @@ -1014,7 +1045,8 @@ always @ (posedge clk or negedge rst_n) begin xm_memop <= MEMOP_NONE; xm_except <= EXCEPT_NONE; xm_except_to_d_mode <= 1'b0; - xm_wfi <= 1'b0; + xm_sleep_wfi <= 1'b0; + xm_sleep_block <= 1'b0; end end else if (bus_dph_err_d) begin // First phase of 2-phase AHB5 error response. Pass the exception along on @@ -1022,11 +1054,14 @@ always @ (posedge clk or negedge rst_n) begin // suppressing any load/store that may currently be in stage X. `ifdef HAZARD3_ASSERTIONS assert(xm_memop != MEMOP_NONE); + assert(!xm_sleep_wfi); + assert(!xm_sleep_block); `endif xm_except <= |EXTENSION_A && xm_memop == MEMOP_LR_W ? EXCEPT_LOAD_FAULT : xm_memop <= MEMOP_LBU ? EXCEPT_LOAD_FAULT : EXCEPT_STORE_FAULT; - xm_wfi <= 1'b0; + xm_sleep_wfi <= 1'b0; // TODO needed? + xm_sleep_block <= 1'b0; end end end @@ -1075,6 +1110,8 @@ assign f_jump_target = m_trap_enter_vld ? m_trap_addr : x_jump_target; assign f_jump_priv = m_trap_enter_vld ? m_mmode_trap_entry : x_mmode_execution; assign x_jump_not_except = !m_trap_enter_vld; +// Stalls and sleep control + // EXCEPT_NONE clause is needed in the following sequence: // - Cycle 0: hresp asserted, hready low. We set the exception to squash behind us. Bus stall high. 
// - Cycle 1: hready high. For whatever reason, the frontend can't accept the trap address this cycle. @@ -1085,7 +1122,7 @@ wire m_bus_stall = m_dphase_in_flight && !bus_dph_ready_d && xm_except == EXCEPT assign m_stall = m_bus_stall || (m_trap_enter_vld && !m_trap_enter_rdy && !m_trap_is_irq) || - (xm_wfi && !m_wfi_stall_clear); + ((xm_sleep_wfi || xm_sleep_block) && !m_sleep_stall_release); // Exception is taken against the instruction currently in M, so walk the PC // back. IRQ is taken "in between" the instruction in M and the instruction @@ -1097,6 +1134,27 @@ assign m_exception_return_addr = d_pc - ( prev_instr_was_32_bit ? 32'h4 : 32'h2 ); +hazard3_power_ctrl power_ctrl ( + .clk_always_on (clk_always_on), + .rst_n (rst_n), + + .pwrup_req (pwrup_req), + .pwrup_ack (pwrup_ack), + .clk_en (clk_en), + + .allow_sleep (m_allow_sleep), + .allow_power_down (m_allow_power_down), + .allow_sleep_on_block (m_allow_sleep_on_block), + + .frontend_pwrdown_ok (f_frontend_pwrdown_ok), + + .sleeping_on_wfi (xm_sleep_wfi), + .wfi_wakeup_req (m_wfi_wakeup_req), + .sleeping_on_block (xm_sleep_block), + .block_wakeup_req_pulse (unblock_in), + .stall_release (m_sleep_stall_release) +); + // Load/store data handling always @ (*) begin @@ -1230,7 +1288,6 @@ always @ (posedge clk or negedge rst_n) begin end end - hazard3_regfile_1w2r #( .RESET_REGS (RESET_REGFILE), .N_REGS (32), diff --git a/hdl/hazard3_cpu_1port.v b/hdl/hazard3_cpu_1port.v index 348bf1a..bc960c4 100644 --- a/hdl/hazard3_cpu_1port.v +++ b/hdl/hazard3_cpu_1port.v @@ -14,12 +14,20 @@ module hazard3_cpu_1port #( ) ( // Global signals input wire clk, + input wire clk_always_on, input wire rst_n, `ifdef RISCV_FORMAL `RVFI_OUTPUTS , `endif + // Power control signals + output wire pwrup_req, + input wire pwrup_ack, + output wire clk_en, + output wire unblock_out, + input wire unblock_in, + // AHB5 Master port output reg [W_ADDR-1:0] haddr, output reg hwrite, @@ -105,8 +113,15 @@ hazard3_core #( `include
"hazard3_config_inst.vh" ) core ( .clk (clk), + .clk_always_on (clk_always_on), .rst_n (rst_n), + .pwrup_req (pwrup_req), + .pwrup_ack (pwrup_ack), + .clk_en (clk_en), + .unblock_out (unblock_out), + .unblock_in (unblock_in), + `ifdef RISCV_FORMAL `RVFI_CONN , `endif diff --git a/hdl/hazard3_cpu_2port.v b/hdl/hazard3_cpu_2port.v index 4fef039..6f2829c 100644 --- a/hdl/hazard3_cpu_2port.v +++ b/hdl/hazard3_cpu_2port.v @@ -14,8 +14,16 @@ module hazard3_cpu_2port #( ) ( // Global signals input wire clk, + input wire clk_always_on, input wire rst_n, + // Power control signals + output wire pwrup_req, + input wire pwrup_ack, + output wire clk_en, + output wire unblock_out, + input wire unblock_in, + `ifdef RISCV_FORMAL `RVFI_OUTPUTS , `endif @@ -116,8 +124,15 @@ hazard3_core #( `include "hazard3_config_inst.vh" ) core ( .clk (clk), + .clk_always_on (clk_always_on), .rst_n (rst_n), + .pwrup_req (pwrup_req), + .pwrup_ack (pwrup_ack), + .clk_en (clk_en), + .unblock_out (unblock_out), + .unblock_in (unblock_in), + `ifdef RISCV_FORMAL `RVFI_CONN , `endif diff --git a/hdl/hazard3_csr.v b/hdl/hazard3_csr.v index 1773c76..f33ad3b 100644 --- a/hdl/hazard3_csr.v +++ b/hdl/hazard3_csr.v @@ -19,6 +19,7 @@ module hazard3_csr #( `include "hazard3_width_const.vh" ) ( input wire clk, + input wire clk_always_on, input wire rst_n, // Debug signalling @@ -80,7 +81,12 @@ module hazard3_csr #( // mode. input wire loadstore_dphase_pending, input wire [XLEN-1:0] mepc_in, - output wire wfi_stall_clear, + + // Power control signalling + output wire pwr_allow_sleep, + output wire pwr_allow_power_down, + output wire pwr_allow_sleep_on_block, + output wire pwr_wfi_wakeup_req, // Each of these may be performed at a different privilege level from the others: output wire m_mode_execution, @@ -426,11 +432,11 @@ end wire external_irq_pending; // Register external IRQ signals (mainly to avoid a through-path from IRQs to -// bus request signals) +// bus request signals). 
Always clocked, as it's used to generate a wakeup. reg [NUM_IRQS-1:0] irq_r; -always @ (posedge clk or negedge rst_n) begin +always @ (posedge clk_always_on or negedge rst_n) begin if (!rst_n) begin irq_r <= {NUM_IRQS{1'b0}}; end else begin @@ -504,6 +510,29 @@ hazard3_onehot_encode #( assign meinext_irq = meinext_irq_unmasked & {9{!meinext_noirq}}; +// ---------------------------------------------------------------------------- +// Custom sleep/power control CSRs + +reg msleep_sleeponblock; +reg msleep_powerdown; +reg msleep_deepsleep; + +always @ (posedge clk or negedge rst_n) begin + if (!rst_n) begin + msleep_sleeponblock <= 1'b0; + msleep_powerdown <= 1'b0; + msleep_deepsleep <= 1'b0; + end else if (wen_m_mode && addr == MSLEEP) begin + msleep_sleeponblock <= wdata_update[2] && |EXTENSION_XH3POWER; + msleep_powerdown <= wdata_update[1] && |EXTENSION_XH3POWER; + msleep_deepsleep <= wdata_update[0] && |EXTENSION_XH3POWER; + end +end + +assign pwr_allow_sleep_on_block = msleep_sleeponblock; +assign pwr_allow_power_down = msleep_powerdown; +assign pwr_allow_sleep = msleep_deepsleep; + // ---------------------------------------------------------------------------- // Counters @@ -682,7 +711,8 @@ always @ (*) begin 2'd0, // Z, Y, no |{ // X is set for any custom extensions |CSR_M_TRAP, - |EXTENSION_XH3B + |EXTENSION_XH3BEXTM + |EXTENSION_XH3POWER }, 2'd0, // V, W, no |U_MODE, @@ -1229,6 +1259,16 @@ always @ (*) begin }; end + MSLEEP: if (EXTENSION_XH3POWER) begin + decode_match = match_mrw; + rdata = { + 29'h0, + msleep_sleeponblock, + msleep_powerdown, + msleep_deepsleep + }; + end + default: begin end endcase end @@ -1247,21 +1287,29 @@ reg pending_dbg_resume_prev; wire pending_dbg_resume = (pending_dbg_resume_prev || dbg_req_resume_prev) && debug_mode; +// Halt request input register needs to always be clocked, because a WFI needs +// to fall through if the debugger requests halt of a sleeping core. 
+always @ (posedge clk_always_on or negedge rst_n) begin + if (!rst_n) begin + dbg_req_halt_prev <= 1'b0; + end else begin + // Just a delayed version of the request from outside of the core. + // Delay is fine because the DM awaits ack before deasserting. + dbg_req_halt_prev <= dbg_req_halt && DEBUG_SUPPORT != 0; + end +end + always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin have_just_reset <= |DEBUG_SUPPORT; step_halt_req <= 1'b0; dbg_req_resume_prev <= 1'b0; - dbg_req_halt_prev <= 1'b0; pending_dbg_resume_prev <= 1'b0; end else if (DEBUG_SUPPORT) begin if (instr_ret) have_just_reset <= 1'b0; - // Just a delayed version of the request from outside of the core. - // Delay is fine because the DM awaits ack before deasserting. dbg_req_resume_prev <= dbg_req_resume; - dbg_req_halt_prev <= dbg_req_halt; if (debug_mode) begin step_halt_req <= 1'b0; @@ -1346,7 +1394,8 @@ assign dbg_instr_caught_exception = debug_mode && except != EXCEPT_NONE && excep reg irq_software_r; reg irq_timer_r; -always @ (posedge clk or negedge rst_n) begin +// Always clocked, as it's used to generate a wakeup. +always @ (posedge clk_always_on or negedge rst_n) begin if (!rst_n) begin irq_software_r <= 1'b0; irq_timer_r <= 1'b0; @@ -1370,7 +1419,9 @@ wire irq_active = |(mip & mie) && mstatus_mie && !dcsr_step; // WFI clear respects individual interrupt enables but ignores mstatus.mie. // Additionally, wfi is treated as a nop during single-stepping and D-mode. -assign wfi_stall_clear = |(mip & mie) || dcsr_step || debug_mode || want_halt_irq_if_no_exception; +// Note that the IRQs and debug halt request input registers are clocked by +// clk_always_on, so that a wakeup can be generated when asleep. 
+assign pwr_wfi_wakeup_req = |(mip & mie) || dcsr_step || debug_mode || want_halt_irq_if_no_exception; // Priority order from priv spec: external > software > timer wire [3:0] standard_irq_num = diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v index 39c6dcd..5bf478e 100644 --- a/hdl/hazard3_decode.v +++ b/hdl/hazard3_decode.v @@ -51,7 +51,9 @@ module hazard3_decode #( output reg [W_ADDR-1:0] d_addr_offs, output reg d_addr_is_regoffs, output reg [W_EXCEPT-1:0] d_except, - output reg d_wfi, + output reg d_sleep_wfi, + output reg d_sleep_block, + output reg d_sleep_unblock, output reg d_fence_i ); @@ -199,7 +201,9 @@ always @ (*) begin d_addr_is_regoffs = 1'b0; d_invalid_32bit = 1'b0; d_except = EXCEPT_NONE; - d_wfi = 1'b0; + d_sleep_wfi = 1'b0; + d_sleep_block = 1'b0; + d_sleep_unblock = 1'b0; d_fence_i = 1'b0; // Note this funct3/funct7 are valid only for 32-bit instructions. They // are useful for clusters of related ALU ops, such as sh*add, clmul. @@ -310,8 +314,8 @@ always @ (*) begin `RVOPC_UNZIP: if (EXTENSION_ZBKB) begin d_aluop = ALUOP_UNZIP; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end `RVOPC_ZIP: if (EXTENSION_ZBKB) begin d_aluop = ALUOP_ZIP; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end - `RVOPC_H3_BEXTM: if (EXTENSION_XH3B) begin d_aluop = ALUOP_BEXTM; end else begin d_invalid_32bit = 1'b1; end - `RVOPC_H3_BEXTMI: if (EXTENSION_XH3B) begin d_aluop = ALUOP_BEXTM; d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end + `RVOPC_H3_BEXTM: if (EXTENSION_XH3BEXTM) begin d_aluop = ALUOP_BEXTM; end else begin d_invalid_32bit = 1'b1; end + `RVOPC_H3_BEXTMI: if (EXTENSION_XH3BEXTM) begin d_aluop = ALUOP_BEXTM; d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end `RVOPC_FENCE: begin d_rs2 = X0; end // NOP, note rs1/rd are zero in instruction `RVOPC_FENCE_I: if (EXTENSION_ZIFENCEI) begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = 
BCOND_ALWAYS; d_fence_i = 1'b1; end else begin d_invalid_32bit = 1'b1; end // note rs1/rs2/rd are zero in instruction @@ -324,7 +328,7 @@ always @ (*) begin `RVOPC_ECALL: if (HAVE_CSR) begin d_except = m_mode || !U_MODE ? EXCEPT_ECALL_M : EXCEPT_ECALL_U; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end `RVOPC_EBREAK: if (HAVE_CSR) begin d_except = EXCEPT_EBREAK; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end `RVOPC_MRET: if (HAVE_CSR && m_mode) begin d_except = EXCEPT_MRET; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end - `RVOPC_WFI: if (HAVE_CSR && permit_wfi) begin d_wfi = 1'b1; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end + `RVOPC_WFI: if (HAVE_CSR && permit_wfi) begin d_sleep_wfi = 1'b1; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end default: begin d_invalid_32bit = 1'b1; end endcase @@ -338,7 +342,7 @@ always @ (*) begin d_csr_ren = 1'b0; d_csr_wen = 1'b0; d_except = EXCEPT_NONE; - d_wfi = 1'b0; + d_sleep_wfi = 1'b0; if (EXTENSION_M) d_aluop = ALUOP_ADD; diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v index 1bb52a7..ba6ce35 100644 --- a/hdl/hazard3_frontend.v +++ b/hdl/hazard3_frontend.v @@ -63,6 +63,12 @@ module hazard3_frontend #( // is dependent on a bus stall signal so can't gate the request. input wire cir_flush_behind, + // Signal to power controller that power down is safe. (When going to + // sleep, first the pipeline is stalled, and then the power controller + // waits for the frontend to naturally come to a halt before releasing + // its power request. This avoids manually halting the frontend.) + output wire pwrdown_ok, + // Provide the rs1/rs2 register numbers which will be in CIR next cycle. // Coarse: valid if this instruction has a nonzero register operand. 
// (Suitable for regfile read) @@ -73,7 +79,6 @@ module hazard3_frontend #( output reg [4:0] predecode_rs1_fine, output reg [4:0] predecode_rs2_fine, - // Debugger instruction injection: instruction fetch is suppressed when in // debug halt state, and the DM can then inject instructions into the last // entry of the prefetch queue using the vld/rdy handshake. @@ -174,6 +179,8 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update end end +assign pwrdown_ok = fifo_full && !jump_target_vld; + // ---------------------------------------------------------------------------- // Branch target buffer diff --git a/hdl/hazard3_power_ctrl.v b/hdl/hazard3_power_ctrl.v new file mode 100644 index 0000000..400fef9 --- /dev/null +++ b/hdl/hazard3_power_ctrl.v @@ -0,0 +1,159 @@ +/*****************************************************************************\ +| Copyright (C) 2022 Luke Wren | +| SPDX-License-Identifier: Apache-2.0 | +\*****************************************************************************/ + +`default_nettype none + +// Wake/sleep (power) state machine for Hazard3 + +module hazard3_power_ctrl #( +`include "hazard3_config.vh" +) ( + input wire clk_always_on, + input wire rst_n, + + // 4-phase (Gray code) req/ack handshake for requesting and releasing + // power+clock enable on non-processor hardware, e.g. the bus fabric. This + // can also be used for an external controller to gate the processor's clk + // input, rather than the clk_en signal below. + output reg pwrup_req, + input wire pwrup_ack, + + // Top-level clock enable for an optional clock gate on the processor's clk + // input (but not clk_always_on, which clocks this module and the IRQ input + // flops). This allows the processor to clock-gate when sleeping. It's + // acceptable for the clock gate cell to have one cycle of delay when + // clk_en changes. 
+ output reg clk_en, + + // Power state controls from CSRs + input wire allow_sleep, + input wire allow_power_down, + input wire allow_sleep_on_block, + + // Signal from frontend that it has stalled against the WFI pipeline + // stall, and we are now clear to enter a deep sleep state + input wire frontend_pwrdown_ok, + + input wire sleeping_on_wfi, + input wire wfi_wakeup_req, + input wire sleeping_on_block, + input wire block_wakeup_req_pulse, + output reg stall_release +); + +// ---------------------------------------------------------------------------- +// Wake/sleep state machine + +localparam W_STATE = 2; +localparam S_AWAKE = 2'h0; +localparam S_ENTER_ASLEEP = 2'h1; +localparam S_ASLEEP = 2'h2; +localparam S_ENTER_AWAKE = 2'h3; + +reg [W_STATE-1:0] state; +reg block_wakeup_req; + +wire active_wake_req = + (sleeping_on_block && (block_wakeup_req || wfi_wakeup_req)) || + (sleeping_on_wfi && wfi_wakeup_req); + +// Note: we assert our power up request during reset, and *assume* that the +// power up acknowledge is also high at reset. If this is a problem, extend +// the core reset. + +always @ (posedge clk_always_on or negedge rst_n) begin + if (!rst_n) begin + state <= S_AWAKE; + pwrup_req <= 1'b1; + clk_en <= 1'b1; + stall_release <= 1'b0; + end else begin + stall_release <= 1'b0; + case (state) + S_AWAKE: if (sleeping_on_wfi || sleeping_on_block) begin + if (stall_release) begin + // The last cycle of an ongoing stall which we have just released. Sit + // tight, this instruction will move down the pipeline at the + // end of this cycle. (There is an assertion that this doesn't + // happen twice.) + state <= S_AWAKE; + end else if (active_wake_req) begin + // Skip deep sleep if it would immediately fall through. + stall_release <= 1'b1; + end else if ((allow_power_down || allow_sleep) && (sleeping_on_wfi || allow_sleep_on_block)) begin + if (frontend_pwrdown_ok) begin + pwrup_req <= !allow_power_down; + clk_en <= !allow_sleep; + state <= allow_power_down ?
S_ENTER_ASLEEP : S_ASLEEP; + end else begin + // Stay awake until it is safe to power down (i.e. until our + // instruction fetch goes quiet). + state <= S_AWAKE; + end + end else begin + // No power state change. Just sit with the pipeline stalled. + state <= S_AWAKE; + end + end + S_ENTER_ASLEEP: if (!pwrup_ack) begin + state <= S_ASLEEP; + end + S_ASLEEP: if (active_wake_req) begin + pwrup_req <= 1'b1; + clk_en <= 1'b1; + // Still go through the enter state for non-power-down wakeup, in + // case the clock gate cell has a 1 cycle delay. + state <= S_ENTER_AWAKE; + end + S_ENTER_AWAKE: if (pwrup_ack || !allow_power_down) begin + state <= S_AWAKE; + stall_release <= 1'b1; + end + default: begin + state <= S_AWAKE; + end + endcase + end +`ifdef HAZARD3_ASSERTIONS + // These must always be mutually exclusive. + assert(!(sleeping_on_wfi && sleeping_on_block)); + if (stall_release) begin + // Presumably there was a stall which we just released + assert($past(sleeping_on_wfi) || $past(sleeping_on_block)); + // Presumably we are still in that stall + assert(sleeping_on_wfi|| sleeping_on_block); + // It takes one cycle to do a release and enter a new sleep state, so a + // double release should be impossible. + assert(!$past(stall_release)); + end + if (state == S_ASLEEP) begin + assert(allow_power_down || allow_sleep); + end +`endif +end + +// ---------------------------------------------------------------------------- +// Pulse->level for block wakeup + +// Unblock signal is sticky: a prior unblock with no block since will cause +// the next block to immediately fall through. + +always @ (posedge clk_always_on or negedge rst_n) begin + if (!rst_n) begin + block_wakeup_req <= 1'b0; + end else begin + // Note the OR takes precedence over the AND, so we don't miss a second + // unblock that arrives at the instant we wake up. 
+ block_wakeup_req <= (block_wakeup_req && !( + sleeping_on_block && stall_release + )) || block_wakeup_req_pulse; + end +end + +endmodule + +`ifndef YOSYS +`default_nettype wire +`endif diff --git a/test/sim/sw_testcases/wfi_loop_deepsleep_powerdown.c b/test/sim/sw_testcases/wfi_loop_deepsleep_powerdown.c new file mode 100644 index 0000000..6d6adc9 --- /dev/null +++ b/test/sim/sw_testcases/wfi_loop_deepsleep_powerdown.c @@ -0,0 +1,65 @@ +#include "tb_cxxrtl_io.h" +#include "hazard3_csr.h" + +// Same as wfi_loop, but enable msleep.deepsleep and msleep.powerdown first. + +/*EXPECTED-OUTPUT*************************************************************** + +Enabling IRQS... +IRQ 1 +IRQ 2 +IRQ 3 +IRQ 4 +IRQ 5 +IRQ 6 +IRQ 7 +IRQ 8 +IRQ 9 +IRQ 10 +Took 10 IRQs, span 9 times + +*******************************************************************************/ + +#define TIMER_INTERVAL 1000 +#define MAX_IRQ_COUNT 10 + +#define __wfi() asm volatile ("wfi") +#define __compiler_mb() asm volatile ("" ::: "memory") + +int irq_count; +void __attribute__((interrupt)) isr_machine_timer() { + __compiler_mb(); + ++irq_count; + __compiler_mb(); + + tb_printf("IRQ %d\n", irq_count); + + // Disable timer IRQ via MTIE, or set the next timer IRQ + if (irq_count >= MAX_IRQ_COUNT) + asm ("csrc mie, %0" :: "r" (1u << 7)); + else + mm_timer->mtimecmp = mm_timer->mtime + TIMER_INTERVAL; +} + +int main() { + + irq_count = 0; + __compiler_mb(); + // Per-IRQ enable for timer IRQ + asm ("csrs mie, %0" :: "r" (1u << 7)); + write_csr(hazard3_csr_msleep, 0x7); + tb_puts("Enabling IRQS...\n"); + // Global IRQ enable. Timer IRQ will fire immediately. 
+ asm ("csrsi mstatus, 0x8"); + + // Count the number of sleep loop iterations to make sure the wfi waits + int wait_spin_count = 0; + while (irq_count < MAX_IRQ_COUNT) { + ++wait_spin_count; + __wfi(); + __compiler_mb(); + } + + tb_printf("Took %d IRQs, span %d times\n", irq_count, wait_spin_count); + return irq_count != wait_spin_count + 1; +} diff --git a/test/sim/tb_cxxrtl/tb.v b/test/sim/tb_cxxrtl/tb.v index 77d2d58..45d83b3 100644 --- a/test/sim/tb_cxxrtl/tb.v +++ b/test/sim/tb_cxxrtl/tb.v @@ -133,6 +133,12 @@ wire sbus_err; wire [31:0] sbus_wdata; wire [31:0] sbus_rdata; +wire pwrup_req; +wire pwrup_ack = pwrup_req; +wire clk_en; +wire unblock_out; +wire unblock_in = unblock_out; + hazard3_dm #( .N_HARTS (N_HARTS), .HAVE_SBA (1), @@ -205,8 +211,15 @@ hazard3_cpu_2port #( `include "hazard3_config_inst.vh" ) cpu ( .clk (clk), + .clk_always_on (clk), .rst_n (rst_n_cpu), + .pwrup_req (pwrup_req), + .pwrup_ack (pwrup_ack), + .clk_en (clk_en), + .unblock_out (unblock_out), + .unblock_in (unblock_in), + .i_haddr (i_haddr), .i_hwrite (i_hwrite), .i_htrans (i_htrans),