Files
GameAndWatch_MiSTer/rtl/cpu/instructions.sv
2026-05-13 23:41:02 -05:00

828 lines
20 KiB
Systemverilog

interface instructions (
// CPU variant selector. In this file: 1, 2, 6 and 7 take the SM511/SM512 melody path,
// 4 takes the SM5a output path, and 0 or 5 make WR/WS update the S port directly
input wire [3:0] cpu_id,
// Data
// Current instruction byte; instructions read their immediate fields from it
input wire [7:0] opcode,
// Previous instruction byte (only referenced by commented-out code in this file)
input wire [7:0] last_opcode,
// Current melody ROM byte, consumed by clock_melody
input wire [7:0] melody_data,
// Nibble read from RAM (presumably at ram_addr; the RAM itself is outside this file)
input wire [3:0] ram_data,
// Internal
// Flag tested by TIS; TIS also requests that it be cleared via reset_gamma
input wire gamma,
// 15-bit clock divider; read by clock_melody and DTA
input wire [14:0] divider,
// Divider tap tested by TF1
input wire divider_4hz,
// Divider tap tested by TF4
input wire divider_32hz,
// Pl value that ATPL uses to restore the upper two bits of Pl
input wire [5:0] last_Pl,
// IO
// K input nibble, loaded into Acc by KTA
input wire [3:0] input_k,
// Beta input, tested by TB
input wire input_beta,
// BA input, tested by TAL
input wire input_ba
);
////////////////////////////////////////////////////////////////////////////////////////
// Instruction controlled registers
// Everything in this section is written by the instruction tasks below and carries a
// declaration-time initial value
// PC
// The program counter is the concatenation of three fields: Pu (2 bits), Pm (4 bits)
// and Pl (6 bits)
reg [1:0] Pu = 0;
reg [3:0] Pm = 0;
reg [5:0] Pl = 0;
wire [11:0] pc = {Pu, Pm, Pl};
// The ROM address is the PC itself
wire [11:0] rom_addr = pc;
// Two-level return stack: push_stack moves S into R and loads S, pop_stack loads the PC
// from S and optionally refills S from R
// Reused as entire stack in SM5a
reg [11:0] stack_s = 0;
reg [11:0] stack_r = 0;
// Accumulator
reg [3:0] Acc = 0;
reg carry = 0;
// LCD Functions
// LCD pulse generator circuit
reg lcd_bp = 0;
// LCD bleeder circuit (on means no display)
reg lcd_bc = 0;
// Segment output L, written by ATL
reg [3:0] segment_l = 0;
// TODO: Currently unused. See LCD pulsing
// Segment output Y, written by ATFC
reg [3:0] segment_y = 0;
// Segment output X, written by ATX
reg [3:0] segment_x = 0;
// W shift register (shifted by WR/WS) and the S output latch that receives its value
reg [7:0] shifter_w = 0;
reg [7:0] shifter_s = 0;
// Control
// Request to skip the instruction that follows the current one
reg skip_next_instr = 0;
// Skip next instruction only if next is LAX
reg skip_next_if_lax = 0;
// Set by SBM (SM510 flavour); see the sbm task
reg temp_sbm = 0;
// Pending RAM address update: [5:4] is written by exc_x, [3:0] by incb/decb.
// wr_next_ram_addr flags that an update is pending
reg [5:0] next_ram_addr = 0;
reg wr_next_ram_addr = 0;
// Divider reset requests raised by CEND/IDIV (full) and the SM500 IDIV (keeps low 6 bits)
reg reset_divider = 0;
reg reset_divider_keep_6 = 0;
// Request to clear gamma, raised by TIS
reg reset_gamma = 0;
// Set by CEND
reg halt = 0;
// Value latched by ATR, and the R output derived from it in clock_melody
reg [3:0] stored_output_r = 0;
reg [3:0] output_r = 0;
// SM511/SM512 melody controller and clock selection
// 1 selects the slow clock (CLKLO), 0 the fast clock (CLKHI)
reg sm511_slow_clock = 1;
// Bit 0: melody enable (SME/RME). Bit 1: stop flag, set in clock_melody and cleared by TMEL
reg [1:0] melody_rd = 0;
reg [4:0] melody_step_count = 0;
reg [4:0] melody_duty_count = 0;
reg [1:0] melody_duty_index = 0;
reg [7:0] melody_address = 0;
reg melody_active_tone = 0;
reg [4:0] melody_target_cycles = 0;
localparam R_MASK_DIRECT = 3'h7;
// Direct passthrough of R0 on 0x7, otherwise use the divider bit indicated by this value
reg [2:0] output_r_mask = R_MASK_DIRECT;
////////////////////////////////////////////////////////////////////////////////////////
// RAM
// RAM Address
// The 7-bit address is Bm (upper 3 bits) concatenated with Bl (lower 4 bits)
reg [2:0] Bm = 0;
reg [3:0] Bl = 0;
wire [6:0] ram_addr = {Bm, Bl};
// Write strobe and write data, set by the instructions that store to RAM
reg ram_wr = 0;
reg [3:0] ram_wr_data = 0;
////////////////////////////////////////////////////////////////////////////////////////
// SM5a Registers
// Bank select used by some jumps
reg cb_bank = 0;
// MAME calls this `m_rsub`
// While clear, TR performs a long jump in addition to loading Pl
reg within_subroutine = 0;
// W' (shifted by shift_w_prime) and W (loaded from W' by PTW/TW); nine nibbles each
reg [3:0] w_prime[9];
reg [3:0] w_main[9];
// LCD CN flag. MAME uses bit 3 of `m_bp` for this
reg lcd_cn = 0;
// m' flag, set by SMF and cleared by RMF; read by pla_digit
reg m_prime = 0;
////////////////////////////////////////////////////////////////////////////////////////
// Instruction shortcuts
task exc_x(reg swap);
  // Common load/exchange step. Acc always receives the RAM nibble; when `swap` is set the
  // old Acc is additionally written back to RAM, making it an exchange.
  //
  // The low two bits of Bm are XORed with the opcode immediate and queued in
  // next_ram_addr[5:4]; the queued value will be written in STAGE_LOAD_PC.
  wr_next_ram_addr <= 1'b1;
  next_ram_addr[5:4] <= opcode[1:0] ^ Bm[1:0];
  if (swap) begin
    ram_wr <= 1'b1;
    ram_wr_data <= Acc;
  end
  Acc <= ram_data;
endtask : exc_x
task incb();
  // INCB: queue Bl + 1 (4-bit wrap) as the next low address nibble and request a skip
  // when Bl held 0xF before the increment
  skip_next_instr <= (Bl == 4'hF);
  wr_next_ram_addr <= 1'b1;
  next_ram_addr[3:0] <= Bl + 4'd1;
endtask : incb
task incb_sm500();
  // INCB (SM500 flavour): same increment as incb, but the skip is requested when Bl held
  // 0x7 before the increment
  skip_next_instr <= (Bl == 4'h7);
  wr_next_ram_addr <= 1'b1;
  next_ram_addr[3:0] <= Bl + 4'd1;
endtask : incb_sm500
task decb();
  // DECB: queue Bl - 1 (4-bit wrap) as the next low address nibble and request a skip
  // when Bl held 0x0 before the decrement
  skip_next_instr <= (Bl == 4'h0);
  wr_next_ram_addr <= 1'b1;
  next_ram_addr[3:0] <= Bl - 4'd1;
endtask : decb
task pop_stack(reg update_s);
  // Return: load the PC fields from S. With `update_s` set, S is refilled from R in the
  // same step; otherwise S keeps its value.
  if (update_s) begin
    stack_s <= stack_r;
  end
  Pu <= stack_s[11:10];
  Pm <= stack_s[9:6];
  Pl <= stack_s[5:0];
endtask : pop_stack
task push_stack(reg [11:0] next_pc);
  // Call: the old S moves down into R while S takes the supplied return address
  {stack_r, stack_s} <= {stack_s, next_pc};
endtask : push_stack
////////////////////////////////////////////////////////////////////////////////////////
// Melody/Output
// Tone number: low nibble of the current melody ROM byte
wire [3:0] melody_tone = melody_data[3:0];
// Lookup key for the cycle table: duty phase in the upper two bits, tone in the lower four
wire [5:0] melody_tone_key = {melody_duty_index, melody_tone};
// Tones 2 through 13 are the ones that produce sound
wire melody_active_tone_next = melody_tone >= 4'd2 && melody_tone <= 4'd13;
// Purely combinational: driven only by the always_comb lookup that follows. No
// declaration initialiser here, since always_comb must be the variable's sole writer and
// strict tools reject or warn about an initialised always_comb target.
reg [3:0] melody_tone_cycles;
// Bit 4 of the ROM byte picks the octave: set uses the table value as is, clear doubles it
wire [4:0] melody_target_cycles_next =
    melody_data[4] ? {1'b0, melody_tone_cycles} : {melody_tone_cycles, 1'b0};
always_comb begin
  // Cycle-count lookup. melody_tone_key[5:4] is the duty phase and melody_tone_key[3:0]
  // the tone; only tones 0x2-0xD have entries, everything else yields zero.
  melody_tone_cycles = 4'd0;
  case (melody_tone_key[5:4])
    2'd0: begin
      case (melody_tone_key[3:0])
        4'h2:       melody_tone_cycles = 4'd7;
        4'h3, 4'h4: melody_tone_cycles = 4'd8;
        4'h5, 4'h6: melody_tone_cycles = 4'd9;
        4'h7:       melody_tone_cycles = 4'd10;
        4'h8, 4'h9: melody_tone_cycles = 4'd11;
        4'hA:       melody_tone_cycles = 4'd12;
        4'hB:       melody_tone_cycles = 4'd13;
        4'hC, 4'hD: melody_tone_cycles = 4'd14;
        default:    melody_tone_cycles = 4'd0;
      endcase
    end
    2'd1: begin
      case (melody_tone_key[3:0])
        4'h2, 4'h3: melody_tone_cycles = 4'd8;
        4'h4, 4'h5: melody_tone_cycles = 4'd9;
        4'h6:       melody_tone_cycles = 4'd10;
        4'h7, 4'h8: melody_tone_cycles = 4'd11;
        4'h9:       melody_tone_cycles = 4'd12;
        4'hA, 4'hB: melody_tone_cycles = 4'd13;
        4'hC:       melody_tone_cycles = 4'd14;
        4'hD:       melody_tone_cycles = 4'd15;
        default:    melody_tone_cycles = 4'd0;
      endcase
    end
    2'd2: begin
      case (melody_tone_key[3:0])
        4'h2, 4'h3: melody_tone_cycles = 4'd8;
        4'h4, 4'h5: melody_tone_cycles = 4'd9;
        4'h6, 4'h7: melody_tone_cycles = 4'd10;
        4'h8:       melody_tone_cycles = 4'd11;
        4'h9, 4'hA: melody_tone_cycles = 4'd12;
        4'hB:       melody_tone_cycles = 4'd13;
        4'hC:       melody_tone_cycles = 4'd14;
        4'hD:       melody_tone_cycles = 4'd15;
        default:    melody_tone_cycles = 4'd0;
      endcase
    end
    2'd3: begin
      case (melody_tone_key[3:0])
        4'h2:       melody_tone_cycles = 4'd8;
        4'h3, 4'h4: melody_tone_cycles = 4'd9;
        4'h5, 4'h6: melody_tone_cycles = 4'd10;
        4'h7, 4'h8: melody_tone_cycles = 4'd11;
        4'h9:       melody_tone_cycles = 4'd12;
        4'hA:       melody_tone_cycles = 4'd13;
        4'hB, 4'hC: melody_tone_cycles = 4'd14;
        4'hD:       melody_tone_cycles = 4'd15;
        default:    melody_tone_cycles = 4'd0;
      endcase
    end
    default: melody_tone_cycles = 4'd0;
  endcase
end
// Computes the next value of output_r. The behaviour is selected by cpu_id:
//   1, 2, 6, 7 - SM511/SM512 melody generator driven by melody_data
//   4          - SM5a: inverted stored_output_r, with bit 0 optionally gated by a divider bit
//   otherwise  - SM510/SM510 Tiger: stored_output_r passed through or masked by a divider bit
task automatic clock_melody();
case (cpu_id)
1, 2, 6, 7: begin
// SM511/SM512 dedicated melody generator. The melody ROM stores 6-bit commands in
// 8-bit bytes; bit 5 selects duration and bit 4 selects octave.
reg [5:0] cmd;
reg [3:0] tone;
reg [4:0] target_cycles;
reg [4:0] next_duty_count;
reg [4:0] step_mask;
reg [4:0] next_step_count;
reg active_tone;
reg out;
cmd = melody_data[5:0];
tone = cmd[3:0];
// Both values below come from the combinational lookup on the current ROM byte
active_tone = melody_active_tone_next;
target_cycles = melody_target_cycles_next;
next_duty_count = melody_duty_count + 5'd1;
// Output stays low unless an active tone drives it below
out = 0;
melody_active_tone <= active_tone;
melody_target_cycles <= active_tone ? target_cycles : 5'd0;
if (active_tone) begin
// Output is high only on odd duty phases and only while melody_rd[0] (enable) is set
out = melody_duty_index[0] & melody_rd[0];
// Advance to the next duty phase once the phase has lasted target_cycles calls
if (next_duty_count >= target_cycles) begin
melody_duty_count <= 0;
melody_duty_index <= melody_duty_index + 2'd1;
end else begin
melody_duty_count <= next_duty_count;
end
end else begin
// Tone 1 raises the stop flag (melody_rd[1]); TMEL tests and clears it
if (tone == 4'd1) begin
melody_rd[1] <= 1;
end
end
// Time base: act only when the low seven divider bits are all zero
if ((divider & 15'h007F) == 15'h0000) begin
// cmd[5] selects a 32-step note, otherwise 16 steps
step_mask = cmd[5] ? 5'h1F : 5'h0F;
next_step_count = (melody_step_count + 5'd1) & step_mask;
melody_step_count <= next_step_count;
// Step counter wrapped: move on to the next melody ROM byte
if (next_step_count == 5'h00) begin
melody_address <= melody_address + 8'd1;
end
end
// Only bit 0 of output_r carries the melody signal
output_r <= {3'b000, out};
end
4: begin
// SM5a
reg r0_mask;
// Bit 0 gate: always open in direct mode, otherwise follows the selected divider bit
r0_mask = output_r_mask == R_MASK_DIRECT ? 1'b1 : divider[output_r_mask];
// All four stored bits are inverted on the way out; bit 0 is additionally gated
output_r <= {~stored_output_r[3:1], r0_mask && ~stored_output_r[0]};
end
default: begin
// SM510/SM510 Tiger
if (output_r_mask == R_MASK_DIRECT) begin
// Direct mode: only the low two stored bits are passed through
output_r <= {2'b0, stored_output_r[1:0]};
end else begin
reg [3:0] out;
// The selected divider bit is zero-extended into bit 0 of the mask, then copied to bit 3
out = divider[output_r_mask];
out[3] = out[3] | out[0];
// NOTE(review): mask bits 1 and 2 are never set on this path, so output_r[2:1] is
// always 0 in divider mode - confirm this matches the intended R pin behaviour
output_r <= stored_output_r & out;
end
end
endcase
endtask
////////////////////////////////////////////////////////////////////////////////////////
// Instructions
task atbp();
  // ATBP: Acc bit 0 goes to the LCD BP flag, Acc bit 3 to the LCD CN flag
  {lcd_cn, lcd_bp} <= {Acc[3], Acc[0]};
endtask : atbp
task sbm();
  // SBM: raise temp_sbm. The flag forces the high bit of Bm for the next instruction only
  // (it is masked directly into the RAM input); Bm itself is not modified here.
  temp_sbm <= 1'b1;
endtask : sbm
task sbm_sm500();
  // SBM (SM500 flavour): set the high bit of Bm; the low two bits are left alone
  Bm[2] <= 1'b1;
endtask : sbm_sm500
task atpl();
  // ATPL: the low nibble of Pl is loaded from Acc.
  // Pl has already been incremented by the time this runs, so its upper two bits are taken
  // from last_Pl to make sure they are unchanged.
  Pl[3:0] <= Acc;
  Pl[5:4] <= last_Pl[5:4];
endtask : atpl
task rm();
  // 0x04-07: RM x. Write the RAM nibble back with bit x (opcode[1:0]) cleared
  reg [3:0] bit_mask;
  bit_mask = 4'b0001 << opcode[1:0];
  ram_wr <= 1'b1;
  ram_wr_data <= ram_data & ~bit_mask;
endtask : rm
task add();
  // ADD: Acc <- Acc + RAM nibble, truncated to four bits. Carry is not touched.
  Acc <= ram_data + Acc;
endtask : add
task add11();
  // ADD11: Acc <- Acc + RAM nibble + carry. The fifth bit of the sum becomes the new carry
  // and also requests a skip of the next instruction.
  reg [4:0] sum;
  sum = {1'b0, Acc} + {1'b0, ram_data} + {4'b0000, carry};
  Acc <= sum[3:0];
  carry <= sum[4];
  skip_next_instr <= sum[4];
endtask : add11
task coma();
  // COMA: invert every bit of Acc
  Acc <= Acc ^ 4'hF;
endtask : coma
task exbla();
  // EXBLA: exchange Acc with Bl
  {Acc, Bl} <= {Bl, Acc};
endtask : exbla
task sm();
  // 0x0C-0F: SM x. Write the RAM nibble back with bit x (opcode[1:0]) set
  reg [3:0] bit_mask;
  bit_mask = 4'b0001 << opcode[1:0];
  ram_wr <= 1'b1;
  ram_wr_data <= ram_data | bit_mask;
endtask : sm
// task exc();
// // 0x10-13: EXC x. Swap Acc and RAM. XOR Bm with immed
// exc_x(1);
// endtask
// task exci();
// // 0x14-17: EXCI x. Swap Acc and RAM. XOR Bm with immed. Increment Bl. If Bl was 0xF, skip next
// exc_x(1);
// incb();
// endtask
// task lda();
// // 0x18-1B: LDA x. Load Acc with RAM value. XOR Bm with immed
// exc_x(0);
// endtask
// task excd();
// // 0x1C-1F: EXCD x. Swap Acc and RAM. XOR Bm with immed. Decrement Bl. If Bl was 0x0, skip next
// exc_x(1);
// decb();
// endtask
task lax();
  // LAX x: load Acc with the low opcode nibble and arm skip_next_if_lax, so that a LAX
  // immediately following this one is skipped
  skip_next_if_lax <= 1'b1;
  Acc <= opcode[3:0];
endtask : lax
task adx();
  // ADX x: add the low opcode nibble to Acc. A carry out of the nibble requests a skip,
  // except for immediate 0xA, which never skips (die bug).
  reg [4:0] sum;
  sum = {1'b0, Acc} + {1'b0, opcode[3:0]};
  Acc <= sum[3:0];
  if (opcode[3:0] == 4'hA) begin
    skip_next_instr <= 1'b0;
  end else begin
    skip_next_instr <= sum[4];
  end
endtask : adx
task lb();
  // LB x: opcode[1:0] goes to the low two bits of Bm and opcode[3:2] to the low two bits
  // of Bl. Both upper bits of Bl get the OR of opcode[3] and opcode[2].
  // The OR is questionable according to the docs, but other implementations (MAME) use it.
  Bm[1:0] <= opcode[1:0];
  Bl[1:0] <= opcode[3:2];
  Bl[3:2] <= {2{|opcode[3:2]}};
endtask : lb
task lb_sm500();
  // LB x (SM500 flavour): Bm becomes {0, opcode[1:0]}. Bl's low two bits take opcode[3:2];
  // Bl[3] is set when opcode[3:2] is non-zero and Bl[2] is always cleared.
  Bm <= {1'b0, opcode[1:0]};
  Bl <= {|opcode[3:2], 1'b0, opcode[3:2]};
endtask : lb_sm500
task tb();
  // TB: request a skip of the next instruction when the Beta input is high
  skip_next_instr <= input_beta;
endtask : tb
task tc();
  // TC: request a skip of the next instruction when carry is clear
  skip_next_instr <= !carry;
endtask : tc
task tam();
  // TAM: request a skip of the next instruction when the RAM nibble equals Acc
  skip_next_instr <= (ram_data == Acc);
endtask : tam
task tmi();
  // TMI x: request a skip of the next instruction when bit x (opcode[1:0]) of the RAM
  // nibble is set
  skip_next_instr <= ram_data[opcode[1:0]];
endtask : tmi
task tis();
  // TIS: request a skip when the one-second signal is low, then ask for it to be cleared.
  // TODO: All sources seem to treat gamma as the one-second signal, so gamma is used here.
  reset_gamma <= 1'b1;
  skip_next_instr <= !gamma;
endtask : tis
task atl();
  // ATL: copy Acc to segment output L
  segment_l <= Acc[3:0];
endtask : atl
task tao();
  // TAO: request a skip of the next instruction when Acc is zero
  skip_next_instr <= ~|Acc;
endtask : tao
task tabl();
  // TABL: request a skip of the next instruction when Acc equals Bl
  skip_next_instr <= (Bl == Acc);
endtask : tabl
task cend();
  // CEND: stop the clock by raising halt, and request a divider reset
  reset_divider <= 1'b1;
  halt <= 1'b1;
endtask : cend
task tal();
  // TAL: request a skip of the next instruction when the BA input is high
  skip_next_instr <= (input_ba == 1'b1);
endtask : tal
task atfc();
  // ATFC: copy Acc to segment output Y
  segment_y <= Acc[3:0];
endtask : atfc
task atx();
  // ATX: copy Acc to segment output X. Used by SM511/SM512 for the second BS bit.
  segment_x <= Acc[3:0];
endtask : atx
task atr();
  // ATR: latch all four Acc bits as the stored R (buzzer) control value. Which of those
  // bits reach output_r is decided per CPU in clock_melody.
  stored_output_r <= Acc[3:0];
endtask : atr
task ptw_s();
  // PTW: copy the W shift register to the S output latch. SM510 refreshes S directly on
  // WR/WS, whereas SM511/SM512 refresh it explicitly through this instruction.
  shifter_s <= shifter_w[7:0];
endtask : ptw_s
task wr();
  // WR: shift W left by one, bringing a 0 into the low bit. For cpu_id 0 and 5 the S
  // latch receives the shifted value in the same step.
  reg [7:0] shifted;
  shifted = shifter_w << 1;
  shifter_w <= shifted;
  case (cpu_id)
    4'd0, 4'd5: shifter_s <= shifted;
    default: ;
  endcase
endtask : wr
// task wr_sm500(reg [3:0] w_length);
// // WR. Shift Acc (0 high bit) into W'
// shift_w_prime(w_length, Acc & 4'h7);
// endtask
task ws();
  // WS: shift W left by one, bringing a 1 into the low bit. For cpu_id 0 and 5 the S
  // latch receives the shifted value in the same step.
  reg [7:0] shifted;
  shifted = (shifter_w << 1) | 8'h01;
  shifter_w <= shifted;
  case (cpu_id)
    4'd0, 4'd5: shifter_s <= shifted;
    default: ;
  endcase
endtask : ws
// task ws_sm500(reg [3:0] w_length);
// // WS. Shift Acc (1 high bit) into W'
// shift_w_prime(w_length, Acc | 4'h8);
// endtask
task idiv();
  // IDIV: request a reset of the clock divider
  reset_divider <= 1'b1;
endtask : idiv
task idiv_sm500();
  // IDIV (SM500 flavour): request a divider reset that keeps the lower 6 bits
  reset_divider_keep_6 <= 1'b1;
endtask : idiv_sm500
task rc();
  // RC: reset the carry flag
  carry <= 1'b0;
endtask : rc
task sc();
  // SC: set the carry flag
  carry <= 1'b1;
endtask : sc
task tf1();
  // TF1: request a skip of the next instruction when F1 is high. F1 is the clock divider's
  // 14th bit, supplied here as divider_4hz.
  skip_next_instr <= divider_4hz;
endtask : tf1
task tf4();
  // TF4: request a skip of the next instruction when F4 is high. F4 is the clock divider's
  // 11th bit, supplied here as divider_32hz.
  skip_next_instr <= divider_32hz;
endtask : tf4
task kta();
  // KTA: load Acc from the four K input bits
  Acc <= input_k[3:0];
endtask : kta
task rot();
  // ROT: rotate right through carry. The old carry enters Acc at bit 3 and the old Acc
  // bit 0 becomes the new carry.
  carry <= Acc[0];
  Acc <= {carry, Acc[3:1]};
endtask : rot
task bdc();
  // BDC: copy carry into the LCD bleeder control. The display is on when this is low.
  lcd_bc <= carry;
endtask : bdc
// task rtn0();
// // RTN0. Pop stack. Move S into PC, and R into S
// pop_stack();
// within_subroutine <= 0;
// endtask
// task rtn1();
// // RTN1. Pop stack. Move S into PC, and R into S. Skip next instruction
// pop_stack();
// skip_next_instr <= 1;
// within_subroutine <= 0;
// endtask
task t();
  // T xy: short jump within the current page. Only Pl changes; it takes the low six
  // opcode bits.
  Pl[5:0] <= opcode[5:0];
endtask : t
// task tm();
// // TM x. Jumps to IDX table, and executes that instruction. Push PC + 1 into stack
// push_stack(pc);
// {Pu, Pm, Pl} <= {2'b0, 4'b0, opcode[5:0]};
// endtask
////////////////////////////////////////////////////////////////////////////////////////
// SM5a Instructions
task ptw(reg [3:0] w_length);
  // PTW: copy the last two entries of W' (indices w_length-1 and w_length-2) into W
  int back;
  for (back = 1; back <= 2; back += 1) begin
    w_main[w_length-back] <= w_prime[w_length-back];
  end
endtask : ptw
task tw(reg [3:0] w_length);
  // TW: copy the first w_length entries of W' into W.
  //
  // The loop runs over the full array depth (9) with a per-entry guard instead of using
  // w_length as the loop bound. A constant bound can always be unrolled by synthesis,
  // whereas a bound taken from the task argument only unrolls when every call site passes
  // a constant. The entries copied are the same as before.
  int i;
  for (i = 0; i < 9; i += 1) begin
    if (i < w_length) begin
      w_main[i] <= w_prime[i];
    end
  end
endtask : tw
reg [3:0] pla_data[32];
task init_pla();
  // Loads the 32-entry PLA table. Quartus ignores an inline initialiser on pla_data
  // (maybe because of the interface?), so the table is filled by calling this task as a
  // part of reset instead.
  // pla_digit indexes the table with {lcd_cn, Acc}: the first two rows below are the
  // entries for lcd_cn = 0 and the last two those for lcd_cn = 1.
  pla_data <= '{
    4'he, 4'h0, 4'hc, 4'h8, 4'h2, 4'ha, 4'he, 4'h2,
    4'he, 4'ha, 4'h0, 4'h0, 4'h2, 4'ha, 4'h2, 4'h2,
    4'hb, 4'h9, 4'h7, 4'hf, 4'hd, 4'he, 4'he, 4'hb,
    4'hf, 4'hf, 4'h4, 4'h0, 4'hd, 4'he, 4'h4, 4'h0
  };
endtask : init_pla
function [3:0] pla_digit();
  // Looks up the PLA entry selected by {lcd_cn, Acc}. Bit 0 of the result is additionally
  // forced high when m_prime is set while lcd_cn is clear.
  reg [3:0] segments;
  reg force_bit0;
  force_bit0 = m_prime & ~lcd_cn;
  segments = pla_data[{lcd_cn, Acc}];
  return {segments[3:1], segments[0] | force_bit0};
endfunction : pla_digit
task shift_w_prime(reg [3:0] w_length, reg [3:0] new_value);
  // Shifts W' down by one entry (entry n+1 moves to entry n for n = 0..7), then stores
  // new_value at index w_length-1. The store is issued last so that it overrides the
  // shifted value at that index.
  int src;
  for (src = 1; src <= 8; src += 1) begin
    w_prime[src-1] <= w_prime[src];
  end
  w_prime[w_length-1] <= new_value;
endtask : shift_w_prime
// task dtw(reg [3:0] w_length);
// // DTW. Shift PLA value into W'
// reg [3:0] digit;
// digit = pla_digit();
// shift_w_prime(w_length, digit);
// endtask
task comcn();
  // COMCN: toggle the LCD CN flag
  lcd_cn <= ~lcd_cn;
endtask : comcn
// task pdtw(reg [3:0] w_length);
// // PDTW. Shift last two nibbles of W', moving one PLA value in
// reg [3:0] w_prime_temp[9];
// reg [3:0] digit;
// digit = pla_digit();
// w_prime_temp[w_length-2] = w_prime_temp[w_length-1];
// w_prime_temp[w_length-1] = digit;
// w_prime <= w_prime_temp;
// endtask
task rmf();
  // RMF: clear both Acc and the m' flag
  Acc <= 4'h0;
  m_prime <= 1'b0;
endtask : rmf
task smf();
  // SMF: set the m' flag
  m_prime <= 1'b1;
endtask : smf
task rbm();
  // RBM: clear the high bit of Bm; the low two bits are left alone
  Bm[2] <= 1'b0;
endtask : rbm
task comcb();
  // COMCB: toggle the CB bank select
  cb_bank <= ~cb_bank;
endtask : comcb
task ssr();
  // SSR: load bits 9:6 of stack S with the low opcode nibble.
  // The instruction is also described as setting E for the next instruction; no separate
  // E flag is written by this task.
  stack_s[9:6] <= opcode[3:0];
endtask : ssr
task tr();
  // TR: long or short jump. Pl always takes the low six opcode bits. Outside a subroutine
  // the jump is long: Pu becomes {0, cb_bank} and Pm is taken from bits 9:6 of stack S.
  if (!within_subroutine) begin
    Pu <= {1'b0, cb_bank};
    Pm <= stack_s[9:6];
  end
  Pl <= opcode[5:0];
endtask : tr
// task trs(reg field);
// // TRS. Call subroutine
// if (within_subroutine) begin
// Pl <= {2'b0, opcode[3:0]};
// Pm[1:0] <= opcode[5:4];
// end else begin
// // Enter subroutine
// reg [3:0] temp_su;
// within_subroutine <= 1;
// temp_su = stack_s[9:6];
// push_stack(pc);
// if (last_opcode[7:4] == 4'h7) begin
// // Last instruction was SSR, and E flag would be set
// {Pu, Pm, Pl} <= {1'b0, cb_bank, temp_su, opcode[5:0]};
// end else begin
// {Pu, Pm, Pl} <= {1'b0, field, 4'b0, opcode[5:0]};
// end
// end
// endtask
task dta();
  // DTA: load Acc with the top four bits (14:11) of the clock divider
  Acc[3:0] <= divider[14:11];
endtask : dta
task pre();
  // PRE: preset the SM511/SM512 melody ROM pointer from the opcode byte and restart the
  // step counter
  melody_step_count <= 5'd0;
  melody_address <= opcode[7:0];
endtask : pre
task sme();
  // SME: enable SM511/SM512 melody playback by setting melody_rd bit 0
  melody_rd[0] <= 1'b1;
endtask : sme
task rme();
  // RME: disable SM511/SM512 melody playback by clearing melody_rd bit 0
  melody_rd[0] <= 1'b0;
endtask : rme
task tmel();
  // TMEL: request a skip when the melody stop flag (melody_rd bit 1) is set; the flag is
  // cleared in either case
  melody_rd[1] <= 1'b0;
  skip_next_instr <= melody_rd[1];
endtask : tmel
task clklo();
  // CLKLO: select the 8.192kHz SM511/SM512 instruction clock
  sm511_slow_clock <= 1'b1;
endtask : clklo
task clkhi();
  // CLKHI: select the 16.384kHz SM511/SM512 instruction clock
  sm511_slow_clock <= 1'b0;
endtask : clkhi
endinterface