diff --git a/Gameboy.sv b/Gameboy.sv index 1e712f1..86eba15 100644 --- a/Gameboy.sv +++ b/Gameboy.sv @@ -195,6 +195,8 @@ assign BUTTONS = 0; assign HDMI_FREEZE = 0; assign VGA_SCALER= 0; assign VGA_DISABLE = 0; +assign HDMI_BLACKOUT = 0; +assign HDMI_BOB_DEINT = 0; assign AUDIO_MIX = status[8:7]; @@ -210,7 +212,7 @@ assign DDRAM_WE = 0; // 0 1 2 3 4 5 6 // 01234567890123456789012345678901 23456789012345678901234567890123 // 0123456789ABCDEFGHIJKLMNOPQRSTUV 0123456789ABCDEFGHIJKLMNOPQRSTUV -// XXXXXXXXXXXXXXXXXXXXXXXX X XXXXXXX +// XXXXXXXXXXXXXXXXXXXXXXXX X XXXXXXXX `include "build_id.v" localparam CONF_STR = { @@ -228,16 +230,17 @@ localparam CONF_STR = { "P1,Audio & Video;", "P1-;", + "P1O[44],Extra sprites,No,Yes;", "P1ON,Seperator Line,Off,On;", "P1OC,Inverted color,No,Yes;", "P1O12,Custom Palette,Off,Auto,On;", "h1P1FC3,GBP,Load Palette;", + "d4P1OU,GBC Colors,Corrected,Raw;", + "P1O5,Sync Video,Off,On;", "P1-;", "P1O34,Aspect ratio,Original,Full Screen,[ARC1],[ARC2];", "P1OLM,Scale,Normal,V-Integer,Narrower HV-Integer,Wider HV-Integer;", "P1OIK,Scandoubler Fx,None,HQ2x,CRT 25%,CRT 50%,CRT 75%;", - "d4P1OU,GBC Colors,Corrected,Raw;", - "P1O5,Sync Video,Off,On;", "P1-;", "P1O78,Stereo mix,none,25%,50%,100%;", "P1O[43],Audio mode,Accurate,No Pops;", @@ -637,12 +640,14 @@ gb gb1 ( .clk_sys ( clk_sys ), .ce ( ce1_cpu ), // the whole gameboy runs on 4mhnz + .ce_n ( ce1_cpu_n ), // 4MHz falling edge clock enable .ce_2x ( ce1_cpu2x ), // ~8MHz in dualspeed mode (GBC) .isGBC ( isGBC ), .real_cgb_boot ( using_real_cgb_bios ), .isSGB ( 1'b0 ), .megaduck ( megaduck ), + .extra_spr_en( status[44] ), .joy_p54 ( joy1_p54 ), .joy_din ( joy1_do ), @@ -826,6 +831,7 @@ gb gb2 ( .clk_sys ( clk_sys ), .ce ( ce2_cpu ), // the whole gameboy runs on 4mhnz + .ce_n ( ce2_cpu_n ), // 4MHz falling edge clock enable .ce_2x ( ce2_cpu2x ), // ~8MHz in dualspeed mode (GBC) @@ -833,6 +839,7 @@ gb gb2 ( .real_cgb_boot ( using_real_cgb_bios ), .isSGB ( 1'b0 ), .megaduck ( megaduck ), + .extra_spr_en( status[44] ), .joy_p54 ( joy2_p54 ), .joy_din ( joy2_do ), @@ -1050,8 +1057,8 @@ video_freak video_freak //////////////////////////////// CE //////////////////////////////////// -wire ce1_cpu, ce1_cpu2x; -wire ce2_cpu, ce2_cpu2x; +wire ce1_cpu, ce1_cpu_n, ce1_cpu2x; +wire ce2_cpu, ce2_cpu_n, ce2_cpu2x; wire ce_ram2x; speedcontrol speedcontrol1 @@ -1062,6 +1069,7 @@ speedcontrol speedcontrol1 .romack (sdram_ack), .pausevideo (pauseVideoCore1 & status[5]), .ce (ce1_cpu), + .ce_n (ce1_cpu_n), .ce_2x (ce1_cpu2x) ); @@ -1073,6 +1081,7 @@ speedcontrol speedcontrol2 .romack (sdram_ack2), .pausevideo (pauseVideoCore2 & status[5]), .ce (ce2_cpu), + .ce_n (ce2_cpu_n), .ce_2x (ce2_cpu2x) ); diff --git a/files.qip b/files.qip index daffe35..1bcfbbf 100644 --- a/files.qip +++ b/files.qip @@ -8,6 +8,8 @@ set_global_assignment -name SYSTEMVERILOG_FILE rtl/megaswizzle.sv set_global_assignment -name VERILOG_FILE rtl/video.v set_global_assignment -name VERILOG_FILE rtl/timer.v set_global_assignment -name VERILOG_FILE rtl/sprites.v +set_global_assignment -name VERILOG_FILE rtl/sprites_extra.v +set_global_assignment -name VERILOG_FILE rtl/sprites_extra_store.v set_global_assignment -name VERILOG_FILE rtl/lcd.v set_global_assignment -name VHDL_FILE rtl/gbc_snd.vhd set_global_assignment -name VHDL_FILE rtl/speedcontrol.vhd diff --git a/releases/Gameboy2P_20250621.rbf b/releases/Gameboy2P_20250621.rbf new file mode 100644 index 0000000..ee231a4 Binary files /dev/null and b/releases/Gameboy2P_20250621.rbf differ diff --git a/rtl/gb.v b/rtl/gb.v index 44e6715..75621dd 100644 --- a/rtl/gb.v +++ b/rtl/gb.v @@ -24,12 +24,14 @@ module gb ( input clk_sys, input ce, + input ce_n, input ce_2x, input [7:0] joystick, input isGBC, input real_cgb_boot, input isSGB, + input extra_spr_en, // cartridge interface // can adress up to 1MB ROM @@ -683,6 +685,7 @@ video video ( .reset ( reset_ss ), .clk ( clk_sys ), .ce ( ce ), // 4Mhz + .ce_n ( ce_n ), .ce_cpu ( ce_cpu ), //can be 2x in cgb double speed mode .isGBC ( isGBC ), .isGBC_mode ( isGBC_mode ), //enable GBC mode during bootstrap rom @@ -720,6 +723,9 @@ video video ( .dma_rd ( dma_rd ), .dma_addr ( dma_addr ), .dma_data ( dma_data ), + + .extra_spr_en( extra_spr_en ), + .extra_wait ( (isGBC & hdma_rd) | dma_rd | sel_vram ), .Savestate_OAMRAMAddr (Savestate_RAMAddr[7:0]), .Savestate_OAMRAMRWrEn (Savestate_RAMRWrEn[2]), @@ -904,10 +910,10 @@ assign SS_Top_BACK[23] = boot_rom_enabled; always @(posedge clk_sys) begin if(reset_ss) boot_rom_enabled <= SS_Top[23]; // 1'b1; - else if (ce) begin - if((cpu_addr == 16'hff50) && !cpu_wr_n_edge) - if ((isGBC && cpu_do[7:0]==8'h11) || (!isGBC && cpu_do[0])) - boot_rom_enabled <= 1'b0; + else if (ce) begin + if((cpu_addr == 16'hff50) && !cpu_wr_n_edge && cpu_do[0]) begin + boot_rom_enabled <= 1'b0; + end end end diff --git a/rtl/hdma.v b/rtl/hdma.v index 411c4b7..1def0d6 100644 --- a/rtl/hdma.v +++ b/rtl/hdma.v @@ -28,18 +28,18 @@ module hdma( ); // savestates -wire [44:0] SS_HDMA; -wire [44:0] SS_HDMA_BACK; +wire [47:0] SS_HDMA; +wire [47:0] SS_HDMA_BACK; -eReg_SavestateV #(0, 7, 44, 0, 64'h0000000001FFFFF0) iREG_SAVESTATE_HDMA (clk, SaveStateBus_Din, SaveStateBus_Adr, SaveStateBus_wren, SaveStateBus_rst, SaveStateBus_Dout, SS_HDMA_BACK, SS_HDMA); +eReg_SavestateV #(0, 7, 47, 0, 64'h0000E00001FFFFF0) iREG_SAVESTATE_HDMA (clk, SaveStateBus_Din, SaveStateBus_Adr, SaveStateBus_wren, SaveStateBus_rst, SaveStateBus_Dout, SS_HDMA_BACK, SS_HDMA); //"The preparation time (4 clocks) is the same in single and double speed mode" localparam START_DELAY = 3'd4, END_DELAY = 3'd4; // ff51-ff55 HDMA1-5 (GBC) -reg [11:0] hdma_source; // ff51, ff52 only top 4 bits used -reg [8:0] hdma_target; // ff53 only lowest 5 bits used, ff54 only top 4 bits used +reg [15:4] hdma_source; // ff51, ff52 only top 4 bits used +reg [15:4] hdma_target; // ff53, ff54 only top 4 bits used reg hdma_mode; // ff55 bit 7 - 0 = General Purpose DMA / 1 = H-Blank DMA reg hdma_enabled; // ff55 !bit 7 when read @@ -53,8 +53,8 @@ wire [1:0] byte_cycles = speed ? 2'd3 : 2'd1; reg [1:0] hdma_cnt; //assign hdma_rd = hdma_active; -assign hdma_source_addr = { hdma_source, byte_cnt }; -assign hdma_target_addr = { 3'b100,hdma_target, byte_cnt }; +assign hdma_source_addr = { hdma_source, byte_cnt }; +assign hdma_target_addr = { hdma_target, byte_cnt }; reg [2:0] dma_delay; @@ -68,7 +68,7 @@ assign SS_HDMA_BACK[ 0] = hdma_active ; assign SS_HDMA_BACK[ 2: 1] = hdma_state ; assign SS_HDMA_BACK[ 3] = hdma_enabled; assign SS_HDMA_BACK[15: 4] = hdma_source ; -assign SS_HDMA_BACK[24:16] = hdma_target ; +assign SS_HDMA_BACK[24:16] = hdma_target[12:4]; assign SS_HDMA_BACK[27:25] = dma_delay ; assign SS_HDMA_BACK[ 28] = hdma_rd ; assign SS_HDMA_BACK[ 29] = hdma_end ; @@ -76,6 +76,7 @@ assign SS_HDMA_BACK[ 30] = hdma_mode ; assign SS_HDMA_BACK[38:31] = hdma_length ; assign SS_HDMA_BACK[42:39] = byte_cnt ; assign SS_HDMA_BACK[44:43] = hdma_cnt ; +assign SS_HDMA_BACK[47:45] = hdma_target[15:13]; always @(posedge clk) begin @@ -84,7 +85,7 @@ always @(posedge clk) begin hdma_state <= SS_HDMA[ 2: 1]; // wait_h; hdma_enabled <= SS_HDMA[ 3]; // 1'b0; hdma_source <= SS_HDMA[15: 4]; // 12'hFFF; - hdma_target <= SS_HDMA[24:16]; // 9'h1FF; + hdma_target <= { SS_HDMA[47:45], SS_HDMA[24:16] }; // 12'hFFF; dma_delay <= SS_HDMA[27:25]; // 3'd0; hdma_rd <= SS_HDMA[ 28]; // 0; hdma_end <= SS_HDMA[ 29]; // 0; @@ -97,10 +98,10 @@ always @(posedge clk) begin if(sel_reg && wr) begin case (addr) - 4'd1: hdma_source[11:4] <= din; - 4'd2: hdma_source[3:0] <= din[7:4]; - 4'd3: hdma_target[8:4] <= din[4:0]; - 4'd4: hdma_target[3:0] <= din[7:4]; + 4'd1: hdma_source[15:8] <= din; + 4'd2: hdma_source[7:4] <= din[7:4]; + 4'd3: hdma_target[15:8] <= din; + 4'd4: hdma_target[7:4] <= din[7:4]; // writing the hdma register engages the dma engine 4'h5: begin diff --git a/rtl/lcd.v b/rtl/lcd.v index 3a2130c..404afef 100644 --- a/rtl/lcd.v +++ b/rtl/lcd.v @@ -331,10 +331,6 @@ wire [4:0] r5 = pixel_out[4:0]; wire [4:0] g5 = pixel_out[9:5]; wire [4:0] b5 = pixel_out[14:10]; -wire [31:0] r10 = (r5 * 13) + (g5 * 2) +b5; -wire [31:0] g10 = (g5 * 3) + b5; -wire [31:0] b10 = (r5 * 3) + (g5 * 2) + (b5 * 11); - // greyscale wire [7:0] grey = (pixel==0) ? 8'd252 : (pixel==1) ? 8'd168 : (pixel==2) ? 8'd96 : 8'd0; @@ -347,12 +343,38 @@ function [7:0] blend; end endfunction +reg [3:0] color_lut_sr; +wire [7:0] color_lut_dout; + +wire [9:0] color_lut_addr = + color_lut_sr[0] ? { r5, r5 } : // red only + color_lut_sr[1] ? { b5, g5 } : // mixed blue + green + { b5, b5 }; // blue only + +// Color LUT for mix of 2 colors with 2 stage gamma +dpram_dif #(10,8,10,8,"lcd_color_lut.mif") lcd_color_lut ( + .clock (clk_vid), + + .address_a (color_lut_addr), + .q_a (color_lut_dout) +); + + +reg [7:0] r_lut, g_lut, b_lut; +always@(posedge clk_vid) begin + color_lut_sr <= { color_lut_sr[2:0], ce_pix_n }; + + if (color_lut_sr[1]) begin r_lut <= color_lut_dout; end + if (color_lut_sr[2]) begin g_lut <= color_lut_dout; end + if (color_lut_sr[3]) begin b_lut <= color_lut_dout; end +end + reg [7:0] r_tmp, g_tmp, b_tmp; always@(*) begin if (isGBC & !originalcolors) begin - r_tmp = r10[8:1]; - g_tmp = {g10[6:0],1'b0}; - b_tmp = b10[8:1]; + r_tmp = r_lut; + g_tmp = g_lut; + b_tmp = b_lut; end else if (isGBC & originalcolors) begin r_tmp = {r5,r5[4:2]}; g_tmp = {g5,g5[4:2]}; diff --git a/rtl/lcd_color_lut.mif b/rtl/lcd_color_lut.mif new file mode 100644 index 0000000..e7c197a --- /dev/null +++ b/rtl/lcd_color_lut.mif @@ -0,0 +1,80 @@ +-- GBC LCD color LUT +-- Calculation: +-- col1, col2 range 0 - 31 +-- a = col1 * 255 / 31 (to range 0-255) +-- b = col2 * 255 / 31 +-- c = pow(a/255, 2.2) * 255 (pre-gamma 2.2) +-- d = pow(b/255, 2.2) * 255 +-- e = ((c * 13) + (d * 3)) / 16 (mix color 13/16 + 3/16) +-- f = pow(e/255, 1/2.2 * 0.7) * 255 (final gamma 0.7) + +DEPTH = 1024; +WIDTH = 8; +ADDRESS_RADIX = HEX; +DATA_RADIX = HEX; +CONTENT BEGIN +0000: 00 16 23 2F 39 43 4C 54 5C 64 6C 74 7B 82 89 90; +0010: 96 9D A3 A9 B0 B6 BC C2 C8 CD D3 D9 DE E4 E9 EF; +0020: 0E 17 24 2F 39 43 4C 54 5D 64 6C 74 7B 82 89 90; +0030: 96 9D A3 A9 B0 B6 BC C2 C8 CD D3 D9 DE E4 E9 EF; +0040: 16 1B 25 30 3A 43 4C 55 5D 65 6C 74 7B 82 89 90; +0050: 96 9D A3 AA B0 B6 BC C2 C8 CD D3 D9 DE E4 E9 EF; +0060: 1D 20 28 32 3B 44 4D 55 5D 65 6D 74 7B 82 89 90; +0070: 97 9D A3 AA B0 B6 BC C2 C8 CD D3 D9 DE E4 E9 EF; +0080: 24 26 2C 34 3D 45 4E 56 5E 66 6D 74 7C 83 89 90; +0090: 97 9D A4 AA B0 B6 BC C2 C8 CE D3 D9 DF E4 E9 EF; +00A0: 2A 2B 30 37 3F 47 4F 57 5F 66 6E 75 7C 83 8A 91; +00B0: 97 9E A4 AA B0 B6 BC C2 C8 CE D3 D9 DF E4 EA EF; +00C0: 2F 31 35 3B 42 49 51 58 60 67 6F 76 7D 84 8A 91; +00D0: 98 9E A4 AA B1 B7 BD C2 C8 CE D4 D9 DF E4 EA EF; +00E0: 35 36 39 3E 45 4B 53 5A 61 68 70 77 7E 84 8B 92; +00F0: 98 9E A5 AB B1 B7 BD C3 C9 CE D4 DA DF E5 EA EF; +0100: 3A 3B 3E 42 48 4E 55 5C 63 6A 71 78 7E 85 8C 92; +0110: 99 9F A5 AB B1 B7 BD C3 C9 CF D4 DA DF E5 EA F0; +0120: 3F 40 42 46 4B 51 57 5E 65 6B 72 79 7F 86 8D 93; +0130: 99 A0 A6 AC B2 B8 BE C3 C9 CF D5 DA E0 E5 EA F0; +0140: 44 44 46 4A 4E 54 5A 60 66 6D 74 7A 81 87 8D 94; +0150: 9A A0 A6 AC B2 B8 BE C4 CA CF D5 DA E0 E5 EB F0; +0160: 48 49 4B 4E 52 57 5C 62 68 6F 75 7B 82 88 8E 95; +0170: 9B A1 A7 AD B3 B9 BF C4 CA D0 D5 DB E0 E6 EB F0; +0180: 4D 4D 4F 52 55 5A 5F 65 6B 71 77 7D 83 89 90 96; +0190: 9C A2 A8 AE B4 BA BF C5 CB D0 D6 DB E1 E6 EC F1; +01A0: 51 52 53 56 59 5D 62 67 6D 73 79 7F 85 8B 91 97; +01B0: 9D A3 A9 AF B4 BA C0 C6 CB D1 D6 DC E1 E7 EC F1; +01C0: 56 56 57 5A 5D 61 65 6A 6F 75 7B 80 86 8C 92 98; +01D0: 9E A4 AA B0 B5 BB C1 C6 CC D1 D7 DC E2 E7 EC F2; +01E0: 5A 5A 5C 5E 60 64 68 6D 72 77 7D 82 88 8E 94 99; +01F0: 9F A5 AB B1 B6 BC C2 C7 CD D2 D8 DD E2 E8 ED F2; +0200: 5E 5F 60 61 64 67 6B 70 74 79 7F 84 8A 8F 95 9B; +0210: A0 A6 AC B2 B7 BD C2 C8 CD D3 D8 DE E3 E8 ED F3; +0220: 62 63 64 65 68 6B 6E 73 77 7C 81 86 8C 91 97 9C; +0230: A2 A7 AD B3 B8 BE C3 C9 CE D4 D9 DE E4 E9 EE F3; +0240: 66 67 67 69 6B 6E 72 75 7A 7E 83 88 8E 93 98 9E; +0250: A3 A9 AE B4 B9 BF C4 CA CF D4 DA DF E4 EA EF F4; +0260: 6A 6A 6B 6D 6F 71 75 78 7D 81 86 8B 90 95 9A 9F; +0270: A5 AA B0 B5 BA C0 C5 CB D0 D5 DB E0 E5 EA EF F5; +0280: 6E 6E 6F 70 72 75 78 7B 7F 84 88 8D 92 97 9C A1; +0290: A6 AC B1 B6 BC C1 C6 CC D1 D6 DB E1 E6 EB F0 F5; +02A0: 72 72 73 74 76 78 7B 7E 82 86 8B 8F 94 99 9E A3; +02B0: A8 AD B2 B8 BD C2 C7 CD D2 D7 DC E1 E7 EC F1 F6; +02C0: 76 76 77 78 79 7C 7E 82 85 89 8D 91 96 9B A0 A5; +02D0: AA AF B4 B9 BE C3 C9 CE D3 D8 DD E2 E7 ED F2 F7; +02E0: 79 7A 7A 7B 7D 7F 82 85 88 8C 90 94 98 9D A2 A6; +02F0: AB B0 B5 BA C0 C5 CA CF D4 D9 DE E3 E8 ED F2 F7; +0300: 7D 7D 7E 7F 80 82 85 88 8B 8E 92 96 9B 9F A4 A8; +0310: AD B2 B7 BC C1 C6 CB D0 D5 DA DF E4 E9 EE F3 F8; +0320: 81 81 81 82 84 86 88 8B 8E 91 95 99 9D A1 A6 AA; +0330: AF B4 B9 BE C2 C7 CC D1 D6 DB E0 E5 EA EF F4 F9; +0340: 84 84 85 86 87 89 8B 8E 91 94 98 9B 9F A4 A8 AC; +0350: B1 B6 BA BF C4 C9 CE D3 D8 DD E1 E6 EB F0 F5 FA; +0360: 88 88 89 89 8B 8C 8E 91 94 97 9A 9E A2 A6 AA AE; +0370: B3 B7 BC C1 C6 CA CF D4 D9 DE E3 E7 EC F1 F6 FB; +0380: 8B 8C 8C 8D 8E 90 92 94 97 9A 9D A0 A4 A8 AC B0; +0390: B5 B9 BE C2 C7 CC D1 D5 DA DF E4 E9 ED F2 F7 FC; +03A0: 8F 8F 8F 90 91 93 95 97 9A 9C A0 A3 A7 AA AE B3; +03B0: B7 BB C0 C4 C9 CD D2 D7 DC E0 E5 EA EF F3 F8 FD; +03C0: 92 92 93 94 95 96 98 9A 9D 9F A2 A6 A9 AD B1 B5; +03D0: B9 BD C2 C6 CA CF D4 D8 DD E2 E6 EB F0 F4 F9 FE; +03E0: 96 96 96 97 98 99 9B 9D 9F A2 A5 A8 AC AF B3 B7; +03F0: BB BF C3 C8 CC D1 D5 DA DE E3 E8 EC F1 F6 FA FF; +END; diff --git a/rtl/reg_savestates.vhd b/rtl/reg_savestates.vhd index 7f85dba..e44867f 100644 --- a/rtl/reg_savestates.vhd +++ b/rtl/reg_savestates.vhd @@ -19,7 +19,7 @@ package pReg_savestates is -- components constant REG_SAVESTATE_Timer : regmap_type := ( 6, 46, 0, 1, x"0000000000000008"); - constant REG_SAVESTATE_HDMA : regmap_type := ( 7, 44, 0, 1, x"0000000001FFFFF0"); + constant REG_SAVESTATE_HDMA : regmap_type := ( 7, 47, 0, 1, x"0000E00001FFFFF0"); constant REG_SAVESTATE_Link : regmap_type := ( 8, 16, 0, 1, x"0000000000000000"); diff --git a/rtl/speedcontrol.vhd b/rtl/speedcontrol.vhd index 1e1f4d6..35b3c7f 100644 --- a/rtl/speedcontrol.vhd +++ b/rtl/speedcontrol.vhd @@ -11,6 +11,7 @@ entity speedcontrol is romack : in std_logic; pausevideo : in std_logic; ce : out std_logic := '0'; + ce_n : out std_logic := '0'; ce_2x : out std_logic := '0' ); end entity; @@ -34,6 +35,7 @@ begin if falling_edge(clk_sys) then ce <= '0'; + ce_n <= '0'; ce_2x <= '0'; skipclock := '0'; @@ -42,6 +44,9 @@ begin if (clkdiv = "000") then ce <= '1'; end if; + if (clkdiv = "100") then + ce_n <= '1'; + end if; if (clkdiv(1 downto 0) = "00") then ce_2x <= '1'; end if; @@ -76,6 +81,7 @@ begin if (skipclock = '1') then ce <= '0'; + ce_n <= '0'; ce_2x <= '0'; if (clkdiv = "100") then clkdiv <= "001"; diff --git a/rtl/sprites.v b/rtl/sprites.v index 4914be5..ec1ceff 100644 --- a/rtl/sprites.v +++ b/rtl/sprites.v @@ -32,7 +32,8 @@ module sprites ( // pixel position input which the current pixel is generated for input [7:0] v_cnt, input [7:0] h_cnt, - + + input sprite_fetch_c1, input sprite_fetch_done, output sprite_fetch, @@ -41,7 +42,7 @@ module sprites ( output oam_eval, output [10:0] sprite_addr, - output reg [7:0] sprite_attr, + output [7:0] sprite_attr, output [3:0] sprite_index, output oam_eval_end, @@ -52,6 +53,21 @@ module sprites ( input [7:0] oam_addr_in, input [7:0] oam_di, output [7:0] oam_do, + + input extra_spr_en, + input extra_wait, + + output extra_tile_fetch, + output [11:0] extra_tile_addr, + input [7:0] tile_data_in, + + output spr_extra_found, + output [7:0] spr_extra_tile0, + output [7:0] spr_extra_tile1, + output [2:0] spr_extra_cgb_pal, + output [3:0] spr_extra_index, + output spr_extra_pal, + output spr_extra_prio, // savestates input [7:0] Savestate_OAMRAMAddr, @@ -62,48 +78,72 @@ module sprites ( localparam SPRITES_PER_LINE = 10; -reg [7:0] oam_spr_addr; -wire [7:0] oam_fetch_addr; -reg [7:0] oam_q; +wire [7:2] oam_eval_addr, oam_fetch_addr; +wire [7:0] oam_l_q, oam_h_q; reg oam_eval_en; assign oam_eval = lcd_on & ~oam_eval_end & oam_eval_en & ~oam_eval_reset; -wire [7:0] oam_addr = dma_active ? oam_addr_in : - oam_eval ? oam_spr_addr : - oam_fetch ? oam_fetch_addr : - oam_addr_in; +wire [3:0] fetch_row; + +wire oam_eval_extra; +wire [7:1] oam_extra_addr; +wire [7:0] spr_extra_fetch_attr; + +wire [7:1] oam_addr = dma_active ? oam_addr_in[7:1] : + oam_eval_extra ? { oam_extra_addr } : + oam_eval ? { oam_eval_addr, 1'b0 } : + oam_fetch ? { oam_fetch_addr, 1'b1 } : + oam_addr_in[7:1]; wire valid_oam_addr = (oam_addr[7:4] < 4'hA); // $FEA0 - $FEFF unused range -assign oam_do = dma_active ? 8'hFF : valid_oam_addr ? oam_q : 8'd0; +assign oam_do = ~valid_oam_addr ? 8'd0 : (oam_addr_in[0] ? oam_h_q : oam_l_q); +wire [7:0] Savestate_OAMRAMReadDataL, Savestate_OAMRAMReadDataH; -dpram #(8) oam_data ( +dpram #(7,8) oam_data_l ( .clock_a (clk ), - .address_a (oam_addr ), - .wren_a (ce_cpu && oam_wr && valid_oam_addr), + .address_a (oam_addr[7:1]), + .wren_a (ce_cpu && oam_wr && valid_oam_addr && ~oam_addr_in[0]), .data_a (oam_di ), - .q_a (oam_q ), + .q_a (oam_l_q ), .clock_b (clk), - .address_b (Savestate_OAMRAMAddr ), - .wren_b (Savestate_OAMRAMRWrEn ), + .address_b (Savestate_OAMRAMAddr[7:1]), + .wren_b (Savestate_OAMRAMRWrEn & ~Savestate_OAMRAMAddr[0]), .data_b (Savestate_OAMRAMWriteData), - .q_b (Savestate_OAMRAMReadData ) + .q_b (Savestate_OAMRAMReadDataL) ); +dpram #(7,8) oam_data_h ( + .clock_a (clk ), + .address_a (oam_addr[7:1] ), + .wren_a (ce_cpu && oam_wr && valid_oam_addr && oam_addr_in[0]), + .data_a (oam_di ), + .q_a (oam_h_q ), + + .clock_b (clk), + .address_b (Savestate_OAMRAMAddr[7:1]), + .wren_b (Savestate_OAMRAMRWrEn & Savestate_OAMRAMAddr[0]), + .data_b (Savestate_OAMRAMWriteData), + .q_b (Savestate_OAMRAMReadDataH) +); + +assign Savestate_OAMRAMReadData = Savestate_OAMRAMAddr[0] ? Savestate_OAMRAMReadDataH : Savestate_OAMRAMReadDataL; + reg [7:0] sprite_x[0:SPRITES_PER_LINE-1]; reg [3:0] sprite_y[0:SPRITES_PER_LINE-1]; reg [5:0] sprite_no[0:SPRITES_PER_LINE-1]; // OAM evaluation. Get the first 10 sprites on the current line. -reg [5:0] spr_index; // 40 sprites +reg [5:0] spr_index, spr_index_d; // 40 sprites reg [3:0] sprite_cnt; -reg sprite_cycle; +reg oam_eval_clk, oam_eval_clk_d, oam_eval_save; -reg [7:0] spr_y; +reg [7:0] sprite_x_attr, tile_index_y; wire [7:0] spr_height = size16 ? 8'd16 : 8'd8; -wire sprite_on_line = (v_cnt + 8'd16 >= spr_y) && (v_cnt + 8'd16 < spr_y + spr_height); +wire sprite_on_line = (v_cnt + 8'd16 >= tile_index_y) && (v_cnt + 8'd16 < tile_index_y + spr_height); +wire sprite_save = oam_eval_clk_d & oam_eval_en & sprite_on_line; assign oam_eval_end = (spr_index == 6'd40); @@ -117,8 +157,8 @@ always @(posedge clk) begin if (oam_eval_reset | ~lcd_on) begin sprite_cnt <= 0; spr_index <= ~lcd_on ? 6'd1 : 6'd0; - sprite_cycle <= 0; - oam_spr_addr <= 0; + oam_eval_clk <= 0; + oam_eval_clk_d <= 0; oam_eval_en <= oam_eval_reset ? 1'b1 : 1'b0; // OAM evaluation does not run on the first line after enabling the lcd for (spr_i=0; spr_i < SPRITES_PER_LINE; spr_i=spr_i+1) begin sprite_x[spr_i] <= 8'hFF; @@ -127,24 +167,19 @@ always @(posedge clk) begin end else begin if (~oam_eval_end) begin - if (sprite_cycle) spr_index <= spr_index + 1'b1; - - if (oam_eval_en && sprite_cnt < SPRITES_PER_LINE) begin - if (~sprite_cycle) begin - spr_y <= oam_do; - oam_spr_addr <= {spr_index,2'b01}; - end else begin - if (sprite_on_line) begin - sprite_no[sprite_cnt] <= spr_index; - sprite_x[sprite_cnt] <= oam_do; - sprite_y[sprite_cnt] <= v_cnt[3:0] - spr_y[3:0]; - sprite_cnt <= sprite_cnt + 1'b1; - end - oam_spr_addr <= {spr_index+1'b1, 2'b00}; - end + if (oam_eval_clk) begin + spr_index <= spr_index + 1'b1; + spr_index_d <= spr_index; end + oam_eval_clk <= ~oam_eval_clk; + end - sprite_cycle <= ~sprite_cycle; + oam_eval_clk_d <= oam_eval_clk; + if (sprite_save & (sprite_cnt < SPRITES_PER_LINE)) begin + sprite_no[sprite_cnt] <= spr_index_d; + sprite_x[sprite_cnt] <= sprite_x_attr; + sprite_y[sprite_cnt] <= v_cnt[3:0] - tile_index_y[3:0]; + sprite_cnt <= sprite_cnt + 1'b1; end // Set X-position to FF after fetching the sprite to prevent fetching it again. @@ -166,6 +201,18 @@ always @(posedge clk) begin end end +assign oam_eval_addr = spr_index; + +wire eval_save_xy = (~oam_eval_end & oam_eval_en & oam_eval_clk & ~dma_active); +wire fetch_save_index_attr = (sprite_fetch & sprite_fetch_c1); +always @(posedge clk) begin + if (ce) begin + if (eval_save_xy | fetch_save_index_attr) begin + tile_index_y <= oam_l_q; + sprite_x_attr <= oam_h_q; + end + end +end // Sprite fetching assign sprite_x_matches = { @@ -195,31 +242,56 @@ wire [3:0] active_sprite = sprite_x_matches[8] ? 4'd8 : 4'd9; assign sprite_index = active_sprite; +assign sprite_attr = oam_eval_extra ? spr_extra_fetch_attr : sprite_x_attr; -wire [5:0] oam_fetch_index = sprite_no[active_sprite]; +assign oam_fetch_addr = sprite_no[active_sprite]; -reg [3:0] row; -reg [7:0] tile_no; -reg oam_fetch_cycle; -assign oam_fetch_addr = {oam_fetch_index, 1'b1, oam_fetch_cycle}; -assign sprite_addr = size16 ? {tile_no[7:1],row} : {tile_no,row[2:0]}; +assign fetch_row = sprite_attr[6] ? ~sprite_y[active_sprite] : sprite_y[active_sprite]; -always @(posedge clk) begin - if (ce) begin - if (sprite_fetch) begin +assign sprite_addr = size16 ? {tile_index_y[7:1],fetch_row} : {tile_index_y,fetch_row[2:0]}; - if (~oam_fetch_cycle) begin - tile_no <= oam_do; - end else begin - sprite_attr <= oam_do; - row <= oam_do[6] ? ~sprite_y[active_sprite] : sprite_y[active_sprite]; - end +// Extra sprites: +// Sprite tile fetching during mode3 reduces the length of HBlank. +// Simply adding more sprites will shorten HBlank even more which breaks timing. +// Instead, this module will try to fetch tile data for extra sprites during mode2 if VRAM is idle. +sprites_extra sprites_extra ( + .clk ( clk ), + .ce ( ce ), - oam_fetch_cycle <= ~oam_fetch_cycle; - end else begin - oam_fetch_cycle <= 0; - end - end -end + .extra_spr_en ( extra_spr_en ), + + .v_cnt ( v_cnt ), + .h_cnt ( h_cnt ), + + .oam_eval_clk ( oam_eval_clk ), + .oam_eval_reset ( oam_eval_reset | ~lcd_on), + .oam_eval_end ( oam_eval_end ), + + .size16 ( size16 ), + + .oam_index ( spr_index ), + .sprite_cnt ( sprite_cnt ), + + .oam_l_q ( oam_l_q ), + .oam_h_q ( oam_h_q ), + + .extra_wait ( extra_wait ), + .oam_eval_extra ( oam_eval_extra ), + .oam_extra_addr ( oam_extra_addr ) , + + .spr_fetch_attr ( spr_extra_fetch_attr ), + + .tile_fetch ( extra_tile_fetch ), + .tile_data_in ( tile_data_in ), + .tile_addr ( extra_tile_addr ), + + .spr_found ( spr_extra_found ), + .spr_tile0 ( spr_extra_tile0 ), + .spr_tile1 ( spr_extra_tile1 ), + .spr_pal ( spr_extra_pal ), + .spr_prio ( spr_extra_prio ), + .spr_cgb_pal ( spr_extra_cgb_pal ), + .spr_index ( spr_extra_index ) +); endmodule \ No newline at end of file diff --git a/rtl/sprites_extra.v b/rtl/sprites_extra.v new file mode 100644 index 0000000..31a6a22 --- /dev/null +++ b/rtl/sprites_extra.v @@ -0,0 +1,205 @@ +module sprites_extra ( + input clk, + input ce, + + input extra_spr_en, + + input oam_eval_end, + input oam_eval_clk, + input oam_eval_reset, + + input size16, + + input [5:0] oam_index, + input [3:0] sprite_cnt, + + input [7:0] oam_l_q, + input [7:0] oam_h_q, + + input [7:0] v_cnt, + input [7:0] h_cnt, + + input extra_wait, + output oam_eval_extra, + output [7:1] oam_extra_addr, + + output [7:0] spr_fetch_attr, + + output tile_fetch, + output [11:0] tile_addr, + input [7:0] tile_data_in, + + output spr_found, + output [7:0] spr_tile0, + output [7:0] spr_tile1, + output [2:0] spr_cgb_pal, + output [3:0] spr_index, + output spr_pal, + output spr_prio +); + +// Maximum extra sprites is 6 currently because sprite index in the pixel shifters is 4 bits. +localparam SPRITES_PER_LINE = 10; +localparam SPRITES_EXTRA = 6; + +wire [SPRITES_EXTRA-1:0] sprite_x_matches; + +reg [7:0] sprite_x; +reg [3:0] sprite_y; + +reg [5:0] extra_oam_index; +reg [3:0] new_sprite_x_index; +reg [4:0] extra_sprite_index; + +reg oam_attr_fetch; + +reg [7:0] tile_index; +reg [7:0] tile_y; +reg [3:0] tile_row; +reg [7:0] tile_attr; + +reg extra_waiting; +wire extra_pause = extra_wait | extra_waiting; + +wire oam_extra_start = extra_spr_en & (sprite_cnt == SPRITES_PER_LINE); + +assign oam_eval_extra = oam_extra_start & ~oam_eval_end & ~extra_pause; +assign oam_extra_addr = { extra_oam_index, oam_attr_fetch }; + +assign spr_fetch_attr = tile_attr; + +wire [7:0] spr_height = size16 ? 8'd16 : 8'd8; +wire sprite_on_line = (v_cnt + 8'd16 >= oam_l_q) && (v_cnt + 8'd16 < oam_l_q + spr_height); + +reg tile_fetching; +reg [3:0] tile_fetch_x_index; +reg [3:0] tile_fetch_sprite_index; +reg [1:0] tile_fetch_cnt; +reg tile1_fetch; + +wire tile_save; +reg save_x; + +always @(posedge clk) begin + if (ce) begin + if (~oam_extra_start) begin + extra_waiting <= 0; + end else if (oam_eval_clk) begin + extra_waiting <= extra_wait; + end + end +end + +always @(posedge clk) begin + if (ce) begin + + save_x <= 0; + + if (~oam_extra_start) begin + extra_oam_index <= oam_index; + oam_attr_fetch <= 0; + new_sprite_x_index <= 0; + extra_sprite_index <= SPRITES_PER_LINE[4:0]; + end else begin + if (oam_eval_clk & ~extra_pause) begin + if (~oam_attr_fetch) begin + if (sprite_on_line) begin + sprite_x <= oam_h_q; + sprite_y <= v_cnt[3:0] - oam_l_q[3:0]; + oam_attr_fetch <= 1; + end else begin + extra_oam_index <= extra_oam_index + 1'b1; + end + end else begin // Fetched attributes + extra_oam_index <= extra_oam_index + 1'b1; + oam_attr_fetch <= 0; + + tile_index <= oam_l_q; + tile_attr <= oam_h_q; + tile_row <= oam_h_q[6] ? ~sprite_y : sprite_y; + tile_fetch_sprite_index <= extra_sprite_index[3:0]; + + if (extra_sprite_index != SPRITES_PER_LINE+SPRITES_EXTRA) begin + if (~spr_found) begin // Skip sprite if this X position was already found + tile_fetch_x_index <= new_sprite_x_index; + save_x <= 1; // Store X position and start tile fetch + + new_sprite_x_index <= new_sprite_x_index + 1'b1; + end + + extra_sprite_index <= extra_sprite_index + 1'b1; + end + end + end + end + end +end + +assign tile_addr[11:5] = tile_index[7:1]; +assign tile_addr[4:1] = size16 ? tile_row : { tile_index[0],tile_row[2:0] }; +assign tile_addr[0] = tile1_fetch; + +assign tile_fetch = (save_x | tile_fetching) & ~extra_pause; +assign tile_save = tile_fetch & oam_eval_clk & tile1_fetch; + +reg [7:0] tile_data_0; + +always @(posedge clk) begin + if (ce) begin + if (oam_eval_reset | oam_eval_end) begin + tile_fetching <= 0; + tile1_fetch <= 0; + end else begin + if (save_x) begin + tile_fetching <= 1; + end + + if (tile_fetch & oam_eval_clk ) begin + if (~tile1_fetch) begin + tile_data_0 <= tile_data_in; + end else begin + tile_fetching <= 0; + end + tile1_fetch <= ~tile1_fetch; + end + end + end +end + +wire [7:0] sprite_x_sel = oam_eval_extra ? sprite_x : h_cnt; + +genvar j; + +generate + for (j = 0; j < SPRITES_EXTRA; j = j + 1) begin : gen_sprite_extra_store + sprites_extra_store st ( + .clk ( clk ), + .ce ( ce ), + + .reset ( oam_eval_reset ), + + .save_x ( save_x & (tile_fetch_x_index == (j)) ), + .xpos ( sprite_x_sel ), + + .tile_save ( tile_save & (tile_fetch_x_index == (j)) ), + .tile0_in ( tile_data_0 ), + .tile1_in ( tile_data_in ), + .index_in ( tile_fetch_sprite_index ), + .cgb_pal_in ( tile_attr[2:0] ), + .pal_in ( tile_attr[4] ), + .prio_in ( tile_attr[7] ), + + .x_match ( sprite_x_matches[j] ), + .tile0_o ( spr_tile0 ), + .tile1_o ( spr_tile1 ), + .pal_o ( spr_pal ), + .prio_o ( spr_prio ), + .cgb_pal_o ( spr_cgb_pal ), + .index_o ( spr_index ) + ); + end +endgenerate + +assign spr_found = |{sprite_x_matches} & extra_spr_en; + +endmodule \ No newline at end of file diff --git a/rtl/sprites_extra_store.v b/rtl/sprites_extra_store.v new file mode 100644 index 0000000..5f562bb --- /dev/null +++ b/rtl/sprites_extra_store.v @@ -0,0 +1,68 @@ +module sprites_extra_store ( + input clk, + input ce, + + input reset, + + input save_x, + input [7:0] xpos, + + input tile_save, + input [7:0] tile0_in, + input [7:0] tile1_in, + input [3:0] index_in, + input [2:0] cgb_pal_in, + input pal_in, + input prio_in, + + output x_match, + + output [7:0] tile0_o, + output [7:0] tile1_o, + output [2:0] cgb_pal_o, + output [3:0] index_o, + output pal_o, + output prio_o +); + +reg [7:0] x; +reg [7:0] tile0; +reg [7:0] tile1; +reg [2:0] cgb_pal; +reg [3:0] index; +reg pal; +reg prio; + +always @(posedge clk) begin + if (ce) begin + if (reset) begin + x <= 8'hFF; + tile0 <= 8'd0; + tile1 <= 8'd0; + end else begin + if (save_x) begin + x <= xpos; + end + + if (tile_save) begin + tile0 <= tile0_in; + tile1 <= tile1_in; + pal <= pal_in; + prio <= prio_in; + cgb_pal <= cgb_pal_in; + index <= index_in; + end + end + end +end + +assign x_match = (xpos == x); + +assign tile0_o = x_match ? tile0 : 8'hZZ; +assign tile1_o = x_match ? tile1 : 8'hZZ; +assign pal_o = x_match ? pal : 1'bZ; +assign prio_o = x_match ? prio : 1'bZ; +assign cgb_pal_o = x_match ? cgb_pal : 3'hZ; +assign index_o = x_match ? index : 4'hZ; + +endmodule \ No newline at end of file diff --git a/rtl/video.v b/rtl/video.v index cd81d70..c36dd0f 100644 --- a/rtl/video.v +++ b/rtl/video.v @@ -23,6 +23,7 @@ module video ( input reset, input clk, input ce, // 4 Mhz cpu clock + input ce_n, // falling edge input ce_cpu, // 4 or 8Mhz input isGBC, input isGBC_mode, @@ -64,6 +65,9 @@ module video ( output [15:0] dma_addr, input [7:0] dma_data, + input extra_spr_en, + input extra_wait, + // savestates input [7:0] Savestate_OAMRAMAddr, input Savestate_OAMRAMRWrEn, @@ -206,6 +210,8 @@ reg[7:0] obpd [63:0]; //64 bytes reg ff6c_opri; reg obj_prio_dmg_mode; +integer i; + // -------------------------------------------------------------------- // ----------------------------- DMA engine --------------------------- // -------------------------------------------------------------------- @@ -370,7 +376,7 @@ assign mode = mode3_l & ~mode3_end ? 2'b11 : 2'b00; -assign oam_cpu_allow = ~(oam_eval | mode3); +assign oam_cpu_allow = ~(oam_eval | mode3 | dma_active); assign vram_cpu_allow = ~mode3; // -------------------------------------------------------------------- @@ -829,21 +835,24 @@ wire [2:0] spr_cgb_pal = sprite_attr[2:0]; wire [7:0] spr_vram_data = (isGBC & isGBC_mode & spr_attr_cgb_bank) ? vram1_data : vram_data; wire [7:0] spr_tile_data_in = spr_attr_h_flip ? bit_reverse(spr_vram_data) : spr_vram_data; -// CGB sprite priority. Non-transparent pixels with lower sprite_index have priority. -function [7:0] spr_cgb_prio; - input [7:0] a3,a2,a1,a0; - integer i; - begin - for (i=0;i<8;i=i+1) - spr_cgb_prio[i] = (sprite_index < {a3[i], a2[i], a1[i], a0[i]}) & (spr_tile_data_in[i] | spr_tile_data0[i]); - end -endfunction - -wire [7:0] spr_cgb_index_prio = spr_cgb_prio(spr_cgb_index_shift[3], spr_cgb_index_shift[2], spr_cgb_index_shift[1], spr_cgb_index_shift[0]); - // DMG sprite pixels are only loaded into the shift register if the old pixel is transparent. -// CGB will mask the old pixel to 0 if the new pixel has higher priority. -wire [7:0] spr_tile_mask = (spr_tile_shift_0 | spr_tile_shift_1) & ((isGBC & ~obj_prio_dmg_mode) ? ~spr_cgb_index_prio : 8'hFF); +wire [7:0] spr_pixel_empty = ~(spr_tile_shift_0 | spr_tile_shift_1); + +// CGB sprite priority. Non-transparent pixels with lower sprite_index have priority. +reg [7:0] spr_cgb_higher_prio, spr_extra_cgb_higher_prio; +always @(*) begin + for (i = 0; i < 8; i = i + 1) begin + spr_cgb_higher_prio[i] = (sprite_index < {spr_cgb_index_shift[3][i], spr_cgb_index_shift[2][i], spr_cgb_index_shift[1][i], spr_cgb_index_shift[0][i]}) & (spr_tile_data_in[i] | spr_tile_data0[i]); + spr_extra_cgb_higher_prio[i] = (spr_extra_index < {spr_cgb_index_shift[3][i], spr_cgb_index_shift[2][i], spr_cgb_index_shift[1][i], spr_cgb_index_shift[0][i]}) & (spr_extra_tile[0][i] | spr_extra_tile[1][i]); + end +end + +wire [7:0] spr_extra_tile[0:1]; +wire [2:0] spr_extra_cgb_pal; +wire [3:0] spr_extra_index; +wire spr_extra_pal; +wire spr_extra_prio; +wire spr_extra_found; // cycle through the B01s states wire bg_tile_map_rd = (mode3 && bg_fetch_cycle[2:1] == 2'b00); @@ -879,40 +888,6 @@ always @(posedge clk) begin if(bg_tile_data1_rd) bg_tile_data1 <= bg_vram_data_in; end - // Shift sprite data out - if (~pcnt_paused) begin - spr_tile_shift_0 <= spr_tile_shift_0 << 1; - spr_tile_shift_1 <= spr_tile_shift_1 << 1; - spr_pal_shift <= spr_pal_shift << 1; - spr_prio_shift <= spr_prio_shift << 1; - spr_cgb_pal_shift[0] <= spr_cgb_pal_shift[0] << 1; - spr_cgb_pal_shift[1] <= spr_cgb_pal_shift[1] << 1; - spr_cgb_pal_shift[2] <= spr_cgb_pal_shift[2] << 1; - spr_cgb_index_shift[0] <= spr_cgb_index_shift[0] << 1; - spr_cgb_index_shift[1] <= spr_cgb_index_shift[1] << 1; - spr_cgb_index_shift[2] <= spr_cgb_index_shift[2] << 1; - spr_cgb_index_shift[3] <= spr_cgb_index_shift[3] << 1; - end - - // Fetch sprite new data - if (sprite_fetch_cycle[0]) begin - if(bg_tile_obj0_rd) spr_tile_data0 <= spr_tile_data_in; - - if(bg_tile_obj1_rd) begin - spr_tile_shift_0 <= (spr_tile_shift_0 & spr_tile_mask) | ( spr_tile_data0 & ~spr_tile_mask); - spr_tile_shift_1 <= (spr_tile_shift_1 & spr_tile_mask) | ( spr_tile_data_in & ~spr_tile_mask); - spr_pal_shift <= (spr_pal_shift & spr_tile_mask) | ({8{spr_pal}} & ~spr_tile_mask); - spr_prio_shift <= (spr_prio_shift & spr_tile_mask) | ({8{spr_prio}} & ~spr_tile_mask); - spr_cgb_pal_shift[0] <= (spr_cgb_pal_shift[0] & spr_tile_mask) | ({8{spr_cgb_pal[0]}} & ~spr_tile_mask); - spr_cgb_pal_shift[1] <= (spr_cgb_pal_shift[1] & spr_tile_mask) | ({8{spr_cgb_pal[1]}} & ~spr_tile_mask); - spr_cgb_pal_shift[2] <= (spr_cgb_pal_shift[2] & spr_tile_mask) | ({8{spr_cgb_pal[2]}} & ~spr_tile_mask); - spr_cgb_index_shift[0] <= (spr_cgb_index_shift[0] & spr_tile_mask) | ({8{sprite_index[0]}} & ~spr_tile_mask); - spr_cgb_index_shift[1] <= (spr_cgb_index_shift[1] & spr_tile_mask) | ({8{sprite_index[1]}} & ~spr_tile_mask); - spr_cgb_index_shift[2] <= (spr_cgb_index_shift[2] & spr_tile_mask) | ({8{sprite_index[2]}} & ~spr_tile_mask); - spr_cgb_index_shift[3] <= (spr_cgb_index_shift[3] & spr_tile_mask) | ({8{sprite_index[3]}} & ~spr_tile_mask); - end - end - if (~&bg_fetch_cycle) begin bg_fetch_cycle <= bg_fetch_cycle + 1'b1; end @@ -966,8 +941,77 @@ always @(posedge clk) begin end +always @(posedge clk) begin + if (reset) begin + + end else begin + + if (ce) begin + // Shift sprite data out + if (~pcnt_paused) begin + spr_tile_shift_0 <= spr_tile_shift_0 << 1; + spr_tile_shift_1 <= spr_tile_shift_1 << 1; + spr_pal_shift <= spr_pal_shift << 1; + spr_prio_shift <= spr_prio_shift << 1; + spr_cgb_pal_shift[0] <= spr_cgb_pal_shift[0] << 1; + spr_cgb_pal_shift[1] <= spr_cgb_pal_shift[1] << 1; + spr_cgb_pal_shift[2] <= spr_cgb_pal_shift[2] << 1; + spr_cgb_index_shift[0] <= spr_cgb_index_shift[0] << 1; + spr_cgb_index_shift[1] <= spr_cgb_index_shift[1] << 1; + spr_cgb_index_shift[2] <= spr_cgb_index_shift[2] << 1; + spr_cgb_index_shift[3] <= spr_cgb_index_shift[3] << 1; + end + + // Fetch sprite new data + if (sprite_fetch_cycle[0]) begin + if(bg_tile_obj0_rd) spr_tile_data0 <= spr_tile_data_in; + + if(bg_tile_obj1_rd) begin + for (i = 0; i < 8; i = i + 1) begin + if (spr_pixel_empty[i] | (isGBC & ~obj_prio_dmg_mode & spr_cgb_higher_prio[i])) begin + spr_tile_shift_0[i] <= spr_tile_data0[i]; + spr_tile_shift_1[i] <= spr_tile_data_in[i]; + spr_pal_shift[i] <= spr_pal; + spr_prio_shift[i] <= spr_prio; + spr_cgb_pal_shift[0][i] <= spr_cgb_pal[0]; + spr_cgb_pal_shift[1][i] <= spr_cgb_pal[1]; + spr_cgb_pal_shift[2][i] <= spr_cgb_pal[2]; + spr_cgb_index_shift[0][i] <= sprite_index[0]; + spr_cgb_index_shift[1][i] <= sprite_index[1]; + spr_cgb_index_shift[2][i] <= sprite_index[2]; + spr_cgb_index_shift[3][i] <= sprite_index[3]; + end + end + end + end + end + + // Load extra sprite + if (ce_n) begin + if (~pcnt_paused & spr_extra_found) begin + for (i = 0; i < 8; i = i + 1) begin + if (spr_pixel_empty[i] | (isGBC & ~obj_prio_dmg_mode & spr_extra_cgb_higher_prio[i])) begin + spr_tile_shift_0[i] <= spr_extra_tile[0][i]; + spr_tile_shift_1[i] <= spr_extra_tile[1][i]; + spr_pal_shift[i] <= spr_extra_pal; + spr_prio_shift[i] <= spr_extra_prio; + spr_cgb_pal_shift[0][i] <= spr_extra_cgb_pal[0]; + spr_cgb_pal_shift[1][i] <= spr_extra_cgb_pal[1]; + spr_cgb_pal_shift[2][i] <= spr_extra_cgb_pal[2]; + spr_cgb_index_shift[0][i] <= spr_extra_index[0]; + spr_cgb_index_shift[1][i] <= spr_extra_index[1]; + spr_cgb_index_shift[2][i] <= spr_extra_index[2]; + spr_cgb_index_shift[3][i] <= spr_extra_index[3]; + end + end + end + end + end +end + assign vram_rd = lcdc_on && (bg_tile_map_rd || bg_tile_data0_rd || - bg_tile_data1_rd || bg_tile_obj0_rd || bg_tile_obj1_rd); + bg_tile_data1_rd || bg_tile_obj0_rd || + bg_tile_obj1_rd || tile_obj_extra_rd); wire bg_tile_a12 = !lcdc_tile_data_sel?(~bg_tile[7]):1'b0; @@ -976,12 +1020,16 @@ wire tile_map_sel = window_ena?lcdc_win_tile_map_sel:lcdc_bg_tile_map_sel; //GBC: check if flipped y wire [2:0] tile_line_flip = (isGBC && isGBC_mode && bg_tile_attr_new[6]) ? ~tile_line : tile_line; +wire tile_obj_extra_rd; +wire [11:0] sprite_extra_addr; + assign vram_addr = bg_tile_map_rd?{2'b11, tile_map_sel, bg_tile_map_addr}: bg_tile_data0_rd?{bg_tile_a12, bg_tile, tile_line_flip, 1'b0}: bg_tile_data1_rd?{bg_tile_a12, bg_tile, tile_line_flip, 1'b1}: bg_tile_obj0_rd ? {1'b0, sprite_addr, 1'b0} : - {1'b0, sprite_addr, 1'b1}; + bg_tile_obj1_rd ? {1'b0, sprite_addr, 1'b1} : + {1'b0, sprite_extra_addr }; sprites sprites ( .clk ( clk ), @@ -1004,6 +1052,7 @@ sprites sprites ( .sprite_addr ( sprite_addr ), .sprite_attr ( sprite_attr ), .sprite_index ( sprite_index ), + .sprite_fetch_c1 ( sprite_fetch_cycle == 3'd1 ), .sprite_fetch_done ( sprite_fetch_done) , .dma_active ( dma_active), @@ -1012,6 +1061,21 @@ sprites sprites ( .oam_di ( oam_di ), .oam_do ( oam_do ), + // For extra sprites + .extra_spr_en ( extra_spr_en ), + .extra_wait ( extra_wait ), + .tile_data_in ( spr_tile_data_in ), + .extra_tile_fetch ( tile_obj_extra_rd ), + .extra_tile_addr ( sprite_extra_addr ), + + .spr_extra_found ( spr_extra_found ), + .spr_extra_tile0 ( spr_extra_tile[0] ), + .spr_extra_tile1 ( spr_extra_tile[1] ), + .spr_extra_pal ( spr_extra_pal ), + .spr_extra_prio ( spr_extra_prio ), + .spr_extra_cgb_pal( spr_extra_cgb_pal ), + .spr_extra_index ( spr_extra_index ), + .Savestate_OAMRAMAddr (Savestate_OAMRAMAddr), .Savestate_OAMRAMRWrEn (Savestate_OAMRAMRWrEn), .Savestate_OAMRAMWriteData (Savestate_OAMRAMWriteData),