//
// Gameboy for the MiST
// (c) 2015 Till Harbaum
//
// The gameboy lcd runs from a shift register which is filled at 4194304 pixels/sec
//
// This module captures the pixel stream produced on clk_sys into a two-bank
// frame buffer and scans it out again on clk_vid with regenerated video
// timing, applying palette / colour-LUT lookup, optional frame blending,
// an optional "shadow" effect and the SGB border/backdrop on the way out.

module lcd
(
	input        clk_sys,        // clock of the capture (write) side
	input        ce,             // clock enable qualifying all capture-side activity
	input        lcd_clkena,     // pixel strobe: `data` is written to the frame buffer when high (with ce)
	input        lcd_vs,         // rising edge ends the freeze / blank-output state after LCD enable
	input        shadow,         // enable shadow effect (ignored when isGBC is set)
	input [14:0] data,           // pixel data: [1:0] shade index, or 5:5:5 colour (r=[4:0], g=[9:5], b=[14:10])
	input  [1:0] mode,           // LCD mode, see "Mode" table below; 01 (v-blank) is treated as "lcd off"
	input        isGBC,          // selects colour LUT / 5:5:5 output path
	input        double_buffer,  // when low, output timing is re-locked to the source frame

	//palette
	input [23:0] pal1,           // {r,g,b} used for shade 0 when tint is set
	input [23:0] pal2,           // {r,g,b} used for shade 1 when tint is set
	input [23:0] pal3,           // {r,g,b} used for shade 2 when tint is set
	input [23:0] pal4,           // {r,g,b} used for shade 3 when tint is set

	// colour LUT download (three 16 bit words carry two 24 bit entries)
	input        lut_download,
	input        ioctl_wr,
	input [15:0] ioctl_addr,     // not referenced inside this module
	input [15:0] ioctl_dout,

	input [15:0] sgb_border_pix, // [15] = border pixel valid, [14:0] = 5:5:5 colour (backdrop when [15] is low)
	input        sgb_pal_en,
	input        sgb_en,
	input        sgb_freeze,     // suppresses frame buffer writes and bank swaps while high

	input        tint,           // use pal1..pal4 instead of greyscale
	input        inv,            // invert the 2 bit shade index
	input        frame_blend,    // average current and previous frame
	input        originalcolors, // GBC: bypass the colour LUT and expand 5:5:5 directly
	input        analog_wide,    // select the "wide" horizontal timing

	input        on,             // LCD enabled

	// VGA output
	input            clk_vid,    // 67.108864 MHz
	output reg       ce_pix,
	output reg       hs,
	output reg       vs,
	output reg       hbl,
	output reg       vbl,
	output reg [8:0] h_cnt,
	output reg [8:0] v_cnt,
	output     [7:0] r,
	output     [7:0] g,
	output     [7:0] b,
	output           h_end       // high while h_cnt is on the last pixel of the line
);

// -------------------------------------------------------------------------------
// ------------------------------ frame buffer write -----------------------------
// -------------------------------------------------------------------------------

reg [14:0] vbuffer_inptr;     // write address inside the current bank
reg        vbuffer_in_bank;   // bank currently being written
reg        lcd_off, lcd_freeze;
reg        blank_de, blank_output;
reg [14:0] blank_data;        // colour sampled from `data` while the LCD is off
reg [16:0] lcd_off_cnt;       // saturating count of ce cycles since `on` went low

localparam BLANK_DELAY = 456*154;

// A pixel is written either from the live stream (unless frozen) or from the
// locally regenerated blank frame.
wire pix_wr = ce & ( (lcd_clkena & ~lcd_freeze & ~sgb_freeze) | blank_de);

always @(posedge clk_sys) begin
	reg old_lcd_off, old_lcd_vs;
	reg [8:0] blank_hcnt,blank_vcnt;

	lcd_off <= !on || (mode == 2'd01);
	blank_de <= (!on && blank_output && blank_hcnt < 160 && blank_vcnt < 144);

	// Advance the write pointer. Saturate at all-ones so an over-long frame
	// cannot wrap around and overwrite the start of the bank.
	if (pix_wr & ~&vbuffer_inptr) vbuffer_inptr <= vbuffer_inptr + 1'd1;

	// Any change of lcd_off restarts the pointer (this assignment overrides
	// the increment above); entering lcd_off also swaps banks unless frozen.
	old_lcd_off <= lcd_off;
	if(old_lcd_off ^ lcd_off) begin
		vbuffer_inptr <= 0;
		if (lcd_off) begin //LCD disabled or VBlank
			if(~lcd_freeze & ~sgb_freeze) vbuffer_in_bank <= ~vbuffer_in_bank;
		end
	end

	// Delay blanking the screen for GBC
	if (on)
		lcd_off_cnt <= 0;
	else if (ce & ~&lcd_off_cnt)
		lcd_off_cnt <= lcd_off_cnt + 1'b1;

	if (~on) begin // LCD disabled, start blank output
		lcd_freeze <= 1;
		if ( (~isGBC | (lcd_off_cnt > BLANK_DELAY) ) & ~blank_output) begin
			blank_output <= 1'b1;
			{blank_hcnt,blank_vcnt} <= 0;
		end
	end

	// Regenerate LCD timings for filling with blank color when LCD is off
	if (ce & ~on & blank_output) begin
		blank_data <= data;
		blank_hcnt <= blank_hcnt + 1'b1;
		if (blank_hcnt == 9'd455) begin
			blank_hcnt <= 0;
			blank_vcnt <= blank_vcnt + 1'b1;
			if (blank_vcnt == 9'd153) begin
				// end of the regenerated frame: restart pointer and swap banks
				blank_vcnt <= 0;
				vbuffer_inptr <= 0;
				vbuffer_in_bank <= ~vbuffer_in_bank;
			end
		end
	end

	// Output 1 blank/repeated frame until VSync after LCD is enabled
	old_lcd_vs <= lcd_vs;
	if (~old_lcd_vs & lcd_vs) begin
		if (lcd_freeze) lcd_freeze <= 0;
		if (blank_output) blank_output <= 0;
	end
end

// Two banks of 32768 entries, addressed by {bank, pointer}.
reg [14:0] vbuffer[65536];
always @(posedge clk_sys) begin
	// While the LCD is on but blank_output is still set, the stored blank
	// colour is written instead of the live data.
	if(pix_wr) vbuffer[{vbuffer_in_bank, vbuffer_inptr}] <= (on & blank_output) ? blank_data : data;
end

// -------------------------------------------------------------------------------
// -------------------------------- video timing ---------------------------------
// -------------------------------------------------------------------------------

// Mode 00:  h-blank
// Mode 01:  v-blank
// Mode 10:  oam
// Mode 11:  oam and vram

// Narrow
parameter H      = 9'd160;    // width of visible area
parameter HFP    = 9'd103;    // unused time before hsync
parameter HS     = 9'd32;     // width of hsync
parameter HBP    = 9'd130;    // unused time after hsync
parameter HTOTAL = H+HFP+HS+HBP;
// total = 425

// Wide
parameter HFP_W    = 9'd76;
parameter HS_W     = 9'd26;
parameter HBP_W    = 9'd92;
parameter HTOTAL_W = H+HFP_W+HS_W+HBP_W;
// total = 354

parameter H_BORDER = 9'd48;
parameter V_BORDER = 9'd40;

parameter H_START  = 9'd9+H_BORDER;

parameter V        = 144;    // height of visible area
parameter VS_START = 37;     // start of vsync
parameter VSTART   = 105;    // start of active video
parameter VTOTAL   = 264;

wire [8:0] h_total  = analog_wide ? HTOTAL_W : HTOTAL;
wire [8:0] hs_start = analog_wide ? (H_START+H+HFP_W)      : (H_START+H+HFP);
wire [8:0] hs_end   = analog_wide ? (H_START+H+HFP_W+HS_W) : (H_START+H+HFP+HS);

assign h_end = (h_cnt == h_total-1);

// (67108864 / 32 / 228 / 154) == (67108864 / 10 / 425.6 / 264) == 59.7275Hz
// We need 4256 cycles per line so 1 pixel clock cycle needs to be 6 cycles longer.
// Narrow: 424x10 + 1x16 cycles
// Wide: 352x12 + 2x16 cycles
reg [3:0] pix_div_cnt;
reg       ce_pix_n;
always @(posedge clk_vid) begin
	pix_div_cnt <= pix_div_cnt + 1'd1;

	// Longer cycle at the last pixel(s): the early reset below is skipped
	// there, so the 4 bit counter runs through all 16 states.
	if ( (~analog_wide && ~h_end && pix_div_cnt == 4'd9) ||
	     (analog_wide && h_cnt < h_total-2 && pix_div_cnt == 4'd11) )
		pix_div_cnt <= 0;

	ce_pix   <= !pix_div_cnt;
	ce_pix_n <= (pix_div_cnt == 4'd5);
end

reg [14:0] vbuffer_outptr;    // read address inside the selected bank
reg        vbuffer_out_bank;  // bank currently being displayed
reg  [1:0] shadow_buf[160];   // one line of shade indices for the shadow effect

reg hb, vb, gb_hb, gb_vb, wait_vbl;

always @(posedge clk_vid) begin
	reg [14:0] inptr,inptr1,inptr2;
	reg old_lcd_off;
	reg old_on;

	// Sample the clk_sys write pointer; only accept it when two consecutive
	// samples agree.
	inptr2 <= vbuffer_inptr;
	inptr1 <= inptr2;
	if(inptr1 == inptr2) inptr <= inptr1;

	if (ce_pix_n) begin
		// generate positive hsync signal
		if(h_cnt == hs_end)  hs <= 0;
		if(h_cnt == hs_start) begin
			hs <= 1;

			// generate positive vsync signal
			if(v_cnt == VS_START)   vs <= 1;
			if(v_cnt == VS_START+3) vs <= 0;
		end

		// Hblank (gb_* = game area only, hb/vb = game area plus border)
		if(h_cnt == H_START)            gb_hb <= 0;
		if(h_cnt == H_START+H)          gb_hb <= 1;
		if(h_cnt == H_START-H_BORDER)   hb <= 0;
		if(h_cnt == H_START+H_BORDER+H) hb <= 1;

		// Vblank
		if(v_cnt == VSTART)                   gb_vb <= 0;
		if(v_cnt == VSTART+V)                 gb_vb <= 1;
		if(v_cnt == VSTART-V_BORDER)          vb <= 0;
		if(v_cnt == VSTART+V_BORDER+V-VTOTAL) vb <= 1;
	end

	if(ce_pix) begin

		h_cnt <= h_cnt + 1'd1;
		if(h_end) begin
			h_cnt <= 0;
			// v_cnt is held while waiting in vblank for the output reset
			if(~(vb & wait_vbl) | double_buffer) v_cnt <= v_cnt + 1'd1;
			if(v_cnt >= VTOTAL-1) v_cnt <= 0;

			if(v_cnt == VSTART-1) begin
				vbuffer_outptr <= 0;
				// Read from write buffer if it is far enough ahead
				vbuffer_out_bank <= (inptr >= (160*60) || ~double_buffer) ? vbuffer_in_bank : ~vbuffer_in_bank;
			end
		end

		// visible area?
		if(~gb_hb & ~gb_vb) begin
			vbuffer_outptr <= vbuffer_outptr + 1'd1;
		end
	end

	old_lcd_off <= lcd_off;
	old_on <= on;
	if (~double_buffer) begin
		// Lcd turned on. Wait in vblank for output reset.
		if (~old_on & on & ~vb) wait_vbl <= 1'b1; // lcd enabled

		// These assignments come last and therefore override the counters
		// and sync outputs set above.
		if (old_lcd_off & ~lcd_off & vb) begin // lcd enabled or out of vblank
			wait_vbl <= 0;
			h_cnt <= 0;
			v_cnt <= 0;
			hs    <= 0;
			vs    <= 0;
		end
	end
end

// -------------------------------------------------------------------------------
// ------------------------------- pixel generator -------------------------------
// -------------------------------------------------------------------------------

reg [14:0] pixel_reg;
reg  [7:0] shptr = 0;

// Declared ahead of their first use below.
reg [14:0] pixel_out;
wire [1:0] pixel = (pixel_out[1:0] ^ {inv,inv}); //invert gb only

always @(posedge clk_vid) pixel_reg <= vbuffer[{vbuffer_out_bank, vbuffer_outptr}];

// Previous frame data for frame blend
reg [14:0] prev_vbuffer[160*144];
reg [14:0] prev_pixel_reg;
always @(posedge clk_vid) begin
	if(ce_pix) begin
		if (~gb_hb & ~gb_vb) begin
			prev_vbuffer[vbuffer_outptr] <= pixel_reg;
			shadow_buf[shptr] <= pixel;
		end
		shptr <= (shptr == 159) ? 8'd0 : shptr + 1'd1;
	end

	// Overrides issued after the ce_pix branch: restart the line pointer in
	// hblank and clear the shadow line buffer during vblank.
	if (gb_hb) shptr <= 0;
	if (gb_vb) shadow_buf[shptr] <= 2'd0;

	prev_pixel_reg <= prev_vbuffer[vbuffer_outptr];
end

// Current pixel_reg latched at ce_pix_n so it is ready at ce_pix
// (and the previous-frame pixel latched at ce_pix so it is ready at ce_pix_n).
always@(posedge clk_vid) begin
	if (ce_pix_n) pixel_out <= pixel_reg;
	else if (ce_pix) pixel_out <= prev_pixel_reg;
end

wire [4:0] r5 = pixel_out[4:0];
wire [4:0] g5 = pixel_out[9:5];
wire [4:0] b5 = pixel_out[14:10];

// greyscale
wire [7:0] grey = (pixel==0) ? 8'd252 : (pixel==1) ? 8'd168 : (pixel==2) ? 8'd96 : 8'd0;

// sgb_border_pix contains backdrop color when sgb_border_pix[15] is low.
wire sgb_border = sgb_border_pix[15] & sgb_en;

// Average of two 8 bit values, using a 9 bit intermediate sum.
function [7:0] blend;
	input [7:0] a,b;
	reg [8:0] sum;
	begin
		sum = a + b;
		blend = sum[8:1];
	end
endfunction

// Color LUT 16bit to 24bit writes
// Three consecutive 16 bit words are repacked into two 24 bit LUT entries.
reg [23:0] color_lut_data;
reg  [7:0] color_lut_temp;
reg  [1:0] color_cnt;
reg [14:0] color_lut_wr_addr;
reg        color_lut_wr, prev_color_lut_wr;
reg        prev_lut_download;

always@(posedge clk_sys) begin
	// Start of a download: restart word counter and write address.
	prev_lut_download <= lut_download;
	if (~prev_lut_download & lut_download) begin
		color_cnt <= 2'd0;
		color_lut_wr_addr <= 15'd0;
	end

	// Advance the address after each completed write pulse.
	prev_color_lut_wr <= color_lut_wr;
	if (prev_color_lut_wr & ~color_lut_wr) begin
		color_lut_wr_addr <= color_lut_wr_addr + 1'b1;
	end

	color_lut_wr <= 0;
	if (lut_download & ioctl_wr) begin
		color_cnt <= color_cnt + 1'b1;
		case (color_cnt)
			2'd0: begin
				// low 16 bits of the first entry
				color_lut_data[15:0] <= ioctl_dout[15:0];
			end
			2'd1: begin
				// top 8 bits of the first entry; keep the low byte of the second
				color_lut_data[23:16] <= ioctl_dout[7:0];
				color_lut_temp <= ioctl_dout[15:8];
				color_lut_wr <= 1'b1;
			end
			2'd2: begin
				// remaining 16 bits of the second entry
				color_lut_data[23:0] <= { ioctl_dout[15:0], color_lut_temp };
				color_lut_wr <= 1'b1;
				color_cnt <= 2'd0;
			end
		endcase
	end
end

wire [23:0] color_lut_dout;

// Color 3D LUT
// NOTE(review): dpram is defined outside this file; the parameters are
// presumably address width, data width and init file - confirm.
dpram #(15,24, "lcd_color_lut.mif") lcd_color_lut (
	.clock_a   (clk_vid),
	.address_a ( { r5, g5, b5 } ),
	.q_a       (color_lut_dout),

	.clock_b   (clk_sys),
	.address_b (color_lut_wr_addr),
	.data_b    (color_lut_data),
	.wren_b    (color_lut_wr),
	.q_b       ()
);

// Colour source selection for the pixel currently held in pixel_out.
reg [7:0] r_tmp, g_tmp, b_tmp;
always@(*) begin
	if (~sgb_pal_en & isGBC & !originalcolors) begin
		// GBC through the colour LUT
		r_tmp = color_lut_dout[7:0];
		g_tmp = color_lut_dout[15:8];
		b_tmp = color_lut_dout[23:16];
	end else if (sgb_pal_en | (isGBC & originalcolors)) begin
		// direct 5 -> 8 bit expansion
		r_tmp = {r5,r5[4:2]};
		g_tmp = {g5,g5[4:2]};
		b_tmp = {b5,b5[4:2]};
	end else if (tint) begin
		{r_tmp,g_tmp,b_tmp} = (pixel==0) ? pal1 : (pixel==1) ? pal2 : (pixel==2) ? pal3 : pal4;
	end else begin
		{r_tmp,g_tmp,b_tmp} = {3{grey}};
	end
end

reg  [7:0] r_prev, g_prev, b_prev;  // colour of the previous-frame pixel
reg  [7:0] r_cur, g_cur, b_cur;     // colour of the current-frame pixel
reg [14:0] sgb_border_d;
reg        hbl_l, vbl_l;
reg        border_en;
reg  [1:0] sc1, sc;                 // delayed shadow line buffer value
reg  [7:0] rt, gt, bt;              // output colour before the shadow effect
reg        shadow_end1, shadow_end2;
wire       shadow_en = shadow && ~isGBC ;

// Shadowed pixels are darkened to 1/2 + 1/4 of their value, plus 1/8 and/or
// 1/16 depending on the bits of the stored shade `sc`.
assign r = shadow_end2 ? ((rt >> 1) + (rt >> 2) + (~sc[1] ? (rt >> 3) : 1'd0) + (~sc[0] ? (rt >> 4) : 1'd0)) : rt;
assign g = shadow_end2 ? ((gt >> 1) + (gt >> 2) + (~sc[1] ? (gt >> 3) : 1'd0) + (~sc[0] ? (gt >> 4) : 1'd0)) : gt;
assign b = shadow_end2 ? ((bt >> 1) + (bt >> 2) + (~sc[1] ? (bt >> 3) : 1'd0) + (~sc[0] ? (bt >> 4) : 1'd0)) : bt;

always@(posedge clk_vid) begin

	if (ce_pix) begin
		{r_cur, g_cur, b_cur} <= {r_tmp, g_tmp, b_tmp};
		// Shadow applies where the current shade is 0 and the shadow line
		// buffer holds a non-zero shade at this column.
		shadow_end1 <= shadow_en && (|shadow_buf[shptr]) && (pixel == 0);
		sc1 <= shadow_buf[shptr];
		sc <= sc1;
		shadow_end2 <= (shadow_end1 && ~border_en);
	end

	if (ce_pix_n)
		{r_prev, g_prev, b_prev} <= {r_tmp, g_tmp, b_tmp};

	if (ce_pix) begin
		// visible area?
		hbl_l <= sgb_en ? hb : gb_hb;
		vbl_l <= sgb_en ? vb : gb_vb;

		hbl <= hbl_l;
		vbl <= vbl_l;

		// Allow backdrop color in border area and the border to overlap game area.
		border_en <= ((gb_hb|gb_vb) & sgb_en) | sgb_border;
		sgb_border_d <= sgb_border_pix[14:0];

		if (border_en) begin
			rt <= {sgb_border_d[4:0],sgb_border_d[4:2]};
			gt <= {sgb_border_d[9:5],sgb_border_d[9:7]};
			bt <= {sgb_border_d[14:10],sgb_border_d[14:12]};
		end else if (frame_blend) begin
			rt <= blend(r_cur, r_prev);
			gt <= blend(g_cur, g_prev);
			bt <= blend(b_cur, b_prev);
		end else begin
			{rt,gt,bt} <= {r_cur, g_cur, b_cur};
		end
	end
end

endmodule