Files
SharpMZ/zpu/cpu/zpu_core_flex.vhd

1185 lines
70 KiB
VHDL
Raw Permalink Blame History

-- ZPU (flex variant)
--
-- Copyright 2004-2008 oharboe - Øyvind Harboe - oyvind.harboe@zylin.com
--
-- Changes by Alastair M. Robinson, 2013
-- to allow the core to run from external RAM, and to balance performance and area.
-- The goal is to make the ZPU a useful support CPU for such tasks as loading
-- ROMs from SD Card, while keeping the area under 1,000 logic cells.
-- To this end, there are a number of generics which can be used to adjust the
-- speed / area balance.
--
-- The FreeBSD license
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
--
-- 1. Redistributions of source code must retain the above copyright
-- notice, this list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above
-- copyright notice, this list of conditions and the following
-- disclaimer in the documentation and/or other materials
-- provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE ZPU PROJECT ``AS IS'' AND ANY
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-- THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-- ZPU PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-- INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-- ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--
-- The views and conclusions contained in the software and documentation
-- are those of the authors and should not be interpreted as representing
-- official policies, either expressed or implied, of the ZPU Project.
-- WARNING - the stack bit has changed from bit 26 to bit 30.
-- RTL code which relies upon this will need updating.
-- Provided the linkscripts and CPU are kept in sync,
-- this change should be essentially invisible to the user.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.zpu_pkg.all;
-- ZPU "flex" core: interface declaration.
--
-- The boolean generics select which optional instruction groups the decoder
-- recognises in hardware. An opcode whose group is switched off keeps the
-- generic "emulate" decoding. None of the boolean generics has a default, so
-- every instantiation has to bind all of them.
entity zpu_core_flex is
    generic (
        IMPL_MULTIPLY       : boolean;                  -- Decode mult in hardware.
        IMPL_COMPARISON_SUB : boolean;                  -- Decode sub, eq/neq and (u)lessthan(orequal) in hardware.
        IMPL_EQBRANCH       : boolean;                  -- Decode eqbranch and neqbranch in hardware.
        IMPL_STOREBH        : boolean;                  -- Decode halfword and byte writes in hardware.
        IMPL_LOADBH         : boolean;                  -- Decode halfword and byte reads in hardware.
        IMPL_CALL           : boolean;                  -- Decode call in hardware.
        IMPL_SHIFT          : boolean;                  -- Decode lshiftright, ashiftright and ashiftleft in hardware.
        IMPL_XOR            : boolean;                  -- Decode xor in hardware.
        -- Disabled generic, kept for reference:
        -- REMAP_STACK : boolean; -- Map the stack / Boot ROM to an address specified by "stackbit" - default 0x40000000
        CACHE               : boolean;                  -- Keep the last fetched 32-bit program word; reduces re-fetching and
                                                        -- speeds up consecutive IMs in particular.
        -- Disabled generic, kept for reference:
        -- stackbit : integer -- Specify base address of stack - defaults to 0x40000000
        CLK_FREQ            : integer := 100000000;     -- Frequency of the input clock.
        STACK_ADDR          : integer := 0              -- Initial stack address on CPU start.
    );
    port (
        clk                 : in  std_logic;
        -- Asynchronous reset signal.
        reset               : in  std_logic;
        -- This particular implementation of the ZPU does not have a clocked
        -- enable signal.
        enable              : in  std_logic;

        -- External memory / IO bus.
        in_mem_busy         : in  std_logic;
        mem_read            : in  std_logic_vector(WORD_32BIT_RANGE);
        mem_write           : out std_logic_vector(WORD_32BIT_RANGE);
        out_mem_addr        : out std_logic_vector(ADDR_BIT_RANGE);
        out_mem_writeEnable : out std_logic;
        out_mem_bEnable     : out std_logic;            -- Enable byte write
        out_mem_hEnable     : out std_logic;            -- Enable halfword write
        out_mem_readEnable  : out std_logic;

        -- Set interrupt_request to one to jump to the interrupt vector.
        -- The ZPU communicates with the hardware that raised the interrupt
        -- via memory mapped IO, or the interrupt flag can be cleared
        -- automatically.
        interrupt_request   : in  std_logic;
        interrupt_ack       : out std_logic;            -- Interrupt acknowledge, ZPU has entered Interrupt Service Routine.
        interrupt_done      : out std_logic;            -- Interrupt service routine completed/done.

        -- Signals that the break instruction was executed; normally only
        -- used in simulation to stop the simulation.
        break               : out std_logic;
        debug_txd           : out std_logic;            -- Debug serial output.

        -- Dual-port block RAM (RAM/ROM) interface, ports A and B.
        MEM_A_WRITE_ENABLE  : out std_logic;
        MEM_A_ADDR          : out std_logic_vector(ADDR_32BIT_RANGE);
        MEM_A_WRITE         : out std_logic_vector(WORD_32BIT_RANGE);
        MEM_B_WRITE_ENABLE  : out std_logic;
        MEM_B_ADDR          : out std_logic_vector(ADDR_32BIT_RANGE);
        MEM_B_WRITE         : out std_logic_vector(WORD_32BIT_RANGE);
        MEM_A_READ          : in  std_logic_vector(WORD_32BIT_RANGE);
        MEM_B_READ          : in  std_logic_vector(WORD_32BIT_RANGE)
    );
end zpu_core_flex;
architecture behave of zpu_core_flex is
-- state machine.
-- States of the main execution state machine driven by the opcodeControl
-- process. Reset enters State_Resync. States marked "no handler visible" are
-- not referenced in the part of the process reviewed here; they may be
-- handled further down or be unused leftovers - verify before removing.
type State_Type is (
State_Fetch, -- Point BRAM port B at pc, then go to State_FetchNext.
State_WriteIODone, -- Wait for in_mem_busy = '0' after an external write, then resync.
State_Execute, -- Execute decodedOpcode and advance pc.
State_StoreToStack, -- No handler visible.
State_Add, -- Write the pipelined sum of the two stack reads back to the stack.
State_Or, -- No handler visible (Or is completed directly in State_Execute).
State_And, -- No handler visible (And is completed directly in State_Execute).
State_Xor, -- No handler visible (Xor is completed directly in State_Execute).
State_Store, -- Write memBRead to the BRAM address held in memARead.
State_ReadIO, -- Wait for an external word read, then write mem_read to the stack.
State_ReadIOBH, -- Byte/halfword variant of State_ReadIO.
State_WriteIO, -- Drive an external word write, then State_WriteIODone.
State_WriteIOBH, -- Byte/halfword variant of State_WriteIO.
State_Load, -- No handler visible.
State_FetchNext, -- Copy memARead to the top of stack, then State_Decode.
State_AddSP, -- First wait state of addsp; leads to State_AddSP2.
State_AddSP2, -- Second wait state of addsp; leads to State_Add.
State_ReadIODone, -- No handler visible.
State_StoreAndDecode, -- Handler exists, but no transition into it is visible.
State_Decode, -- Check for interrupts, latch the program word, then State_Execute.
State_Resync, -- Re-point BRAM ports A/B at sp and sp+1; also the reset state.
State_Interrupt, -- No handler visible (interrupts are injected as Decoded_Interrupt).
State_Mult, -- Write the low word of the multiply result to the stack.
State_Comparison, -- Write the (u)lessthan(orequal) result to the stack.
State_EqNeq, -- Write the eq/neq result to the stack.
State_Sub, -- Write the subtraction result to the stack.
State_IncSP, -- Increment sp once more, then resync.
State_Shift, -- Wait for shift_done, then write shift_reg to the stack.
State_Debug -- Debug dump sub-state-machine (see DebugType).
);
-- Decoded instruction classes. decodeControl produces sampledDecodedOpcode
-- from the raw opcode byte; opcodeControl registers it into decodedOpcode.
-- NOTE: the debug code emits DecodedOpcodeType'POS(decodedOpcode) as a 6-bit
-- value, so adding or reordering literals changes the debug output encoding.
type DecodedOpcodeType is (
Decoded_Nop,
Decoded_Im,
Decoded_ImShift, -- Not produced by decodeControl.
Decoded_LoadSP,
Decoded_StoreSP ,
Decoded_AddSP,
Decoded_Emulate, -- Default for the emulate opcode range when no IMPL_* generic claims the opcode.
Decoded_Break,
Decoded_PushSP,
Decoded_PopPC,
Decoded_Add,
Decoded_Or,
Decoded_And,
Decoded_Load,
Decoded_LoadBH, -- loadb / loadh, only decoded when IMPL_LOADBH is true.
Decoded_Not,
Decoded_Xor, -- Only decoded when IMPL_XOR is true.
Decoded_Flip,
Decoded_Store,
Decoded_StoreBH, -- storeb / storeh, only decoded when IMPL_STOREBH is true.
Decoded_PopSP,
Decoded_Interrupt, -- Never produced by decodeControl; injected by the state machine when an interrupt is taken.
Decoded_Mult, -- Only decoded when IMPL_MULTIPLY is true.
Decoded_Sub, -- Only decoded when IMPL_COMPARISON_SUB is true.
Decoded_Comparison, -- (u)lessthan(orequal), only decoded when IMPL_COMPARISON_SUB is true.
Decoded_EqNeq, -- eq / neq, only decoded when IMPL_COMPARISON_SUB is true.
Decoded_EqBranch, -- eqbranch / neqbranch, only decoded when IMPL_EQBRANCH is true.
Decoded_Call, -- Only decoded when IMPL_CALL is true.
Decoded_Shift -- lshiftright / ashiftright / ashiftleft, only decoded when IMPL_SHIFT is true.
);
--
-- Sub-states used by the State_Debug handler (signal debugState).
-- Debug_Start queues a record with PC/SP/stack values and moves on to
-- Debug_DumpFifo; the handling of the later states lies outside the part of
-- the file reviewed here.
type DebugType is
(
Debug_Start,
Debug_DumpFifo,
Debug_DumpFifo_1,
Debug_End
);
-- start byte address of stack.
-- point to top of RAM - 2*words
--constant spStart : unsigned(spStart(ADDR_32BIT_RANGE));
--std_logic_vector(ADDR_BIT_RANGE) := std_logic_vector(to_unsigned((2**(maxAddrBitBRAM+1))-8, maxAddrBit));
-- Internal (unsigned) view of the dual-port BRAM interface. These are wired
-- one-to-one to the MEM_A_* / MEM_B_* ports by the concurrent assignments at
-- the top of the architecture body.
signal memAWriteEnable : std_logic;
signal memAAddr : unsigned(ADDR_32BIT_RANGE);
signal memAWrite : unsigned(WORD_32BIT_RANGE);
signal memARead : unsigned(WORD_32BIT_RANGE);
signal memBWriteEnable : std_logic;
signal memBAddr : unsigned(ADDR_32BIT_RANGE);
signal memBWrite : unsigned(WORD_32BIT_RANGE);
signal memBRead : unsigned(WORD_32BIT_RANGE);
-- Program counter; its low minAddrBit bits select the opcode byte within the
-- fetched program word.
signal pc : unsigned(ADDR_BIT_RANGE); -- Synthesis tools should reduce this automatically
-- Stack pointer; loaded from the STACK_ADDR generic on reset.
signal sp : unsigned(ADDR_32BIT_RANGE);
signal interrupt_suspended_addr : unsigned(ADDR_BIT_RANGE); -- Save address which got interrupted.
-- this signal is set upon executing an IM instruction
-- the subsequent IM instruction will then behave differently.
-- all other instructions will clear the idim_flag.
-- this yields highly compact immediate instructions.
signal idim_flag : std_logic;
--
-- NOTE(review): busy is not referenced in the reviewed part of the
-- architecture - possibly unused; verify.
signal busy : std_logic;
--
-- Cleared on reset and on every enabled clock; no place that sets it to '1'
-- is visible in the reviewed part of the file.
signal begin_inst : std_logic;
-- '1' when the program word for the current pc still has to be fetched;
-- cleared once State_Decode has latched it into cachedprogramword.
signal fetchneeded : std_logic;
-- NOTE(review): the trace_* signals are not referenced in the reviewed part
-- of the architecture - presumably simulation trace hooks; verify.
signal trace_opcode : std_logic_vector(7 downto 0);
signal trace_pc : std_logic_vector(ADDR_BIT_RANGE);
signal trace_sp : std_logic_vector(ADDR_32BIT_RANGE);
signal trace_topOfStack : std_logic_vector(WORD_32BIT_RANGE);
signal trace_topOfStackB : std_logic_vector(WORD_32BIT_RANGE);
-- Debug output support (only active when DEBUG_CPU is true).
signal debugState : DebugType;
signal debugCnt : integer;
signal debugRec : zpu_dbg_t; -- Record describing what to print.
signal debugLoad : std_logic; -- Set for one clock when debugRec holds a new record.
signal debugReady : std_logic; -- The state machine only advances while this is '1' (when DEBUG_CPU is true).
-- Program word currently presented by BRAM port B, and the copy latched in
-- State_Decode for use when CACHE is true.
signal programword : std_logic_vector(WORD_32BIT_RANGE);
signal cachedprogramword : std_logic_vector(WORD_32BIT_RANGE);
signal inrom : std_logic; -- Tied to '1' by a concurrent assignment.
-- sampledOpcode is the combinational output of decodeControl; opcode is its
-- registered copy and opcode_saved is captured in State_Execute for use by
-- the multi-cycle states that follow.
signal sampledOpcode : std_logic_vector(OpCode_Size-1 downto 0);
signal opcode : std_logic_vector(OpCode_Size-1 downto 0);
signal opcode_saved : std_logic_vector(OpCode_Size-1 downto 0);
--
signal decodedOpcode : DecodedOpcodeType; -- Registered copy of sampledDecodedOpcode.
signal sampledDecodedOpcode : DecodedOpcodeType; -- Combinational output of decodeControl.
signal state : State_Type;
--
subtype index is std_logic_vector(2 downto 0);
--
-- Opcode byte selector: bit 2 picks cachedprogramword over programword,
-- bits 1..0 pick the byte lane from the low bits of pc.
signal tOpcode_sel : index;
--
signal inInterrupt : std_logic; -- '1' from taking an interrupt until pc returns to interrupt_suspended_addr.
signal comparison_sub_result : unsigned(wordSize downto 0); -- Extra bit needed for signed comparisons
signal comparison_sign_mod : std_logic; -- XOR of the sign bits of memARead and memBRead.
signal comparison_eq : std_logic; -- '1' when comparison_sub_result is zero (and IMPL_COMPARISON_SUB is true).
signal eqbranch_zero : std_logic; -- '1' when memBRead is zero (and IMPL_EQBRANCH is true).
-- Iterative shifter: shift_reg moves one bit per clock while shift_count is
-- non-zero.
signal shift_done : std_logic; -- '1' when shift_count is zero (and IMPL_SHIFT is true).
signal shift_sign : std_logic; -- Bit shifted in at the top on right shifts.
signal shift_count : unsigned(5 downto 0);
signal shift_reg : unsigned(31 downto 0);
signal shift_direction : std_logic; -- '1' = shift left, '0' = shift right.
-- Pipelined sum of the low 16 bits of memARead and memBRead; bits 17..16
-- carry into the upper half in State_Add.
signal add_low : unsigned(17 downto 0);
begin
-- Connect the internal unsigned memory signals to the BRAM (RAM/ROM) ports.
-- Port A.
MEM_A_WRITE_ENABLE <= memAWriteEnable;
MEM_A_ADDR         <= std_logic_vector(memAAddr);
MEM_A_WRITE        <= std_logic_vector(memAWrite);
memARead           <= unsigned(MEM_A_READ);
-- Port B.
MEM_B_WRITE_ENABLE <= memBWriteEnable;
MEM_B_ADDR         <= std_logic_vector(memBAddr);
MEM_B_WRITE        <= std_logic_vector(memBWrite);
memBRead           <= unsigned(MEM_B_READ);

-- Program words are read through port B, and the core always treats the
-- program as residing in ROM.
programword <= MEM_B_READ;
inrom       <= '1';

-- Opcode byte selector: the low pc bits choose the byte lane, and bit 2
-- switches to the cached program word when caching is enabled and no fetch
-- is pending.
tOpcode_sel(1 downto 0) <= std_logic_vector(pc(minAddrBit-1 downto 0));
tOpcode_sel(2)          <= '1' when (CACHE and fetchneeded = '0') else '0';
-- Combinational opcode selection and decode, kept apart from the clocked
-- state machine for readability.
--
-- Step 1 picks one byte out of the live or cached program word according to
-- tOpcode_sel and publishes it as sampledOpcode.
-- Step 2 classifies that byte into sampledDecodedOpcode. Decoded_Interrupt is
-- never produced here.
--
-- comparison_sub_result and pc are in the sensitivity list although the body
-- does not read them directly.
decodeControl : process(programword, cachedprogramword, comparison_sub_result, pc, tOpcode_sel)
    variable opByte  : std_logic_vector(OpCode_Size-1 downto 0);
    variable emuOp   : std_logic_vector(5 downto 0);
    variable decoded : DecodedOpcodeType;
begin
    -- Byte selection is a flat case so that it passes more synthesizers.
    case tOpcode_sel is
        when "000"  => opByte := programword(31 downto 24);
        when "001"  => opByte := programword(23 downto 16);
        when "010"  => opByte := programword(15 downto 8);
        when "011"  => opByte := programword(7 downto 0);
        when "100"  => opByte := cachedprogramword(31 downto 24);
        when "101"  => opByte := cachedprogramword(23 downto 16);
        when "110"  => opByte := cachedprogramword(15 downto 8);
        when "111"  => opByte := cachedprogramword(7 downto 0);
        when others => opByte := programword(7 downto 0);
    end case;

    sampledOpcode <= opByte;
    emuOp         := opByte(5 downto 0);

    if opByte(7 downto 7) = OpCode_Im then
        decoded := Decoded_Im;

    elsif opByte(7 downto 5) = OpCode_StoreSP then
        decoded := Decoded_StoreSP;

    elsif opByte(7 downto 5) = OpCode_LoadSP then
        decoded := Decoded_LoadSP;

    elsif opByte(7 downto 5) = OpCode_Emulate then
        -- Start from "emulate" and let each enabled hardware group override
        -- it. The checks run in a fixed order and a later match wins.
        decoded := Decoded_Emulate;

        if IMPL_CALL and (emuOp = OpCode_Call) then
            decoded := Decoded_Call;
        end if;

        if IMPL_MULTIPLY and (emuOp = OpCode_Mult) then
            decoded := Decoded_Mult;
        end if;

        if IMPL_XOR and (emuOp = OpCode_Xor) then
            decoded := Decoded_Xor;
        end if;

        if IMPL_COMPARISON_SUB then
            if (emuOp = OpCode_Eq) or (emuOp = OpCode_Neq) then
                decoded := Decoded_EqNeq;
            elsif emuOp = OpCode_Sub then
                decoded := Decoded_Sub;
            elsif (emuOp = OpCode_Lessthanorequal) or (emuOp = OpCode_Lessthan)
               or (emuOp = OpCode_Ulessthanorequal) or (emuOp = OpCode_Ulessthan) then
                decoded := Decoded_Comparison;
            end if;
        end if;

        if IMPL_EQBRANCH and ((emuOp = OpCode_EqBranch) or (emuOp = OpCode_NeqBranch)) then
            decoded := Decoded_EqBranch;
        end if;

        if IMPL_STOREBH and ((emuOp = OpCode_StoreB) or (emuOp = OpCode_StoreH)) then
            decoded := Decoded_StoreBH;
        end if;

        -- LOADB and LOADH do no bit shifting based on the address; the
        -- surrounding SoC is responsible for presenting the result in the
        -- low order bits. (An earlier variant decoded only LoadH here while
        -- LoadB did not yet work.)
        if IMPL_LOADBH and ((emuOp = OpCode_LoadB) or (emuOp = OpCode_LoadH)) then
            decoded := Decoded_LoadBH;
        end if;

        if IMPL_SHIFT and ((emuOp = OpCode_Lshiftright) or (emuOp = OpCode_Ashiftright)
                           or (emuOp = OpCode_Ashiftleft)) then
            decoded := Decoded_Shift;
        end if;

    elsif opByte(7 downto 4) = OpCode_AddSP then
        decoded := Decoded_AddSP;

    else
        -- Remaining opcodes are identified by their low nibble.
        case opByte(3 downto 0) is
            when OpCode_Break  => decoded := Decoded_Break;
            when OpCode_PushSP => decoded := Decoded_PushSP;
            when OpCode_PopPC  => decoded := Decoded_PopPC;
            when OpCode_Add    => decoded := Decoded_Add;
            when OpCode_Or     => decoded := Decoded_Or;
            when OpCode_And    => decoded := Decoded_And;
            when OpCode_Load   => decoded := Decoded_Load;
            when OpCode_Not    => decoded := Decoded_Not;
            when OpCode_Flip   => decoded := Decoded_Flip;
            when OpCode_Store  => decoded := Decoded_Store;
            when OpCode_PopSP  => decoded := Decoded_PopSP;
            when others        => decoded := Decoded_Nop;
        end case;
    end if;

    sampledDecodedOpcode <= decoded;
end process;
opcodeControl: process(clk, reset, comparison_sub_result, shift_count, memBRead)
variable spOffset : unsigned(4 downto 0);
variable tMultResult : unsigned(wordSize*2-1 downto 0);
begin
if IMPL_COMPARISON_SUB=true and comparison_sub_result='0'&X"00000000" then
comparison_eq <= '1';
else
comparison_eq <= '0';
end if;
if IMPL_SHIFT=true and shift_count="000000" then
shift_done <= '1';
else
shift_done <= '0';
end if;
-- Needs to happen outside the clock edge
eqbranch_zero<='0';
if IMPL_EQBRANCH=true and memBRead=X"00000000" then
eqbranch_zero <= '1';
end if;
if reset = '1' then
state <= State_Resync;
break <= '0';
sp <= to_unsigned(STACK_ADDR, maxAddrBit)(ADDR_32BIT_RANGE);
pc <= (others => '0');
idim_flag <= '0';
begin_inst <= '0';
memAAddr <= (others => '0');
memBAddr <= (others => '0');
memAWriteEnable <= '0';
memBWriteEnable <= '0';
out_mem_writeEnable <= '0';
out_mem_readEnable <= '0';
out_mem_bEnable <= '0';
out_mem_hEnable <= '0';
memAWrite <= (others => '0');
memBWrite <= (others => '0');
inInterrupt <= '0';
fetchneeded <= '1';
interrupt_ack <= '0';
interrupt_done <= '0';
if DEBUG_CPU = true then
debugRec <= ZPU_DBG_T_INIT;
debugCnt <= 0;
debugLoad <= '0';
end if;
elsif (clk'event and clk = '1') then
if DEBUG_CPU = true then
debugLoad <= '0';
end if;
memAWriteEnable <= '0';
memBWriteEnable <= '0';
-- If the cpu can run, continue with next state.
--
if DEBUG_CPU = false or (DEBUG_CPU = true and debugReady = '1') then
-- This saves ca. 100 LUT's, by explicitly declaring that the
-- memAWrite can be left at whatever value if memAWriteEnable is
-- not set.
memAWrite <= (others => DontCareValue);
memBWrite <= (others => DontCareValue);
--out_mem_addr <= (others => DontCareValue);
--mem_write <= (others => DontCareValue);
spOffset := (others => DontCareValue);
-- We want memAAddr to remain stable since the length of the fetch depends on external RAM.
--memAAddr <= (others => DontCareValue);
--memBAddr(ADDR_32BIT_RANGE) <= (others => DontCareValue);
out_mem_writeEnable <= '0';
--out_mem_bEnable <= '0';
--out_mem_hEnable <= '0';
out_mem_readEnable <= '0';
begin_inst <= '0';
--out_mem_addr <= std_logic_vector(memARead(ADDR_BIT_RANGE));
--mem_write <= std_logic_vector(memBRead);
decodedOpcode <= sampledDecodedOpcode;
opcode <= sampledOpcode;
-- If an interrupt is active, we only clear the interrupt state once the PC has returned to the address that was suspended by the
-- interrupt. This prevents recursive interrupt triggers, which are desirable in certain circumstances but not in this design.
--
interrupt_ack <= '0'; -- Reset interrupt acknowledge if set, width is 1 clock only.
interrupt_done <= '0'; -- Reset interrupt done if set, width is 1 clock only.
if inInterrupt = '1' and pc(ADDR_BIT_RANGE) = interrupt_suspended_addr(ADDR_BIT_RANGE) then
inInterrupt <= '0'; -- no longer in an interrupt
interrupt_done <= '1'; -- Interrupt service routine complete.
end if;
-- Handle shift instructions
IF IMPL_SHIFT=true then
if shift_done='0' then
if shift_direction='1' then
shift_reg <= shift_reg(30 downto 0)&"0"; -- Shift left
else
shift_reg <= shift_sign&shift_reg(31 downto 1); -- Shift right
end if;
shift_count <= shift_count-1;
end if;
end if;
-- Pipelining of addition
add_low <= ("00"&memARead(15 downto 0)) + ("00"&memBRead(15 downto 0));
if IMPL_MULTIPLY=true then
tMultResult := memARead * memBRead;
end if;
if IMPL_COMPARISON_SUB=true then
comparison_sub_result <= unsigned('0'&memBRead)-unsigned('0'&memARead);
comparison_sign_mod <= memARead(wordSize-1) xor memBRead(wordSize-1);
end if;
case state is
when State_Execute =>
opcode_saved <= opcode;
state <= State_Fetch;
-- at this point:
-- memBRead contains opcode word
-- memARead contains top of stack
pc <= pc + 1;
fetchneeded <= '1';
state <= State_Fetch;
if CACHE = true or inrom = '0' then
if pc(1 downto 0) /= "11" then -- We fetch four bytes at a time.
fetchneeded <= '0';
state <= State_Decode;
end if;
end if;
-- during the next cycle we'll be reading the next opcode
spOffset(4) := not opcode(4);
spOffset(3 downto 0) := unsigned(opcode(3 downto 0));
-- Debug code, if enabled, writes out the current instruction.
if DEBUG_CPU = true and DEBUG_LEVEL >= 1 then
debugRec.FMT_DATA_PRTMODE <= "00";
debugRec.FMT_PRE_SPACE <= '0';
debugRec.FMT_POST_SPACE <= '0';
debugRec.FMT_PRE_CR <= '1';
debugRec.FMT_POST_CRLF <= '1';
debugRec.FMT_SPLIT_DATA <= "00";
debugRec.DATA_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA2_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA3_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA4_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.WRITE_DATA <= '0';
debugRec.WRITE_DATA2 <= '0';
debugRec.WRITE_DATA3 <= '0';
debugRec.WRITE_DATA4 <= '0';
debugRec.WRITE_OPCODE <= '1';
debugRec.WRITE_DECODED_OPCODE <= '1';
debugRec.WRITE_PC <= '1';
debugRec.WRITE_SP <= '1';
debugRec.WRITE_STACK_TOS <= '1';
debugRec.WRITE_STACK_NOS <= '1';
debugRec.DATA(63 downto 0) <= (others => '0');
debugRec.DATA2(63 downto 0) <= (others => '0');
debugRec.DATA3(63 downto 0) <= (others => '0');
debugRec.DATA4(63 downto 0) <= (others => '0');
debugRec.OPCODE <= opcode;
debugRec.DECODED_OPCODE <= std_logic_vector(to_unsigned(DecodedOpcodeType'POS(decodedOpcode), 6));
debugRec.PC(ADDR_BIT_RANGE) <= std_logic_vector(pc);
debugRec.SP(ADDR_32BIT_RANGE) <= std_logic_vector(sp);
debugRec.STACK_TOS <= std_logic_vector(memARead);
debugRec.STACK_NOS <= std_logic_vector(memBRead);
debugLoad <= '1';
end if;
idim_flag <= '0';
case decodedOpcode is
when Decoded_Interrupt =>
interrupt_ack <= '1'; -- Acknowledge interrupt.
interrupt_suspended_addr <= pc(ADDR_BIT_RANGE); -- Save address which got interrupted.
sp <= sp - 1;
memAAddr <= sp - 1;
memAWriteEnable <= '1';
memAWrite <= (others => DontCareValue);
memAWrite(ADDR_BIT_RANGE) <= pc;
pc <= (others => '0');
pc(5 downto 0) <= to_unsigned(32, 6); -- interrupt address
fetchneeded <= '1'; -- Need to set this any time PC changes.
state <= State_Fetch;
report "ZPU jumped to interrupt!" severity note;
when Decoded_Im =>
idim_flag <= '1';
memAWriteEnable <= '1';
if (idim_flag = '0') then
sp <= sp - 1;
memAAddr <= sp-1;
for i in wordSize-1 downto 7 loop
memAWrite(i) <= opcode(6);
end loop;
memAWrite(6 downto 0) <= unsigned(opcode(6 downto 0));
memBAddr <= sp;
else
memAAddr <= sp;
memAWrite(wordSize-1 downto 7) <= memARead(wordSize-8 downto 0);
memAWrite(6 downto 0) <= unsigned(opcode(6 downto 0));
memBAddr <= sp+1;
end if; -- idim_flag
when Decoded_StoreSP =>
memBWriteEnable <= '1';
memBAddr <= sp+spOffset;
memBWrite <= memARead;
sp <= sp + 1;
state <= State_Resync;
when Decoded_LoadSP =>
sp <= sp - 1;
memAAddr <= sp+spOffset;
state <= State_Fetch;
when Decoded_Emulate =>
sp <= sp - 1;
memAWriteEnable <= '1';
memAAddr <= sp - 1;
memAWrite <= (others => DontCareValue);
memAWrite(ADDR_BIT_RANGE) <= pc + 1;
-- The emulate address is:
-- 98 7654 3210
-- 0000 00aa aaa0 0000
pc <= (others => '0');
pc(9 downto 5) <= unsigned(opcode(4 downto 0));
fetchneeded <= '1'; -- Need to set this any time pc changes.
state <= State_Fetch;
when Decoded_AddSP =>
memAAddr <= sp;
memBAddr <= sp+spOffset;
state <= State_AddSP;
when Decoded_Break =>
report "Break instruction encountered" severity failure;
break <= '1';
state <= State_Fetch;
when Decoded_PushSP =>
memAWriteEnable <= '1';
memAAddr <= sp - 1;
memBAddr <= sp;
sp <= sp - 1;
memAWrite <= (others => DontCareValue);
memAWrite(ADDR_32BIT_RANGE) <= sp;
when Decoded_PopPC =>
pc <= memARead(ADDR_BIT_RANGE);
fetchneeded <= '1'; -- Need to set this any time PC changes.
sp <= sp + 1;
memAAddr <= sp+1;
memBAddr <= sp+2;
state <= State_Fetch;
when Decoded_EqBranch =>
if IMPL_EQBRANCH=true then
sp <= sp + 1;
if (eqbranch_zero xor opcode(0))='0' then -- eqbranch is 55, neqbranch is 56
pc <= pc + memARead(ADDR_BIT_RANGE);
fetchneeded <= '1'; -- Need to set this any time PC changes.
end if;
state <= State_IncSP;
end if;
when Decoded_Comparison =>
if IMPL_COMPARISON_SUB=true then
sp <= sp + 1;
state <= State_Comparison;
end if;
when Decoded_Add =>
sp <= sp + 1;
state <= State_Add;
when Decoded_Sub =>
if IMPL_COMPARISON_SUB=true then
sp <= sp + 1;
state <= State_Sub;
end if;
when Decoded_Or =>
memAAddr <= sp+1;
memBAddr <= sp+2;
memAWriteEnable <= '1';
memAWrite <= memARead or memBRead;
sp <= sp + 1;
when Decoded_And =>
memAAddr <= sp+1;
memBAddr <= sp+2;
memAWriteEnable <= '1';
memAWrite <= memARead and memBRead;
sp <= sp + 1;
when Decoded_Xor =>
memAAddr <= sp+1;
memBAddr <= sp+2;
memAWriteEnable <= '1';
memAWrite <= memARead xor memBRead;
sp <= sp + 1;
when Decoded_Mult =>
sp <= sp + 1;
state <= State_Mult;
when Decoded_Load =>
if (memARead(ioBit) = '1') then
out_mem_addr(1 downto 0) <= "00";
out_mem_addr(ADDR_32BIT_RANGE) <= std_logic_vector(memARead(ADDR_32BIT_RANGE));
-- FIXME trigger some kind of alignment exception if memARead(1 downto 0) are not zero
out_mem_readEnable <= '1';
state <= State_ReadIO;
else
memAAddr <= memARead(ADDR_32BIT_RANGE);
state <= State_Fetch;
end if;
when Decoded_LoadBH =>
out_mem_addr(ADDR_BIT_RANGE) <= std_logic_vector(memARead(ADDR_BIT_RANGE));
out_mem_bEnable <= opcode(0); -- Loadb is opcode 51, %00110011
out_mem_hEnable <= not opcode(0); -- Loadh is opcode 34, %00100010
out_mem_readEnable <= '1';
state <= State_ReadIOBH;
when Decoded_EqNeq =>
sp <= sp + 1;
state <= State_EqNeq;
when Decoded_Not =>
memAAddr <= sp;
memBAddr <= sp+1;
memAWriteEnable <= '1';
memAWrite <= not memARead;
when Decoded_Flip =>
memAAddr <= sp;
memBAddr <= sp+1;
memAWriteEnable <= '1';
for i in 0 to wordSize-1 loop
memAWrite(i) <= memARead(wordSize-1-i);
end loop;
when Decoded_Store =>
memBAddr(ADDR_32BIT_RANGE) <= sp + 1;
sp <= sp + 1;
if (memARead(ioBit) = '0') then
state <= State_Store;
else
state <= State_WriteIO;
end if;
when Decoded_StoreBH =>
memBAddr(ADDR_32BIT_RANGE) <= sp + 1;
sp <= sp + 1;
state <= State_WriteIOBH;
when Decoded_PopSP =>
sp <= memARead(ADDR_32BIT_RANGE);
state <= State_Resync;
when Decoded_Call =>
if IMPL_CALL=true then
pc <= memARead(ADDR_BIT_RANGE); -- Set PC to value on top of stack
fetchneeded <= '1'; -- Need to set this any time PC changes.
memAWriteEnable <= '1';
memAAddr <= sp; -- Replace stack top with PC+1
memAWrite <= (others => DontCareValue);
memAWrite(ADDR_BIT_RANGE) <= pc + 1;
state <= State_Fetch;
end if;
when Decoded_Shift =>
IF IMPL_SHIFT=true then
sp <= sp + 1;
shift_count <= unsigned(memARead(5 downto 0)); -- 6 bit distance
shift_reg <= memBRead; -- 32-bit value
shift_direction <= opcode(0); -- 1 for left, (Opcode 43 for Ashiftleft)
shift_sign <= memBRead(31) and opcode(2); -- 1 for arithmetic, (opcode 44 for Ashiftright, 42 for lshiftright)
state <= State_Shift;
end if;
when Decoded_Nop =>
memAAddr <= sp;
state <= State_Fetch;
when others =>
null;
end case; -- decodedOpcode
-- From this point on opcode is not guaranteed to be valid if using BlockRAM.
when State_ReadIO =>
memAAddr <= sp;
if (in_mem_busy = '0') then
state <= State_Fetch;
memAWriteEnable <= '1';
memAWrite <= unsigned(mem_read);
end if;
if CACHE=false then
fetchneeded <= '1'; -- Need to set this any time out_mem_addr changes.
end if;
when State_ReadIOBH =>
if IMPL_LOADBH=true then
out_mem_bEnable <= opcode_saved(0); -- Loadb is opcode 51, %00110011
out_mem_hEnable <= not opcode_saved(0); -- Loadh is copde 34, %00100010
if in_mem_busy = '0' then
memAAddr <= sp;
-- memAWrite(31 downto 16)<=(others =>'0');
memAWrite(31 downto 8) <= (others =>'0');
-- if opcode_saved(0)='1' then -- byte read; upper 24 bits should be zeroed
-- if memARead(0)='1' then -- odd address
-- memAWrite(7 downto 0) <= unsigned(mem_read(7 downto 0));
-- else
-- memAWrite(7 downto 0) <= unsigned(mem_read(15 downto 8));
-- end if;
-- else -- short read; upper word should be zeroed.
if opcode_saved(0)='0' then -- only write the top 8 bits for halfword reads
memAWrite(15 downto 8) <= unsigned(mem_read(15 downto 8));
end if;
memAWrite(7 downto 0) <= unsigned(mem_read(7 downto 0));
-- end if;
state <= State_Fetch;
memAWriteEnable <= '1';
out_mem_bEnable <= '0';
out_mem_hEnable <= '0';
end if;
if CACHE=false then
fetchneeded <= '1'; -- Need to set this any time out_mem_addr changes.
end if;
end if;
when State_WriteIO =>
-- mem_writeMask <= (others => '1');
sp <= sp + 1;
out_mem_writeEnable <= '1';
out_mem_addr(1 downto 0) <= "00";
out_mem_addr(ADDR_BIT_RANGE) <= std_logic_vector(memARead(ADDR_BIT_RANGE));
-- FIXME - trigger an alignment exception if memARead(1 downto 0) are not zero.
mem_write <= std_logic_vector(memBRead);
state <= State_WriteIODone;
if CACHE=false then
fetchneeded <= '1'; -- Need to set this any time out_mem_addr changes.
end if;
-- (actually, only necessary for writes if mem_read doesn't hold its contents)
when State_WriteIOBH =>
if IMPL_STOREBH=true then
-- mem_writeMask <= (others => '1');
sp <= sp + 1;
out_mem_writeEnable <= '1';
out_mem_bEnable <= not opcode_saved(0); -- storeb is opcode 52
out_mem_hEnable <= opcode_saved(0); -- storeh is opcode 35
out_mem_addr <= std_logic_vector(memARead(ADDR_BIT_RANGE));
mem_write <= std_logic_vector(memBRead);
state <= State_WriteIODone;
if CACHE=false then
fetchneeded <= '1'; -- Need to set this any time out_mem_addr changes.
end if;
-- (actually, only necessary for writes if mem_read doesn't hold its contents)
end if;
when State_WriteIODone =>
if (in_mem_busy = '0') then
state <= State_Resync;
out_mem_bEnable <= '0';
out_mem_hEnable <= '0';
end if;
when State_Fetch =>
-- We need to resync. During the *next* cycle
-- we'll fetch the opcode @ pc and thus it will
-- be available for State_Execute the cycle after
-- next
memBAddr <= pc(ADDR_32BIT_RANGE);
state <= State_FetchNext;
when State_FetchNext =>
-- at this point memARead contains the value that is either
-- from the top of stack or should be copied to the top of the stack
if in_mem_busy='0' or fetchneeded='0' or inrom='1' then
memAWriteEnable <= '1';
memAWrite <= memARead;
memAAddr <= sp;
memBAddr <= sp + 1;
state <= State_Decode;
-- If debug enabled, write out state during fetch.
if DEBUG_CPU = true and DEBUG_LEVEL >= 2 then
debugRec.FMT_DATA_PRTMODE <= "00";
debugRec.FMT_PRE_SPACE <= '0';
debugRec.FMT_POST_SPACE <= '0';
debugRec.FMT_PRE_CR <= '1';
debugRec.FMT_POST_CRLF <= '1';
debugRec.FMT_SPLIT_DATA <= "00";
debugRec.DATA_BYTECNT <= std_logic_vector(to_unsigned(4, 3));
debugRec.DATA2_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA3_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA4_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.WRITE_DATA <= '1';
debugRec.WRITE_DATA2 <= '0';
debugRec.WRITE_DATA3 <= '0';
debugRec.WRITE_DATA4 <= '0';
debugRec.WRITE_OPCODE <= '0';
debugRec.WRITE_DECODED_OPCODE <= '0';
debugRec.WRITE_PC <= '1';
debugRec.WRITE_SP <= '1';
debugRec.WRITE_STACK_TOS <= '1';
debugRec.WRITE_STACK_NOS <= '1';
debugRec.DATA(63 downto 0) <= X"4645544348000000";
debugRec.DATA2(63 downto 0) <= (others => '0');
debugRec.DATA3(63 downto 0) <= (others => '0');
debugRec.DATA4(63 downto 0) <= (others => '0');
debugRec.OPCODE <= (others => '0');
debugRec.DECODED_OPCODE <= (others => '0');
debugRec.PC(ADDR_BIT_RANGE) <= std_logic_vector(pc);
debugRec.SP(ADDR_32BIT_RANGE) <= std_logic_vector(sp);
debugRec.STACK_TOS <= std_logic_vector(memARead);
debugRec.STACK_NOS <= std_logic_vector(memBRead);
debugLoad <= '1';
end if;
end if;
when State_StoreAndDecode =>
if interrupt_request = '1' and inInterrupt = '0' and idim_flag = '0' then
-- We got an interrupt, execute interrupt instead of next instruction
inInterrupt <= '1';
decodedOpcode <= Decoded_Interrupt;
end if;
memAWriteEnable <= '1';
memAWrite <= memARead;
memAAddr <= sp;
memBAddr <= sp + 1;
state <= State_Decode;
when State_Decode =>
if interrupt_request = '1' and inInterrupt = '0' and idim_flag = '0' then
-- We got an interrupt, execute interrupt instead of next instruction
inInterrupt <= '1';
decodedOpcode <= Decoded_Interrupt;
end if;
-- during the State_Execute cycle we'll be fetching SP+1 (AMR - already done at FetchNext, yes?)
memAAddr <= sp;
memBAddr <= sp + 1;
if fetchneeded='1' then
cachedprogramword <= programword;
fetchneeded <= '0';
end if;
state <= State_Execute;
when State_Store =>
sp <= sp + 1;
memAWriteEnable <= '1';
memAAddr(ADDR_32BIT_RANGE) <= memARead(ADDR_32BIT_RANGE);
memAWrite <= memBRead;
state <= State_Resync;
when State_AddSP =>
state <= State_AddSP2;
when State_AddSP2 =>
state <= State_Add;
when State_Add =>
memAAddr <= sp;
memBAddr <= sp+1;
memAWriteEnable <= '1';
memAWrite(31 downto 16) <= memARead(31 downto 16)+memBRead(31 downto 16)+add_low(17 downto 16);
memAWrite(15 downto 0) <= add_low(15 downto 0);
state<=State_Decode;
if fetchneeded = '1' then
state <= State_Fetch;
end if;
when State_Sub =>
memAAddr <= sp;
memBAddr <= sp+1;
memAWriteEnable <= '1';
memAWrite <= comparison_sub_result(wordSize-1 downto 0);
state <= State_Decode;
if fetchneeded = '1' then
state <= State_Fetch;
end if;
when State_Mult =>
memAAddr <= sp;
memBAddr <= sp+1;
memAWriteEnable <= '1';
memAWrite <= tMultResult(wordSize-1 downto 0);
state <= State_Decode;
if fetchneeded = '1' then
state <= State_Fetch;
end if;
when State_IncSP =>
sp <= sp+1;
state <= State_Resync;
when State_Resync =>
memAAddr <= sp;
memBAddr <= sp+1;
state <= State_Decode;
if fetchneeded = '1' then
state <= State_Fetch;
end if;
when State_EqNeq =>
memAAddr <= sp;
memBAddr <= sp+1;
memAWriteEnable <= '1';
memAWrite <= (others =>'0');
memAWrite(0) <= comparison_eq xor opcode_saved(4); -- eq is 46, neq is 48.
state <= State_Decode;
if fetchneeded = '1' then
state <= State_Fetch;
end if;
when State_Comparison =>
memAAddr <= sp;
memBAddr <= sp+1;
memAWriteEnable <= '1';
memAWrite <= (others => '0');
-- ulessthan: opcode 38, ulessthanorequal, 39
if opcode_saved(1) = '1' then
memAWrite(0) <= not (comparison_sub_result(wordSize) or (not opcode_saved(0) and comparison_eq));
else -- Signed comparison, lt: 36, ult: 37
memAWrite(0) <= not ((comparison_sub_result(wordSize) xor comparison_sign_mod) or (not opcode_saved(0) and comparison_eq));
end if;
state <= State_Decode;
if fetchneeded = '1' then
state <= State_Fetch;
end if;
when State_Shift =>
if shift_done='1' then
memAAddr <= sp;
memBAddr <= sp+1;
memAWriteEnable <= '1';
memAWrite <= shift_reg;
state <= State_Decode;
if fetchneeded = '1' then
state <= State_Fetch;
end if;
end if;
when State_Debug =>
case debugState is
when Debug_Start =>
-- Write out the primary data.
if DEBUG_CPU = true then
debugRec.FMT_DATA_PRTMODE <= "00";
debugRec.FMT_PRE_SPACE <= '0';
debugRec.FMT_POST_SPACE <= '0';
debugRec.FMT_PRE_CR <= '1';
debugRec.FMT_POST_CRLF <= '0';
debugRec.FMT_SPLIT_DATA <= "00";
debugRec.DATA_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA2_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA3_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA4_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.WRITE_DATA <= '0';
debugRec.WRITE_DATA2 <= '0';
debugRec.WRITE_DATA3 <= '0';
debugRec.WRITE_DATA4 <= '0';
debugRec.WRITE_OPCODE <= '0';
debugRec.WRITE_DECODED_OPCODE <= '0';
debugRec.WRITE_PC <= '1';
debugRec.WRITE_SP <= '1';
debugRec.WRITE_STACK_TOS <= '1';
debugRec.WRITE_STACK_NOS <= '1';
debugRec.DATA(63 downto 0) <= (others => '0');
debugRec.DATA2(63 downto 0) <= (others => '0');
debugRec.DATA3(63 downto 0) <= (others => '0');
debugRec.DATA4(63 downto 0) <= (others => '0');
debugRec.OPCODE <= (others => '0');
debugRec.DECODED_OPCODE <= (others => '0');
debugRec.PC(ADDR_BIT_RANGE) <= std_logic_vector(pc);
debugRec.SP(ADDR_32BIT_RANGE) <= std_logic_vector(sp);
debugRec.STACK_TOS <= std_logic_vector(memARead);
debugRec.STACK_NOS <= std_logic_vector(memBRead);
debugLoad <= '1';
debugCnt <= 0;
debugState <= Debug_DumpFifo;
end if;
when Debug_DumpFifo =>
-- Write out the opcode.
if DEBUG_CPU = true then
debugRec.FMT_DATA_PRTMODE <= "00";
debugRec.FMT_PRE_SPACE <= '0';
debugRec.FMT_POST_SPACE <= '1';
debugRec.FMT_PRE_CR <= '0';
if debugCnt = 3 then
debugRec.FMT_POST_CRLF <= '1';
else
debugRec.FMT_POST_CRLF <= '0';
end if;
debugRec.FMT_SPLIT_DATA <= "00";
debugRec.DATA_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA2_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA3_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.DATA4_BYTECNT <= std_logic_vector(to_unsigned(0, 3));
debugRec.WRITE_DATA <= '0';
debugRec.WRITE_DATA2 <= '0';
debugRec.WRITE_DATA3 <= '0';
debugRec.WRITE_DATA4 <= '0';
debugRec.WRITE_OPCODE <= '1';
debugRec.WRITE_DECODED_OPCODE <= '1';
debugRec.WRITE_PC <= '0';
debugRec.WRITE_SP <= '0';
debugRec.WRITE_STACK_TOS <= '0';
debugRec.WRITE_STACK_NOS <= '0';
debugRec.DATA(63 downto 0) <= (others => '0');
debugRec.DATA2(63 downto 0) <= (others => '0');
debugRec.DATA3(63 downto 0) <= (others => '0');
debugRec.DATA4(63 downto 0) <= (others => '0');
debugRec.OPCODE <= opcode;
debugRec.DECODED_OPCODE <= std_logic_vector(to_unsigned(DecodedOpcodeType'POS(decodedOpcode), 6));
debugRec.PC(ADDR_BIT_RANGE) <= (others => '0');
debugRec.SP(ADDR_32BIT_RANGE) <= std_logic_vector(sp);
debugRec.STACK_TOS <= (others => '0');
debugRec.STACK_NOS <= (others => '0');
debugLoad <= '1';
debugCnt <= 0;
debugState <= Debug_DumpFifo_1;
end if;
when Debug_DumpFifo_1 =>
-- Move onto next opcode in Fifo.
debugCnt <= debugCnt + 1;
if debugCnt = 3 then
debugState <= Debug_End;
else
debugState <= Debug_DumpFifo;
end if;
when Debug_End =>
state <= State_Execute;
end case;
when others =>
null;
end case; -- state
end if; -- Debug
end if; -- reset, enable
end process;
-----------------------------------------------------------------------------------------------------------------------------------------------------------
-- Debugger output processor.
-- This logic takes a debug record and expands it to human readable form then dispatches it to the debug serial port.
-----------------------------------------------------------------------------------------------------------------------------------------------------------
-- Add the debug UART if required. Increasing the TX and DBG FIFO depth can help in the short term (i.e. during the
-- initial start of the CPU), but once the FIFOs are full the debug run will operate at the speed of the slowest
-- element, i.e. the TX rate and how quickly it can shift out 10 bits.
DEBUG : if DEBUG_CPU = true generate

    -- Debug UART, elaborated only when DEBUG_CPU is true. It receives the debug record
    -- (debugRec) assembled by the CPU state machine and drives the debug serial output.
    DEBUGUART : entity work.zpu_uart_debug
        generic map (
            -- Frequency of the master clock.
            CLK_FREQ   => CLK_FREQ
        )
        port map (
            -- Serial data.
            TXD        => debug_txd,

            -- CPU interface.
            READY      => debugReady,   -- Debug processor ready for next command.
            CS         => debugLoad,    -- Chip select.
            DEBUG_DATA => debugRec,     -- Write data.
            RESET      => reset,        -- High active sync reset.
            CLK        => clk           -- Master clock.
        );

end generate DEBUG;
-----------------------------------------------------------------------------------------------------------------------------------------------------------
-- End of debugger output processor.
-----------------------------------------------------------------------------------------------------------------------------------------------------------
end behave;