zSoft/startup/romcrt0.s

/* Startup code for ZPU
   Copyright (C) 2005 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file with other programs, and to distribute
those programs without any restriction coming from the use of this
file.  (The General Public License restrictions do apply in other
respects; for example, they cover modification of the file, and
distribution when not linked into another program.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* Updates by: Philip Smart for the ZPU EVO project 2019-2020
   <philip.smart@net2net.org>                                 */

        .file    "romcrt0.s"
        .include "zpu_macros.s"


        ;    .section ".fixed_vectors","ax"
        ; KLUDGE!!! we remove the executable bit to avoid relaxation
        .section ".fixed_vectors","a"

        ; DANGER!!!!
        ; we need to align these code sections to 32 bytes, which
        ; means we must not use any assembler instructions that are relaxed
        ; at linker time
        ; DANGER!!!!

        /* vectors */
        .balign 32,0
        ; offset 0x0000 0000
        ; Entry point at the start of the fixed vector section: transfers
        ; control to _premain through the jmp macro (defined in zpu_macros.s,
        ; which is not part of this file).
        .global _start
_start: jmp _premain

        ; _memreg: four zero-initialised 32-bit words, 8-byte aligned.
        ; NOTE(review): the interrupt vector addresses 8+0, 8+4 and 8+8 as
        ; R0..R2, which presumably relies on this label landing at address 8
        ; -- confirm against the linker script and the size of the jmp macro.
        .balign 8,0
        .global _memreg
_memreg:.long 0, 0, 0, 0

        .balign 32,0
        ; offset 0x0000 0020
        ; Interrupt entry. Pushes the three words stored at addresses 8, 12
        ; and 16 (called R0..R2 here), calls the handler whose address is held
        ; in _inthandler_fptr, then pops the three words back to the same
        ; addresses in reverse order and returns with poppc.
        ; NOTE(review): the final poppc assumes the interrupted PC is on the
        ; stack at entry -- confirm against the core's interrupt behaviour.
        .global _zpu_interrupt_vector
_zpu_interrupt_vector:
        im 8+0        ; save R0
        load
        im 8+4        ; save R1
        load
        im 8+8        ; save R2
        load

        ; fixedim is a macro from zpu_macros.s (not shown); it is used here to
        ; push the address of the handler pointer, which is then dereferenced.
        fixedim    _inthandler_fptr
        load
        call

        im 8+8
        store        ; restore R2
        im 8+4
        store        ; restore R1
        im 8+0
        store        ; restore R0
        poppc

        /* instruction emulation code */

        ; opcode 34
        ; offset 0x0000 0040
        ; Emulation of loadh (16-bit load).
        ; Stack as used below: [sp+0] = return address, [sp+4] = address.
        ; Loads the 32-bit word containing the halfword, shifts it right by
        ; (2 - (address & 3)) * 8 bits, masks to 16 bits and overwrites the
        ; address operand with the result.
        ; fast_neg is a macro from zpu_macros.s (not shown); it is presumably
        ; a negate of the top of stack -- the formula above assumes that.
        .balign 32,0
        .global _loadh
_loadh: loadsp 4
        ; by not masking out bit 0, we cause a memory access error
        ; on unaligned access
        im ~0x2
        and
        load

        ; mult 8
        loadsp 8      ; address operand again (one extra word is on the stack)
        im 3
        and
        fast_neg
        im 2
        add
        im 3
        ashiftleft
        ; shift right addr&3 * 8
        lshiftright
        im 0xffff
        and
        storesp 8     ; result replaces the address operand

        poppc

        ; opcode 35
        ; offset 0x0000 0060
        ; Emulation of storeh (16-bit store), first half.
        ; Stack as used below: [sp+0] = return address, [sp+4] = address,
        ; [sp+8] = value.
        ; This part loads the containing 32-bit word and clears the target
        ; halfword in it, then pushes the value and the constant 0xffff and
        ; jumps to _storehtail, which finishes the operation.
        ; fast_neg is a macro from zpu_macros.s (not shown); presumably a
        ; negate of the top of stack.
        .balign 32,0
        .global _storeh
_storeh:loadsp 4
        ; by not masking out bit 0, we cause a memory access error
        ; on unaligned access
        im ~0x2
        and
        load

        ; mask
        im 0xffff
        loadsp 12     ; address operand (two extra words are on the stack)
        im 3
        and
        fast_neg
        im 2
        add
        im 3
        ashiftleft    ; * 8
        ashiftleft    ; 0xffff shifted to the halfword position
        not

        and           ; loaded word with the target halfword cleared

        loadsp 12     ; value operand
        im 0xffff

        ; NOTE(review): the nop presumably separates the two immediates so
        ; that the following fixedim is not merged with "im 0xffff" --
        ; confirm against the ZPU instruction set description.
        nop

        fixedim _storehtail
        poppc


        ; opcode 36
        ; offset 0x0000 0080
        ; Emulation of lessthan (signed compare).
        ; Stack as used below: [sp+0] = return address, [sp+4] = a,
        ; [sp+8] = b.
        ; Result (0 or 1) is the sign bit of
        ;     ((a - b) & ~(~a & b)) | (a & ~b)
        ; and is left in b's slot; the return address is moved up over a.
        ; fast_neg is a macro from zpu_macros.s (not shown); the formula
        ; assumes it negates the top of stack.
        .balign 32,0
_lessthan:
        loadsp 8
        fast_neg
        loadsp 8
        add

        ; DANGER!!!!
        ; 0x80000000 will overflow when negated, so we need to mask
        ; the result above with the compare positive to negative
        ; number case
        loadsp 12
        loadsp 12
        not
        and
        not
        and

        ; handle case where we are comparing a negative number
        ; and positive number. This can underflow. E.g. consider 0x80000000 < 0x1000
        loadsp 12
        not
        loadsp 12
        and

        or

        ; extract the sign bit as 0/1
        flip
        im 1
        and

        storesp 12
        storesp 4
        poppc


        ; opcode 37
        ; offset 0x0000 00a0
        ; Emulation of lessthanorequal (signed).
        ; Stack as used below: [sp+0] = return address, then the two operands.
        ; Copies both operands and applies lessthan, copies them again and
        ; applies eq, ORs the two results, stores the result in the deeper
        ; operand slot and moves the return address up over the other one.
        .balign 32,0
_lessthanorequal:
        loadsp 8
        loadsp 8
        lessthan
        loadsp 12
        loadsp 12
        eq
        or

        storesp 12
        storesp 4
        poppc


        ; opcode 38
        ; offset 0x0000 00c0
        ; Emulation of ulessthan (unsigned compare).
        ; Stack as used below: [sp+0] = return address, [sp+4] = x,
        ; [sp+8] = y.
        ; The comparison is split into the low bit and the upper 31 bits so
        ; that the subtraction cannot overflow; the sign of
        ; (x>>1) - (y>>1) + low is the result (0 or 1), which is left in y's
        ; slot with the return address moved up over x.
        .balign 32,0
_ulessthan:
        ; fish up arguments
        loadsp 4
        loadsp 12

        /* low: -1 if low bit dif is negative 0 otherwise:  neg (not x&1 and (y&1))
            x&1        y&1        neg (not x&1 and (y&1))
            1        1        0
            1        0         0
            0        1        -1
            0        0        0

        */
        loadsp 4
        not
        loadsp 4
        and
        im 1
        and
        /* neg */
        not
        im 1
        add


        /* high: upper 31-bit diff is only wrong when diff is 0 and low=-1
            high=x>>1 - y>>1 + low

            extremes

            0000 - 1111:
            low= neg(not 0 and 1) = 1111 (-1)
            high=000+ neg(111) +low = 000 + 1001 + low = 1000
            OK

            1111 - 0000
            low=neg(not 1 and 0) = 0
            high=111+neg(000) + low = 0111
            OK
        */
        loadsp 8

        ; flip / addsp 0 / flip: doubling the bit-reversed value gives a
        ; logical shift right by one
        flip
        addsp 0
        flip

        loadsp 8

        flip
        addsp 0
        flip

        sub

        ; if they are equal, then the last bit decides...
        add

        /* test if negative: result = flip(diff) & 1 */
        flip
        im 1
        and

        ; destroy a&b which are on stack
        storesp 4
        storesp 4

        storesp 12
        storesp 4
        poppc

        ; opcode 39
        ; offset 0x0000 00e0
        ; Emulation of ulessthanorequal (unsigned).
        ; Stack as used below: [sp+0] = return address, then the two operands.
        ; Copies both operands and applies ulessthan, copies them again and
        ; applies eq, ORs the two results, stores the result in the deeper
        ; operand slot and moves the return address up over the other one.
        .balign 32,0
_ulessthanorequal:
        loadsp 8
        loadsp 8
        ulessthan
        loadsp 12
        loadsp 12
        eq
        or

        storesp 12
        storesp 4
        poppc


        ; opcode 40
        ; offset 0x0000 0100
        ; swap is not implemented: this slot only contains a breakpoint
        ; instruction (marked "tbd" by the original author).
        .balign 32,0
        .global _swap
_swap:  breakpoint ; tbd

        ; opcode 41
        ; offset 0x0000 0120
        ; Multiply emulation: this slot only jumps to _slowmultImpl, which
        ; holds the actual routine; the stack is left untouched.
        .balign 32,0
_slowmult:
        im _slowmultImpl
        poppc

        ; opcode 42
        ; offset 0x0000 0140
        ; Emulation of lshiftright (logical shift right).
        ; Stack as used below: [sp+0] = return address, [sp+4] = shift count,
        ; [sp+8] = value.
        ; Implemented as flip(value), ashiftleft by the count, flip again.
        ; The result replaces the value operand and the return address is
        ; moved up over the count.
        .balign 32,0
_lshiftright:
        loadsp 8
        flip

        loadsp 8
        ashiftleft
        flip

        storesp 12
        storesp 4

        poppc


        ; opcode 43
        ; offset 0x0000 0160
        ; Emulation of ashiftleft.
        ; Stack as used below: [sp+0] = return address, [sp+4] = shift count,
        ; [sp+8] = value.
        ; Pushes a copy of the value, then jumps to
        ; _ashiftleftEnd - (count & 0x1f), i.e. into the run of "addsp 0"
        ; instructions that precedes _ashiftleftEnd. The code at
        ; _ashiftleftEnd stores the result and returns.
        ; NOTE(review): this relies on each "addsp 0" occupying one byte and
        ; on fast_neg (macro from zpu_macros.s, not shown) negating the top
        ; of stack -- confirm.
        .balign 32,0
_ashiftleft:
        loadsp 8

        loadsp 8
        im 0x1f
        and
        fast_neg
        im _ashiftleftEnd
        add
        poppc


        ; opcode 44
        ; offset 0x0000 0180
        ; Emulation of ashiftright (arithmetic shift right).
        ; Stack as used below: [sp+0] = return address, [sp+4] = shift count,
        ; [sp+8] = value.
        ; Computes the logical shift, builds a mask of the vacated high bits
        ; (~(-1 >> (count & 0x1f))), keeps that mask only when the value's
        ; sign bit is set, and ORs it into the shifted value.
        .balign 32,0
_ashiftright:
        loadsp 8
        loadsp 8
        lshiftright

        ; handle signed value
        im -1
        loadsp 12
        im 0x1f
        and
        lshiftright
        not    ; now we have an integer on the stack with the signed
               ; bits in the right position

        ; mask these bits with the signed bit.
        ; ((~value sign bit) - 1) is -1 for a negative value, 0 otherwise
        loadsp 16
        not
        flip
        im 1
        and
        im -1
        add

        and

        ; stuff in the signed bits...
        or

        ; store result into correct stack slot
        storesp 12

        ; move up return value
        storesp 4
        poppc

        ; opcode 45
        ; offset 0x0000 01a0
        ; Emulation of call: swaps the return address and the call target on
        ; the stack, then jumps to the target with poppc so that the return
        ; address is left on top of the stack for the callee.
        ; The stack comments below list the top of stack first.
        .balign 32,0
_call:  ; stack: return_addr call_addr ...
        ; fn
        loadsp 4    ;    call_addr return_addr call_addr ...

        ; return address
        loadsp 4    ;   return_addr call_addr return_addr call_addr ...

        ; store return address
        storesp 12    ;   call_addr return_addr return_addr

        ; fn to call
        storesp 4    ;   call_addr return_addr

        ; pushsp    ; flush internal stack
        ; popsp

        poppc

_storehtail:
        ; Second half of the storeh emulation, reached by a jump from
        ; _storeh. It is placed after _call, inside the same 32-byte slot.
        ; Stack on entry as left by _storeh (top first): 0xffff, value,
        ; word with the target halfword cleared, return address, address,
        ; value.
        ; Masks the value to 16 bits, shifts it left by
        ; (2 - (address & 3)) * 8, merges it into the cleared word, stores the
        ; word at (address & ~3), then drops both operands and returns.
        ; fast_neg is a macro from zpu_macros.s (not shown); presumably a
        ; negate of the top of stack.

        and
        loadsp 12
        im 3
        and
        fast_neg
        im 2
        add
        im 3
        ashiftleft
        nop
        ashiftleft

        or

        loadsp 8
        im  ~0x3
        and

        store

        ; move the return address up over the two operands
        storesp 4
        storesp 4
        poppc


        ; opcode 46
        ; offset 0x0000 01c0
        ; Emulation of eq.
        ; Stack as used below: [sp+0] = return address, then the two operands.
        ; With d = difference of the operands, the sign bit of
        ; (~d & ~(~d + 1)) is set only when d is zero; that bit is extracted
        ; as 0/1, stored in the deeper operand slot, and the return address
        ; is moved up over the other operand.
        ; fast_neg is a macro from zpu_macros.s (not shown); presumably a
        ; negate of the top of stack.
        .balign 32,0
_eq:    loadsp 8
        fast_neg
        loadsp 8
        add

        ; zero test on the difference
        not
        loadsp 0
        im 1
        add
        not
        and
        flip
        im 1
        and

        storesp 12
        storesp 4
        poppc

        ; opcode 47
        ; offset 0x0000 01e0
        ; Emulation of neq.
        ; Same sequence as _eq, with one extra "not" before the final mask so
        ; that the 0/1 result is inverted: 1 when the operands differ.
        ; Stack as used below: [sp+0] = return address, then the two operands.
        ; fast_neg is a macro from zpu_macros.s (not shown); presumably a
        ; negate of the top of stack.
        .balign 32,0
_neq:   loadsp 8
        fast_neg
        loadsp 8
        add

        ; zero test on the difference
        not
        loadsp 0
        im 1
        add
        not
        and
        flip

        ; invert the equality bit
        not

        im 1
        and

        storesp 12
        storesp 4
        poppc


        ; opcode 48
        ; offset 0x0000 0200
        ; Emulation of neg.
        ; Stack as used below: [sp+0] = return address, [sp+4] = operand.
        ; Computes (~operand) + 1 and writes it back over the operand.
        .balign 32,0
_neg:   loadsp 4
        not
        im 1
        add
        storesp 8

        poppc


        ; opcode 49
        ; offset 0x0000 0220
        ; Emulation of sub.
        ; Stack as used below: [sp+0] = return address, [sp+4] = a,
        ; [sp+8] = b.
        ; Pushes b, then a, applies fast_neg to a and adds; the result is
        ; stored in b's slot and the return address is moved up over a.
        ; fast_neg is a macro from zpu_macros.s (not shown); if it negates the
        ; top of stack, the result is b - a.
        .balign 32,0
_sub:   loadsp 8
        loadsp 8
        fast_neg
        add
        storesp 12

        storesp 4

        poppc


        ; opcode 50
        ; offset 0x0000 0240
        ; Emulation of xor.
        ; Stack as used below: [sp+0] = return address, [sp+4] = a,
        ; [sp+8] = b.
        ; Computes (a & ~b) | (b & ~a), stores it in b's slot and moves the
        ; return address up over a.
        .balign 32,0
_xor:   loadsp 8
        not
        loadsp 8
        and

        loadsp 12
        loadsp 12
        not
        and

        or

        storesp 12
        storesp 4
        poppc

        ; opcode 51
        ; offset 0x0000 0260
        ; Emulation of loadb (8-bit load).
        ; Stack as used below: [sp+0] = return address, [sp+4] = address.
        ; Loads the 32-bit word at (address & ~3), shifts it right by
        ; (3 - (address & 3)) * 8 bits, masks to 8 bits and overwrites the
        ; address operand with the result.
        ; fast_neg is a macro from zpu_macros.s (not shown); the formula
        ; assumes it negates the top of stack.
        .balign 32,0
        .global _loadb
_loadb: loadsp 4
        im ~0x3
        and
        load

        loadsp 8
        im 3
        and
        fast_neg
        im 3
        add
        ; x8
        addsp 0
        addsp 0
        addsp 0

        lshiftright

        im 0xff
        and
        storesp 8

        poppc


        ; opcode 52
        ; offset 0x0000 0280
        ; Emulation of storeb (8-bit store), first half.
        ; Stack as used below: [sp+0] = return address, [sp+4] = address,
        ; [sp+8] = value.
        ; Loads the 32-bit word at (address & ~3) and ANDs it with the entry
        ; _mask[address & 3] (each entry is 4 bytes), which clears the target
        ; byte. Then jumps to _storebtail with the cleared word on top of the
        ; stack.
        .balign 32,0
        .global _storeb
_storeb:loadsp 4
        im ~0x3
        and
        load

        ; mask away destination
        im _mask
        loadsp 12
        im 3
        and
        ; x4: byte offset into the _mask table
        addsp 0
        addsp 0
        add
        load

        and


        im _storebtail
        poppc

        ; opcode 53
        ; offset 0x0000 02a0
        ; Division emulation, delegated through the cimpl macro to __divsi3.
        ; NOTE(review): cimpl is defined in zpu_macros.s (not shown) and
        ; __divsi3 is not defined in this file; presumably the macro calls the
        ; C runtime routine with the two operands -- confirm.
        .balign 32,0
_div:   cimpl __divsi3
        ;breakpoint

        ; opcode 54
        ; offset 0x0000 02c0
        ; Modulo emulation, delegated through the cimpl macro to __modsi3.
        ; NOTE(review): cimpl is defined in zpu_macros.s (not shown) and
        ; __modsi3 is not defined in this file; presumably the macro calls the
        ; C runtime routine with the two operands -- confirm.
        .balign 32,0
_mod:   cimpl __modsi3
        ;breakpoint;

        ; opcode 55
        ; offset 0x0000 02e0
        ; Emulation of eqbranch (branch when the condition word is zero).
        ; Stack as used below: [sp+0] = return address (the "no branch"
        ; address), [sp+4] = branch offset, [sp+8] = condition.
        ; Branch-free: builds a mask that is 0 when the condition is zero and
        ; -1 otherwise, then selects
        ;     (mask & return address) | (~mask & (return address + offset - 1))
        ; as the new PC. Both operands are dropped before the jump.
        .balign 32,0
        .global _eqbranch
_eqbranch:
        loadsp 8

        ; eq

        not
        loadsp 0
        im 1
        add
        not
        and
        flip
        im 1
        and

        ; mask
        im -1
        add
        loadsp 0
        storesp 16    ; keep a copy of the mask in the condition slot

        ; no branch address
        loadsp 4

        and

        ; fetch boolean & neg mask
        loadsp 12
        not

        ; calc address & mask for branch
        loadsp 8
        loadsp 16
        add
        ; subtract 1 to find PC of branch instruction
        im -1
        add

        and

        or

        ; move the selected address down over the three remaining words
        storesp 4
        storesp 4
        storesp 4
        poppc


        ; opcode 56
        ; offset 0x0000 0300
        ; Emulation of neqbranch (branch when the condition word is non-zero).
        ; Stack as used below: [sp+0] = return address (the "no branch"
        ; address), [sp+4] = branch offset, [sp+8] = condition.
        ; Same branch-free selection as _eqbranch with the test inverted: the
        ; mask is -1 when the condition is zero and 0 otherwise, and the new
        ; PC is
        ;     (mask & return address) | (~mask & (return address + offset - 1)).
        .balign 32,0
        .global _neqbranch
_neqbranch:
        loadsp 8

        ; neq

        not
        loadsp 0
        im 1
        add
        not
        and
        flip

        not

        im 1
        and

        ; mask
        im -1
        add
        loadsp 0
        storesp 16    ; keep a copy of the mask in the condition slot

        ; no branch address
        loadsp 4

        and

        ; fetch boolean & neg mask
        loadsp 12
        not

        ; calc address & mask for branch
        loadsp 8
        loadsp 16
        add
        ; find address of branch instruction
        im -1
        add

        and

        or

        ; move the selected address down over the three remaining words
        storesp 4
        storesp 4
        storesp 4
        poppc

        ; opcode 57
        ; offset 0x0000 0320
        ; Emulation of poppcrel.
        ; Stack as used below: [sp+0] = return address, [sp+4] = offset.
        ; Jumps to (return address + offset - 1); the -1 turns the return
        ; address into the address of the poppcrel instruction itself.
        .balign 32,0
        .global _poppcrel
_poppcrel:
        add
        ; address of poppcrel
        im -1
        add
        poppc

        ; opcode 58
        ; offset 0x0000 0340
        ; Emulation slot for config. The original body (kept below as
        ; comments) wrote 1 to _hardware and dropped one stack word; it is
        ; disabled, so this slot now only returns.
        ; NOTE(review): with "storesp 4" commented out as well, whatever word
        ; sat below the return address is no longer removed -- confirm this
        ; is intended for this core.
        .balign 32,0
        .global _config
_config:
        ; im 1
        ; nop
        ; im _hardware
        ; store
        ; storesp 4
        poppc

        ; opcode 59
        ; offset 0x0000 0360
        ; Emulation slot for pushpc.
        ; As written: reads the word below the return address ([sp+4]), adds
        ; 1, writes the sum back to that same slot, and returns.
        ; NOTE(review): no new word is pushed and the return address is not
        ; used in the computation, which looks inconsistent with a "push PC"
        ; operation -- confirm whether this slot is ever reached and what
        ; result is expected.
        .balign 32,0
_pushpc:
        loadsp 4
        im 1
        add
        storesp 8
        poppc

        ; opcode 60
        ; offset 0x0000 0380
        ; Emulation slot for syscall: returns immediately without touching
        ; the stack. A single zero byte follows the poppc; its purpose is not
        ; evident from this file.
        .balign 32,0
_syscall_emulate:
        poppc
        .byte 0

        ; opcode 61
        ; offset 0x0000 03a0
        ; Emulation of pushspadd.
        ; Stack as used below: [sp+0] = return address, [sp+4] = operand.
        ; Replaces the operand with (pushed sp + 4) + operand * 4.
        ; NOTE(review): assumes pushsp pushes the stack pointer value from
        ; before the push, so that +4 yields the address of the operand slot
        ; -- confirm against the ZPU instruction set description.
        .balign 32,0
_pushspadd:
        pushsp
        im 4
        add
        loadsp 8
        ; x4
        addsp 0
        addsp 0
        add
        storesp 8

        poppc

        ; opcode 62
        ; offset 0x0000 03c0
        ; halfmult is not implemented: this slot only contains a breakpoint
        ; instruction.
        .balign 32,0
_halfmult:
        breakpoint

        ; opcode 63
        ; offset 0x0000 03e0
        ; Emulation of callpcrel.
        ; Stack as used below: [sp+0] = return address, [sp+4] = offset.
        ; Computes the target (return address + offset - 1), replaces the
        ; offset operand with the return address, and jumps to the target so
        ; that the return address is on top of the stack for the callee.
        .balign 32,0
_callpcrel:
        loadsp 4
        loadsp 4
        add
        im -1
        add
        loadsp 4

        storesp 12    ; return address
        storesp 4
        pushsp        ; this will flush the internal stack.
        popsp
        poppc


        .text

        ; Pointer to the interrupt handler; _zpu_interrupt_vector loads this
        ; word and calls the address it contains. It initially points at
        ; _default_inthandler.
        ; NOTE(review): the word is emitted in .text; whether it can be
        ; rewritten at run time depends on where the linker script places
        ; that section -- confirm.
        .global _inthandler_fptr
        .balign 4,0
_inthandler_fptr:
        .long _default_inthandler

        ; Default interrupt handler: does nothing and returns immediately.
        .balign 4,0
_default_inthandler:
        poppc


        ; Shift ladder used by _ashiftleft: 0x1f consecutive "addsp 0"
        ; instructions, each doubling the top of stack. _ashiftleft jumps to
        ; _ashiftleftEnd minus the masked shift count, so the number of
        ; instructions executed equals the count (assuming one byte per
        ; "addsp 0" -- TODO confirm).
        ; At _ashiftleftEnd the stack is (top first): result, return address,
        ; shift count, value. The result replaces the value and the return
        ; address is moved up over the count.
        .balign 4,0
_ashiftleftBegin:
        .rept 0x1f
        addsp 0
        .endr
_ashiftleftEnd:
        storesp 12
        storesp 4
        poppc

        ; Second half of the storeb emulation, reached by a jump from
        ; _storeb.
        ; Stack on entry as left by _storeb (top first): word with the target
        ; byte cleared, return address, address, value.
        ; Masks the value to 8 bits, shifts it left by
        ; (3 - (address & 3)) * 8, merges it into the cleared word, stores the
        ; word at (address & ~3), then drops both operands and returns.
        ; fast_neg is a macro from zpu_macros.s (not shown); the formula
        ; assumes it negates the top of stack.
        .balign 4,0
_storebtail:
        loadsp 12
        im 0xff
        and
        loadsp 12
        im 3
        and

        fast_neg
        im 3
        add
        ; x8
        addsp 0
        addsp 0
        addsp 0

        ashiftleft

        or

        loadsp 8
        im  ~0x3
        and

        store

        ; move the return address up over the two operands
        storesp 4
        storesp 4
        poppc


_slowmultImpl:
        ; Body of the multiply emulation, reached by a jump from _slowmult.
        ; Stack on entry: [sp+0] = return address, then the two operands.
        ; Pushes working copies A and B plus an accumulator C = 0, then
        ; repeats the mult1bit macro until the word at [sp+8] (A) is zero.
        ; C is then stored in the deeper operand slot and the return address
        ; is moved up over the other operand.
        ; NOTE(review): mult1bit is defined in zpu_macros.s (not shown);
        ; presumably it performs one shift-and-add step on A, B and C.

        loadsp 8 ; A
        loadsp 8 ; B
        im 0 ; C

.LmoreMult:
        mult1bit

        ; cutoff
        loadsp 8
        ; Hand-assembled byte: the 7-bit offset back to .LmoreMult with bit 7
        ; set. NOTE(review): presumably a one-byte "im" encoded by hand so
        ; that its size is fixed -- confirm against the ZPU opcode map.
        .byte (.LmoreMult-.Lbranch)&0x7f+0x80
.Lbranch:
        neqbranch

        ; drop C's two working companions (B, A), then return the result
        storesp 4
        storesp 4
        storesp 12
        storesp 4
        poppc

        .section ".text","ax"

        ; _premain (weak): zeroes memory from __bss_start__ up to
        ; __bss_end__ one 32-bit word at a time, then jumps to main with
        ; &_break left on the stack as main's return address.
        ; Stack comments list the top of stack first.
        ; The loop exits when __bss_end__ <= bssptr (unsigned): as the
        ; emulation routines in this file show, the comparison instructions
        ; test "top of stack <op> next word".
        .weak _premain
_premain:
        ;    clear BSS data, then call main.

        im __bss_start__    ; bssptr
.clearloop:
        loadsp 0            ; bssptr bssptr
        im __bss_end__      ; __bss_end__  bssptr bssptr
        ulessthanorequal    ; (__bss_end__<=bssptr?) bssptr
        impcrel .done       ; &.done (__bss_end__<=bssptr?) bssptr
        neqbranch           ; bssptr
        im 0                ; 0 bssptr
        loadsp 4            ; bssptr 0 bssptr
        loadsp 0            ; bssptr bssptr 0 bssptr
        im 4                ; 4 bssptr bssptr 0 bssptr
        add                 ; bssptr+4 bssptr 0 bssptr
        storesp 12          ; bssptr 0 bssptr+4
        store               ; (write 0->bssptr)  bssptr+4
        im .clearloop       ; &.clearloop bssptr+4
        poppc               ; bssptr+4
.done:
        im _break           ; &_break bssptr
        storesp 4           ; &_break
        im main             ; &main &break
        poppc               ; &break

        ; _boot: jumps to address 0, where the vector section places _start
        ; according to the offset comments at the top of this file.
        .global _boot
        .balign 4,0
_boot:
        im 0
        poppc

        ; _break: executes a breakpoint instruction and then jumps back to
        ; itself. _premain leaves this address on the stack as main's return
        ; address, so a return from main ends up here.
        .global _break;
_break:
        breakpoint
        im _break
        poppc ; infinite loop

        ; Byte-clear masks used by _storeb, indexed by (address & 3): entry n
        ; has byte n (counted from the most significant byte) zeroed.
        ; The table is never written, so it lives in .rodata instead of .data.
        .section ".rodata"
        .balign 4,0
_mask:  .long 0x00ffffff, 0xff00ffff, 0xffff00ff, 0xffffff00