|
| MICROSUBROUTINES

| Some utility subroutines for emulation of 32-bit machines on MAYBE.

| Modifications effectuees :
|
|   11/94, BB : Ajout des macros `vl' et `vs' permettant l'acces
|               a la memoire dynamique via des adresses virtuelles. 
|               Modification de `getdatum' et de `putdatum' pour
|               les adapter a la memoire virtuelle.
|   11/99, JMF :Correction de quelques erreurs dans les modifications
| 		du 11/94.
|
| Naming convention (@ is the operand length in bytes: 1, 2 or 4):
| get@:       Fetch DRAM<R0,1> into SRAM<R2>; extend to 4 bytes.
| put@:       Store SRAM<R2> into DRAM<R0,1>.
| push@:      Push datum from SRAM<R2> on main stack.
| pop@:       Pop datum from main stack into SRAM<R2>.
| fetch@:     Fetch @-byte datum from instr stream into SRAM<R2>.
| imovei@:    Copy @-byte word in SRAM: <<R1>> <- <<R0>>.

| shift:      4-byte shifter, Op1=data, Op2=kount, Op3=sign adjust.
| multiply:   4x4->8-byte: multiplicand x multiplier = product.
| divide:     4-byte: dividend / divisor = quotient (and remainder).

| Handy macros to access above utilities:

| PUSH@(x): push the @-byte operand held in SRAM starting at address x
| onto the main stack.  Loads the constant x into R2, then calls push@.
.macro PUSH1(x)        cmove(x, R2)    call(push1)
.macro PUSH2(x)        cmove(x, R2)    call(push2)
.macro PUSH4(x)        cmove(x, R2)    call(push4)

| POP@(x): pop an @-byte operand from the main stack into SRAM starting
| at address x.  Loads the constant x into R2, then calls pop@.
.macro POP1(x)         cmove(x, R2)    call(pop1)
.macro POP2(x)         cmove(x, R2)    call(pop2)
.macro POP4(x)         cmove(x, R2)    call(pop4)

| FETCH@(x): fetch an @-byte constant from the instruction stream into
| SRAM starting at address x.  Loads the constant x into R2, then calls
| fetch@.
.macro FETCH1(x)       cmove(x, R2)    call(fetch1)
.macro FETCH2(x)       cmove(x, R2)    call(fetch2)
.macro FETCH4(x)       cmove(x, R2)    call(fetch4)

| (added by BB, 11/94)
|
|  vl(lo, hi, x)  :  <x>  <--  DRAM(virtual_address(<lo>, <hi>))
|  vs(x, lo, hi)  :  DRAM(virtual_address(<lo>, <hi>))  <--  <x>
|
|  where virtual_address(low, high) =
|
|     -) (high << 8) + low       if PID == 0
|
|     -) ((PID & 0x07) << 13)
|        + ((high & 0x1F) << 8)
|        + low                   if PID != 0
|
| (modified by JMF, 11/99)
| T1 is used instead of R0 (otherwise a simple vl(PC, PC+1, R0)
| does not work).
| Fixed an error in vs.
|
| Both macros save T1 (push), copy <hi> into T1, call vtrans to translate
| it in place, perform the DRAM access using <lo> and the translated high
| byte in T1, and finally restore T1 (pop).

.macro vl(lo, hi, x) push(T1) move(hi, T1) call(vtrans) l(lo, T1, x) pop(T1)
.macro vs(x, lo, hi) push(T1) move(hi, T1) call(vtrans) s(x, lo, T1) pop(T1)

| Get an @-byte operand from main memory (@ = 1, 2 or 4).
|   Copies the operand from the main-memory location given by <R0,1>
|   to the SRAM location whose address is <R2>.
| get1 and get2 sign-extend the operand to 4 bytes; get4 copies all
| 4 bytes and needs no extension.
| Increments <R2> by 4, <R0,1> by length.
| R3 and R4 are saved on entry and restored before returning.

get1:   push(R4) push(R3)       | Save registers for temporary use.
        call(getdatum)          | Copy the byte; it is also left in R4.
        | Now set R4 for sign extension and sign-extend 3 bytes ...
        | (The jpl below tests the flags set by the cadd, so it relies on
        | the intervening cmove leaving them untouched.)
getx1:  cadd(0, R4, R4)         | Check sign of byte (set ALU flags).
        cmove(0, R4)            | Extend with zeros
        jpl(extend3)            |   iff positive.
        cmove((0-1), R4)        | Else extend with ones.
        jmp(extend3)            | extend3 stores 3 bytes, restores R3/R4, returns.

| get2: copy a 2-byte operand from DRAM <R0,1> to SRAM <R2> and
| sign-extend it to 4 bytes.  The sign is taken from the second byte copied.
get2:   push(R4) push(R3)       | Save registers for temporary use.
        call(getdatum)          | Copy first byte.
        call(getdatum)          | Copy second byte; it is also left in R4.
        | Set R4 for sign extension and sign-extend 2 bytes ...
        | (The jpl below tests the flags set by the cadd, so it relies on
        | the intervening cmove leaving them untouched.)
getx2:  cadd(0, R4, R4)         | Check sign of byte (set ALU flags).
        cmove(0, R4)            | Extend with zeros
        jpl(extend2)            |   iff positive.
        cmove((0-1), R4)        | Else extend with ones.
        jmp(extend2)            | Use common code (also restores R3/R4 and returns).

| get4: copy a 4-byte operand from DRAM <R0,1> to SRAM <R2>.
| No sign extension is needed; <R2> and <R0,1> both advance by 4.
get4:   push(R4) push(R3)       | Save registers for temporary use.
        call(getdatum)          | Copy the four bytes, one per call.
        call(getdatum)
        call(getdatum)
        call(getdatum)
        pop(R3) pop(R4)         | Restore registers.
        rtn()

| Microsubroutine used to copy and update 1 byte's worth of data:
| reads the DRAM byte at virtual address <R0,1>, stores it in the SRAM
| location addressed by <R2>, then advances both pointers by one.
| The byte is left in R4 (get1/get2 use it to decide the sign extension).
|
| (BB, 11/94) Modified to work with virtual addresses.
|
getdatum: vl(R0, R1, R4)        | Get data byte.
        movei(R4, R2)           | Store it away.
        cadd(1, R2, R2)         | Update <R2>.
        cadd2(1, R0, R0)        | Update <R0,1>.
        rtn()

| Microsubroutine for sign-extending 2 or 3 bytes using R4.
| Entered by a jump from get1/get2 with the fill byte in R4 and <R2>
| pointing at the first byte to fill.  extend3 writes one byte and falls
| through into extend2, which writes two more.  Pops the R3/R4 saved by
| the getter and returns on its behalf.
extend3: movei(R4, R2)          | Extend to high-order bytes.
        cadd(1, R2, R2)
extend2: movei(R4, R2)
        cadd(1, R2, R2)
        movei(R4, R2)
        cadd(1, R2, R2)
        pop(R3) pop(R4)         | Restore registers.
        rtn()

| Put an @-byte operand into main memory (@ = 1, 2 or 4).
|   Copies operand into main-memory location given by <R0,1>;
|   copies from SRAM location whose address is <R2>.
| Increments <R2>, <R0,1> by length.
| R3 and R4 are saved on entry and restored before returning.

put1:   push(R4) push(R3)       | Save registers for temporary use.
        call(putdatum)          | Store the single byte.
        pop(R3) pop(R4)         | Restore registers.
        rtn()

| put2: store the 2 bytes at SRAM <R2> into DRAM at <R0,1>.
put2:   push(R4) push(R3)       | Save registers for temporary use.
        call(putdatum)          | Store the two bytes, one per call.
        call(putdatum)
        pop(R3) pop(R4)         | Restore registers.
        rtn()

| put4: store the 4 bytes at SRAM <R2> into DRAM at <R0,1>.
put4:   push(R4) push(R3)       | Save registers for temporary use.
        call(putdatum)          | Store the four bytes, one per call.
        call(putdatum)
        call(putdatum)
        call(putdatum)
        pop(R3) pop(R4)         | Restore registers.
        rtn()

| Microsubroutine used by above to store 1 byte's worth of data:
| reads the SRAM byte addressed by <R2> into R4, writes it to DRAM at
| virtual address <R0,1>, then advances both pointers by one.
|
| (BB, 11/94) Modified to work with virtual addresses.
|
putdatum: imove(R2, R4)         | get data byte.
        vs(R4, R0, R1)          | store it away.
        cadd(1, R2, R2)         | update <R2>
        cadd2(1, R0, R0)        | update <R0,1>
        rtn()

| Microsubroutine translating the high byte of a virtual address (in T1)
| into the high byte of a physical address (the result is put back in T1).
| When PID is 0 (kernel) T1 is returned unchanged; otherwise the result is
| the 3 low bits of PID in the top 3 bits, followed by the 5 low bits of T1.
| R1 is preserved.  Called by the vl and vs macros.

vtrans: push(R1)                | R1 will be used as scratch, so save it.
        move(PID, R1)           | Fetch the process number.
        ccmp(0, R1)             | If this is the kernel ...
        je(vfin)                | ... no translation.
        cand(0x1F, T1, T1)      | Keep only the 5 low-order bits.
        cand(0x07, R1, R1)      | Likewise (3 bits) for the process number.
        rotl5(R1, R1)           | Build the 3 high-order bits.
        or(T1, R1, T1)          | Assemble the two parts of the address.
vfin:   pop(R1)                 | Restore the original value of R1.
        refr() refr() refr()    | To be safe.
        rtn()

| Microsubroutines to push and pop operands.
| Push value onto machine stack; value is fetched from consecutive SRAM
| locations starting with the SRAM address contained in R2.
| The data is written at the address in SP, and SP is then replaced by
| the address just past the data written.
| DESTROYS R0, R1, R2.

push1:  move2(SP, R0)           | DRAM address to write.
        call(put1)              | Store 1 byte; advances <R0,1>.
        move2(R0, SP)           | Replace SP with incremented version.
        rtn()

| push2: push the 2 bytes at SRAM <R2> onto the machine stack.
| DESTROYS R0, R1, R2.
push2:  move2(SP, R0)           | DRAM address to write.
        call(put2)              | Store 2 bytes; advances <R0,1>.
        move2(R0, SP)           | Replace SP with incremented version.
        rtn()

| push4: push the 4 bytes at SRAM <R2> onto the machine stack.
| DESTROYS R0, R1, R2.
push4:  move2(SP, R0)           | DRAM address to write.
        call(put4)              | Store 4 bytes; advances <R0,1>.
        move2(R0, SP)           | Replace SP with incremented version.
        rtn()

| Similar; pops operand into consecutive SRAM locations beginning at <R2>.
| SP is first decremented by the operand length, then the operand is read
| from the new SP.  The final jmp makes the getter return to pop's caller.
| DESTROYS R0, R1, R2.

pop1:   cmove(1, R0)            | Put length in bytes into R1, R0:
        cmove(0, R1)            |   low byte in R0, high byte in R1.
        sub2(SP, R0, SP)        | Subtract length in bytes from SP.
        move2(SP, R0)           | DRAM address to read.
        jmp(get1)               | Read into <R2>, sign-extend.

| pop2: pop a 2-byte operand into SRAM at <R2>, sign-extended to 4 bytes.
| DESTROYS R0, R1, R2.
pop2:   cmove(2, R0)            | Put length in bytes into R1, R0:
        cmove(0, R1)            |   low byte in R0, high byte in R1.
        sub2(SP, R0, SP)        | Subtract length in bytes from SP.
        move2(SP, R0)           | DRAM address to read.
        jmp(get2)               | Read into <R2>, sign-extend.

| pop4: pop a 4-byte operand into SRAM at <R2>.
| DESTROYS R0, R1, R2.
pop4:   cmove(4, R0)            | Put length in bytes into R1, R0:
        cmove(0, R1)            |   low byte in R0, high byte in R1.
        sub2(SP, R0, SP)        | Subtract length in bytes from SP.
        move2(SP, R0)           | DRAM address to read.
        jmp(get4)               | Read into <R2> (full width, no extension).

| Fetch an @-byte constant from instruction stream, updating <PC>.
| Stores into SRAM <R2>; fetch1 and fetch2 sign-extend to 4 bytes,
| fetch4 reads the full 4 bytes.
| DESTROYS R0, R1, R2.

fetch1: move2(PC, R0)           | Fetch constant from instr stream.
        call(get1)              | (Sign-extends.)
        move2(R0, PC)           | Update PC to reflect fetch.
        rtn()

| fetch2: fetch a 2-byte constant at <PC> into SRAM <R2>, sign-extended
| to 4 bytes, and advance PC past it.  DESTROYS R0, R1, R2.
fetch2: move2(PC, R0)           | Fetch constant from instr stream
        call(get2)              | (Sign-extends.)
        move2(R0, PC)           | Update PC to reflect fetch.
        rtn()
| fetch4: fetch a 4-byte constant at <PC> into SRAM <R2> and advance PC
| past it.  DESTROYS R0, R1, R2.
fetch4: move2(PC, R0)           | Fetch constant from instr stream
        call(get4)              | (Full 4 bytes; no extension needed.)
        move2(R0, PC)           | Update PC to reflect fetch.
        rtn()

| Indirect @-byte SRAM copy (@ = 1, 2 or 4):
| Copy @-byte word from SRAM locations beginning at address <R0> to
| SRAM locations beginning at address <R1>.
| Increments <R0>, <R1> by @ (one per byte copied).
| R2 is used as a temporary and is preserved.

imovei1: push(R2)               | Used as a temporary.
        call(fetchnstore)       | Copy the single byte.
        pop(R2)                 | Restore temporary register.
        rtn()

| imovei2: copy 2 bytes from SRAM at <R0> to SRAM at <R1>; both pointers
| advance by 2.  R2 is preserved.
imovei2: push(R2)               | Used as a temporary
        call(fetchnstore)       | Copy the two bytes, one per call.
        call(fetchnstore)
        pop(R2)                 | Restore temporary register.
        rtn()

| imovei4: copy 4 bytes from SRAM at <R0> to SRAM at <R1>; both pointers
| advance by 4.  R2 is preserved.
imovei4: push(R2)               | Used as a temporary.
        call(fetchnstore)       | Copy the four bytes, one per call.
        call(fetchnstore)
        call(fetchnstore)
        call(fetchnstore)
        pop(R2)                 | Restore temporary register.
        rtn()

| fetchnstore: copy one byte from the SRAM location addressed by <R0> to
| the SRAM location addressed by <R1>, then advance both pointers by one.
| Overwrites R2 (the imovei@ callers save and restore it).
fetchnstore: imove(R0, R2)      | Fetch 1 byte's worth of data.
        movei(R2, R1)           | Store it away.
        cadd(1, R0, R0)         | Increment source address.
        cadd(1, R1, R1)         | Increment destination address.
        rtn()

| Shift operations:
| rshift is used by shifts, multiplies, and divides;
| shift does both sash@, slsh@.
| rshift shifts Op1 one position right, inserting a 0 at bit 31.
| Works byte by byte from the low byte up: each byte is rotated right and
| its top bit cleared, then the low bit of the next higher byte (not yet
| shifted) is copied into that top bit.

rshift:
        refr()                  | Maintain DRAM (rshift is called from loops).
        refr()
        rotr1(Op1, Op1)         | Rotate low byte.
        cand(0x7F, Op1, Op1)    | Clear top bit.
        cadd(0, Op1+1, Op1+1)   | Test second byte.
        jeven(rshiftskip1)      | Skip if no carry.
        cor(0x80, Op1, Op1)     | Set top bit if carry.
rshiftskip1:
        rotr1(Op1+1, Op1+1)     | Rotate second byte.
        cand(0x7F, Op1+1, Op1+1) | Clear top bit.
        cadd(0, Op1+2, Op1+2)   | Test third byte.
        jeven(rshiftskip2)      | Skip if no carry.
        cor(0x80, Op1+1, Op1+1) | Set top bit if carry.
rshiftskip2:
        rotr1(Op1+2, Op1+2)     | Rotate third byte.
        cand(0x7F, Op1+2, Op1+2) | Clear top bit.
        cadd(0, Op1+3, Op1+3)   | Test top byte.
        jeven(rshiftskip3)      | Skip if no carry.
        cor(0x80, Op1+2, Op1+2) | Set top bit if carry.
rshiftskip3:
        rotr1(Op1+3, Op1+3)     | Rotate top byte.
        cand(0x7F, Op1+3, Op1+3)| Zero top bit.
        refr()
        refr()
        rtn()                   | Result in Op1.

| shift shifts Op1 left or right by the amount
| specified by Op2. The result is left in place in Op1.
| Shifts left for positive counts, right for
| negative. The lowest byte of Op3 is ORed into the top byte
| of the data after each right shift. This is intended to allow
| sign correction so that the shifter can
| function as either an arithmetic or a logical shifter.
| Op2 is used as the loop counter and is modified.
| Note the insertion of refr() instructions in shift and
| rshift above; this IS a loop and may take a long time.

shift:  cadd(0, Op2, Op2)       | Test amount.
        jmi(shiftnegloop)       | Go to second half if negative.

shiftposloop:
        csub(Op2, 1, Op2)       | Decrement count.
        jmi(shiftexit)          | Quit if done shifting.
        refr()                  | Maintain DRAM.
        refr()
        add2(Op1, Op1, Op1)     | Shift left.
        addcy(Op1+2, Op1+2, Op1+2) | Propagate carry into third byte.
        addcy(Op1+3, Op1+3, Op1+3) | Propagate carry into top byte.
        jmp(shiftposloop)       | Continue looping.

shiftnegloop:
        call(rshift)            | Shift right (does refr).
        or(Op3, Op1+3, Op1+3)   | Adjust for arith/log shift.
        cadd(1, Op2, Op2)       | Increment (negative) count.
        jmi(shiftnegloop)       | Continue if not done.

shiftexit:
        rtn()

| Multiplication and division using a shifting algorithm.
| mult performs a 4-byte multiplication, returning an 8-byte
| result. Calling routines decide which parts to load.
| Similarly div performs a 4-byte divide, returning a 4-byte
| quotient and a 4-byte remainder. Extra refr() calls are
| made to account for the loop nature of the algorithm.
| These symbols are defined for interface to calling routines.

| Operand names for mult.  The divide names below are aliases for the
| same storage, so mult and divide share Op1, Op2 and Op4.
multiplier = Op1                | 4-byte
multiplicand = Op2              | 4-byte (8-byte internal)
product = Op4                   | 8-byte result

| Operand names for divide.
| NOTE(review): divide itself addresses the remainder as Op3, which
| presumably is the same location as multiplicand + 4 -- confirm against
| the definitions of Op2/Op3.
divisor = multiplier            | 4-byte
dividend = multiplicand         | 4-byte (8-byte internal)
quotient = product              | 4-byte result
remainder = multiplicand + 4    | 4-byte result

| Scratch bytes used internally by mult and divide.
kount = Op6                     | (1-byte internal)
thesign = Op6+1                 | (1-byte internal)
remsign = Op6+2                 | (1-byte-internal)

| sign-handling is done by determining the sign from the original
| numbers (XOR of signs), then stripping the sign from the numbers
| before multiplying or dividing. The correct sign is reapplied
| before returning. Sign of the remainder in division is determined
| solely by the sign of the dividend.

| savesigns determines the sign of the result and strips
| signs from the multiplier/divisor and multiplicand/dividend.
| On return thesign holds the XOR of the two operands' sign bits (0x80 if
| the result must be negated, else 0), and Op1 and Op2 have each been
| replaced by their two's-complement negation if they were negative.
| Uses R0 as scratch (its previous contents are lost).

savesigns:
        cand(0x80, Op1+3, thesign)      | Sign of 1st number.
        jpl(savesignskip1)              | No negate if positive.
        not(Op1, Op1)                   | Make 1st positive.
        not(Op1+1, Op1+1)               | (Complement all 4 bytes ...
        not(Op1+2, Op1+2)
        not(Op1+3, Op1+3)
        cadd2(1, Op1, Op1)              |  ... then add 1, with carry
        caddcy(0, Op1+2, Op1+2)         |  rippling through the upper bytes.)
        caddcy(0, Op1+3, Op1+3)
savesignskip1:
        cand(0x80, Op2+3, R0)           | Sign of 2nd number.
        jpl(savesignskip2)              | No negate if positive.
        not(Op2, Op2)                   | Make 2nd positive.
        not(Op2+1, Op2+1)
        not(Op2+2, Op2+2)
        not(Op2+3, Op2+3)
        cadd2(1, Op2, Op2)
        caddcy(0, Op2+2, Op2+2)
        caddcy(0, Op2+3, Op2+3)
savesignskip2:
        xor(R0, thesign, thesign)       | Produce the result sign.
        refr()                          | Long instruction.
        refr()
        rtn()

| restoresigns uses thesign to decide whether or not to negate the
| product/quotient and then, if yes, does so.
| The negation is always a full 8-byte two's complement of Op4..Op4+7,
| including when it is called from divide.

restoresigns:
        cadd(0, thesign, thesign)       | Test the sign.
        jpl(restoresignexit)            | Quit if positive.
        not(Op4, Op4)                   | 8-byte negate.
        not(Op4+1, Op4+1)               | (Complement all 8 bytes ...
        not(Op4+2, Op4+2)
        not(Op4+3, Op4+3)
        not(Op4+4, Op4+4)
        not(Op4+5, Op4+5)
        not(Op4+6, Op4+6)
        not(Op4+7, Op4+7)
        cadd2(1, Op4, Op4)              |  ... then add 1, with carry
        caddcy(0, Op4+2, Op4+2)         |  rippling through the upper bytes.)
        caddcy(0, Op4+3, Op4+3)
        caddcy(0, Op4+4, Op4+4)
        caddcy(0, Op4+5, Op4+5)
        caddcy(0, Op4+6, Op4+6)
        caddcy(0, Op4+7, Op4+7)
        refr()                          | Maintain DRAM after the long sequence.
        refr()
restoresignexit:
        rtn()

| shiftcand is an 8-byte left-shift operation used by the multiply
| and divide routines to shift the multiplicand/dividend.
| Shifts Op2..Op2+7 one bit left in place by adding the value to itself.

shiftcand:
        add2(Op2, Op2, Op2)             | Left-shift = doubling.
        addcy(Op2+2, Op2+2, Op2+2)      | Propagate carry through
        addcy(Op2+3, Op2+3, Op2+3)      |   the remaining six bytes.
        addcy(Op2+4, Op2+4, Op2+4)
        addcy(Op2+5, Op2+5, Op2+5)
        addcy(Op2+6, Op2+6, Op2+6)
        addcy(Op2+7, Op2+7, Op2+7)
        refr()                          | Maintain DRAM (called from loops).
        refr()
        rtn()

| mult is the generic, 4x4 -> 8-byte multiply. It uses the fact that
| a multiplication of a 32-bit multiplicand by a 32-bit multiplier
| can be expanded to 32 additions (one for each bit in the
| multiplier) of the multiplicand, suitably shifted according to
| the position of the multiplier bit being tested.
|
|
|
| a four-bit example:
|
|
|
| 9 * 5 ->      1001 *     0101         (1001 is the multiplier)
|
|
|
| expanded:        1 *     0101    -> 00000101
|            +    0  *    0101     -> 00000000
|            +   0   *   0101      -> 00000000
|            +  1    *  0101       -> 00101000
|                                    __________
|                                     00101101  (32+8+4+1 = 45)
|
|
|
| On each pass through the loop, the lowest bit of the multiplier
| is tested to check whether to add the multiplicand to the accumulator.
| Then the multiplier is shifted right one position and the
| multiplicand (8-byte internally) is shifted left one position.

| mult: signed 4-byte x 4-byte multiply.
|   In:  multiplier in Op1, multiplicand in Op2 (4 bytes each).
|   Out: 8-byte product in Op4..Op4+7.
| Op1, Op2..Op2+7, kount and thesign are overwritten; savesigns also
| uses R0.  The loop count is cut to 24, 16 or 8 when the upper bytes of
| the (sign-stripped) multiplier are zero.
mult:   call(savesigns)         | Strip signs.

        cmove4(0, Op4)          | Clear accumulator . . .
        cmove4(0, Op4+4)        |   all 8-bytes of it.
        cmove4(0, Op2+4)        | Clear upper part of 'cand.
        cmove(32, kount)        | Set up counter.

        ccmp(0, Op1+3)          | Test top byte of multiplier.
        jne(multloop)           | Directly to loop if not 0.
        cmove(24, kount)        | Reduce count if 0.
        ccmp(0, Op1+2)          | Test next byte of multiplier.
        jne(multloop)           | Directly to loop if not 0.
        cmove(16, kount)        | Reduce count if 0.
        ccmp(0, Op1+1)          | Test next byte of multiplier.
        jne(multloop)           | Go to loop if > 255.
        cmove(8, kount)         | Reduce count if 0.

multloop:
        refr()                  | Maintain DRAM.
        refr()
        csub(kount, 1, kount)   | Decrement count.
        jmi(multexit)           | Quit if done.

        cadd(0, multiplier, multiplier) | Test lowest bit.
        jeven(multskip)                 | No addition if not set.
        add2(Op2, Op4, Op4)             | Simple addition.
        addcy(Op2+2, Op4+2, Op4+2)      | (8-byte add of the shifted
        addcy(Op2+3, Op4+3, Op4+3)      |  multiplicand into the accumulator.)
        addcy(Op2+4, Op4+4, Op4+4)
        addcy(Op2+5, Op4+5, Op4+5)
        addcy(Op2+6, Op4+6, Op4+6)
        addcy(Op2+7, Op4+7, Op4+7)
multskip:
        call(rshift)            | Shift multiplier one right.
        call(shiftcand)         | Shift multiplicand one left.
        jmp(multloop)           | Continue looping.

multexit:
        call(restoresigns)      | Reattach sign to product.
        rtn()                   | 8-byte result in product.

| divide is the generic, 4x4 -> 4,4-byte divide producing both a
| quotient and a remainder. It implements the binary version of
| long division. As the 'decimal point' is moved right one position
| (effectively a left-shift), an attempt is made to subtract the
| divisor from the dividend/remainder. If the subtract is
| 'successful,' leaving a positive result, then the dividend/remainder
| is updated with this result, and a 1 is put into the quotient
| for this position. If the subtract goes negative, then the
| dividend/remainder is left unchanged, and a 0 is put into the
| quotient for the position.
|
|
|
| A 4-bit example:
|
|
|
| 13 / 5 -> 1101 / 0101
|
|
|
|            __________
|       0101 ) 0001101   (start shifted by one)
|              0101              subtract fails,  -> 0
|              0011010   (shifted original)
|              0101              subtract fails,  -> 0
|              0110100   (shifted original)
|              0101              subtract works,  -> 1
|              0011000   (subtract, then shift)
|              0101               subtract fails, -> 0
|              0011      <- this is the remainder
|
|
|
| So the quotient is 0010 = 2 and the remainder is 0011 = 3, as expected.
|
| divide plays the same games with the sign as multiply, stripping
| it before dividing, then restoring sign to the result. The Sign
| of the remainder is always the same as the sign of the dividend.

| divide: signed 4-byte / 4-byte divide.
|   In:  divisor in Op1, dividend in Op2 (4 bytes each).
|   Out: 4-byte quotient in Op4, 4-byte remainder in Op3.
| Op1, Op2, Op5 (trial-subtract temporary), kount, thesign and remsign
| are overwritten; savesigns also uses R0.  No check is made for a zero
| divisor.
divide: cand(0x80, dividend+3, remsign) | Store remainder sign.
        call(savesigns)         | Save and strip signs.
        cmove4(0, Op4)          | Clear quotient.
        cmove4(0, Op3)          | Clear remainder.

        cmove(32, kount)        | Set up counter.

divideloop:
        refr()                  | Maintain DRAM.
        refr()
        csub(kount, 1, kount)   | Decrement counter.
        jmi(dividexit)          | Quit if done.

        call(shiftcand)         | Shift dividend up one.
        add(Op4, Op4, Op4)      | Shift quotient up one.
        addcy(Op4+1, Op4+1, Op4+1)
        addcy(Op4+2, Op4+2, Op4+2)
        addcy(Op4+3, Op4+3, Op4+3)      | (lowest bit is a zero.)

        refr()
        refr()
        sub(Op3, Op1, Op5)      | Attempt subtract
        subcy(Op3+1, Op1+1, Op5+1) | of divisor from remainder
        subcy(Op3+2, Op1+2, Op5+2) | (result to temporary storage).
        subcy(Op3+3, Op1+3, Op5+3)
        jmi(divideloop)          | If negative, continue shifting.

        cor(1, Op4, Op4)        | If positive, add a one to the quotient
        move4(Op5, Op3)         | and reduce the remainder.
        jmp(divideloop)         | Continue looping.

dividexit:
        call(restoresigns)      | Reattach sign to quotient.
        cadd(0, remsign, remsign) | Test for remainder sign.
        jpl(dividedone)         | No negation if positive.

        refr()
        refr()
        not(Op3, Op3)           | Negate remainder if
        not(Op3+1, Op3+1)       | dividend was negative.
        not(Op3+2, Op3+2)
        not(Op3+3, Op3+3)
        cadd2(1, Op3, Op3)      | (Complement above, then add 1
        caddcy(0, Op3+2, Op3+2) |  with carry into the upper bytes.)
        caddcy(0, Op3+3, Op3+3)

dividedone:
        rtn()
