! 32-bit arithmetic library.
! (C) 2001 David Given [email protected]
! This code may be used by anyone, for anything, unconditionally. Be warned
! that the code is not guaranteed to do anything, not even to work. I am not
! responsible for any problems caused by the use of this code.
!
! If you do want to use it, I'd appreciate being dropped a line to let me know
! people are interested.
!
! Usage: all numbers are stored in memory as four-byte arrays, big-endian.
! Pass pointers to the numbers into the functions. q paramters are inputs,
! z are outputs. For example:
!
! array in1 -> 4;
! array in2 -> 4;
! array out -> 4;
!
! [
!    __long_fromint(in1, 30000);
!    __long_fromint(in2, 1000);
!    __long_mul(in1, in2, out);
! ]
!
! Input and output parameters may be the same array, unless mentioned otherwise
! in the comments.
!
! Credits: loads of people, including the translator group at Tao for working
! out a number of the algorithms for me. The division routine is taken from
! code on the net; see the comment for the URL.
!
! Have fun.

! There's an Inform bug that stops the @not opcode working, so we have to use a
! conventional assignment.

[ __not q1;
       return ~q1;
];

! And the Z-machine doesn't have xor. How bizarre.

[ __xor q1 q2  t;
       return (q1 & (~q2)) | ((~q1) & q2);
];

! Unsigned arithmetic is hard. These helper functions do it for us.
! Thanks to Jay Foad for the algorithm for this.

[ __unsigned_div q1 q2  t;
       if (q1 == 0)
               return q1;
       else if (q2 == 0)
       {
               print "[error: division by zero in __unsigned_div]";
               return 0;
       }
       else if (q2 == 1)
               return q1;
       else if ((q1 > 0) && (q2 > 0))
               return q1/q2;

       ! Optimisation query: given that when executing Z-machine instructions,
       ! the decoding is by far the slowest part of the process, is it
       ! useful to have these special cases?

       !else if ((q1-32768) < (q2-32768))
       !       return 0;

       else if (q1 == q2)
               return 1;
       else if (q2 < 0)
       {
               ! If q2 is high, then the result can only be 0 or 1.
               ! The only way it can be 1 is if q1 > q2.
               return ((q1-32768) > (q2-32768));
       }

       ! Lose one bit of precision, and do the calculation.

       @log_shift q1 0-1 -> t;
       t = t / q2;
       @log_shift t 1 -> t;

       ! Now multiply back out and calculate the remainder. This tells
       ! us how much to modify the result by, to restore lost precision.

       !print "{", t, "}";
       !print "[", (q1 - t*q2), ">", q2, "]";

       if (((q1 - t*q2)-32768) >= (q2-32768))
               t++;
       return t;
];

[ __unsigned_mod q1 q2  t;
       t = __unsigned_div(q1, q2) * q2;
       return q1 - t;
];

! These wrapper functions do all the long arithmetic.

Array __long_temp1 -> 4;
Array __long_temp2 -> 4;
Array __long_temp3 -> 4;

[ __long_copy q1 z; ! z = q1
       z-->0 = q1-->0;
       z-->1 = q1-->1;
];

[ __long_loadconst z hi lo; ! z = hi.lo
       z-->0 = hi;
       z-->1 = lo;
];

[ __long_fromint z q1; ! z = (long)q1, with sign extension
       z-->1 = q1;
       if (q1 < 0)
               z-->0 = -1;
       else
               z-->0 = 0;
];

[ __long_and q1 q2 z;
       z-->0 = q1-->0 & q2-->0;
       z-->1 = q1-->1 & q2-->1;
];

[ __long_or q1 q2 z;
       z-->0 = q1-->0 | q2-->0;
       z-->1 = q1-->1 | q2-->1;
];

[ __long_asr q1 q2 z  s t;
       if ((q2-->0 ~= 0) || (((q2-->1)-32768) > (31-32768)))
       {
               ! All bits shifted off; so we just propagate the sign.

               if (q2-->0 & $8000)
               {
                       z-->0 = -1;
                       z-->1 = -1;
               }
               else
               {
                       z-->0 = 0;
                       z-->1 = 0;
               }

               return;
       }

       q2 = -(q2-->1 & $1F);
       s = 16 + q2;

       t = q1-->1;
       @log_shift t q2 -> t;
       z-->1 = t;

       t = q1-->0;
       @log_shift t s -> t;
       z-->1 = z-->1 | t;

       t = q1-->0;
       @art_shift t q2 -> t;
       z-->0 = t;
];

[ __long_lsr q1 q2 z  s t;
       if ((q2-->0 ~= 0) || ((q2-->1-32768) > (31-32768)))
       {
               ! All bits shifted off; the result is zero.

               z-->0 = 0;
               z-->1 = 0;
               return;
       }

       q2 = -(q2-->1 & $1F);
       s = 16 + q2;

       t = q1-->1;
       @log_shift t q2 -> t;
       z-->1 = t;

       t = q1-->0;
       @log_shift t s -> t;
       z-->1 = z-->1 | t;

       t = q1-->0;
       @log_shift t q2 -> t;
       z-->0 = t;
];

[ __long_neg q1 z  a b;
       a = ~(q1-->0);
       b = ~(q1-->1) + 1;
       if (~~b)
               a++;
       z-->0 = a;
       z-->1 = b;
];

[ __long_xor q1 q2 z  a b;
       a = q1-->0;
       b = q2-->0;
       z-->0 = (a & (~b)) | ((~a) & b);

       a = q1-->1;
       b = q2-->1;
       z-->1 = (a & (~b)) | ((~a) & b);
];

[ __long_add q1 q2 z  a b c cc;
       ! Add the low word, and detect overflow.

       a = q1-->1;
       b = q2-->1;
       c = a + b;
       z-->1 = c;
       @log_shift a 0-15 -> a;
       @log_shift b 0-15 -> b;
       @log_shift c 0-15 -> c;
       cc = a;
       if ((~~a) && b && (~~c))
               cc = 1;
       else if (a && b && (~~c))
               cc = 0;

       ! Add the high word, plus one if the low word overflowed.

       z-->0 = q1-->0 + q2-->0 + cc;
];

[ __long_sub q1 q2 z  a b c cc;
       ! Subtract the low word, and detect overflow.

       a = q1-->1;
       b = q2-->1;
       c = a - b;
       z-->1 = c;
       @log_shift a 0-15 -> a;
       @log_shift b 0-15 -> b;
       @log_shift c 0-15 -> c;

       ! Carry table:
       ! a b c  c
       ! 0 0 0  0
       ! 0 0 1  1
       ! 0 1 0  1
       ! 0 1 1  1
       ! 1 0 0  0
       ! 1 0 1  0
       ! 1 1 0  0
       ! 1 1 1  1

       cc = ~~a;
       if ((~~a) && (~~b) && (~~c))
               cc = 0;
       else if (a && b && c)
               cc = 1;

       ! Subtract the high word, plus one if the low word overflowed.

       z-->0 = q1-->0 - q2-->0 - cc;
];

! Algorithm converted from MIPS assembly, at:
! http://www.cz3.nus.edu.sg/~wangjs/CZ101/assembly-examples/divide.s
[ __long_unsigned_divmod q1 q2 d r  r1 r2 r3 r4 count t1 t2;
       ! Check for division by zero.

       if ((~~(q2-->0)) && (~~(q2-->1)))
       {
               print "[error: division by zero in __long_unsigned_divmod]";
               return 0;
       }

       count = 0;
       r1 = 0;
       r2 = 0;
       r3 = q1-->0;
       r4 = q1-->1;
       !print "[q1=", r3, " ", r4, " q2=", q2-->0, " ", q2-->1, " ";

       do {
               count++;

               ! Shift r1..r4 left by one bit.

               @log_shift r4 0-15 -> t1;
               @log_shift r4 1 -> r4;

               @log_shift r3 0-15 -> t2;
               @log_shift r3 1 -> r3;
               @or r3 t1 -> r3;

               @log_shift r2 0-15 -> t1;
               @log_shift r2 1 -> r2;
               @or r2 t2 -> r2;

               @log_shift r1 1 -> r1;
               @or r1 t1 -> r1;

               ! Subtract divisor from r1..r2.

               __long_temp3-->0 = r1;
               __long_temp3-->1 = r2;
               __long_sub(__long_temp3, q2, __long_temp3);

               ! Is the remainder non-negative?

               if (__long_temp3-->0 >= 0)
               {
                       ! Yes. Quotient gets a one; commit subtraction.

                       r1 = __long_temp3-->0;
                       r2 = __long_temp3-->1;

                       r4 = r4 | 1;
                       !print "1";
               }
               !else print "0";
               ! Otherwise the quotient gets a zero and the subtraction is not
               ! committed. No operation.
       } until (count == 32);

       ! Save results.

       !print " r=", r1, " ", r2;
       if (r)
       {
               r-->0 = r1;
               r-->1 = r2;
       }
       !print " d=", r3, " ", r4, "]";
       if (d)
       {
               d-->0 = r3;
               d-->1 = r4;
       }
];

[ __long_div q1 q2 z  t sign;
       ! Calculate final sign, and convert parameters to unsigned.

       t = q1-->0;
       if (t < 0)
               sign = 1;
       __long_temp1-->0 = t & $7FFF;
       __long_temp1-->1 = q1-->1;

       t = q2-->0;
       if (t < 0)
               sign = ~~sign;
       __long_temp2-->0 = t & $7FFF;
       __long_temp2-->1 = q2-->1;

       ! Do the actual divide.

       __long_unsigned_divmod(__long_temp1, __long_temp2, z, 0);

       ! Adjust sign.

       if (sign)
               __long_neg(z);
];

[ __long_mod q1 q2 z  t sign;
       ! Calculate final sign, and convert parameters to unsigned.

       t = q1-->0;
       if (t < 0)
               sign = 1;
       __long_temp1-->0 = t & $7FFF;
       __long_temp1-->1 = q1-->1;

       t = q2-->0;
       if (t < 0)
               sign = ~~sign;
       __long_temp2-->0 = t & $7FFF;
       __long_temp2-->1 = q2-->1;

       ! Do the actual modulo.

       __long_unsigned_divmod(__long_temp1, __long_temp2, 0, z);

       ! Adjust sign.

       if (sign)
               __long_neg(z);
];

[ __long_unsigned_div q1 q2 z;
       __long_unsigned_divmod(q1, q2, z, 0);
];

[ __long_unsigned_mod q1 q2 z;
       __long_unsigned_divmod(q1, q2, 0, z);
];

! Algorithm my own. Probably buggy (although it passes every test I've
! thrown at it).
[ __long_mul q1 q2 z  a b c d aa bb cc dd sign t;
       ! What we're doing here is long multiplication in base 256; so each
       ! digit is a byte.
       !
       !    A   B   C   D
       ! *  A'  B'  C'  D'
       ! = ---------------
       !   AD' BD' CD' DD'
       !   BC' CC' DC'
       !   CB' DB'
       !   DA'
       !
       ! We need to add up the columns, remembering to overflow into the
       ! next column. (Don't forget to make everything positive.)

       if (q1->0 >= $80)
       {
               ! q1 is negative.
               __long_neg(q1, __long_temp1);
               q1 = __long_temp1;
               sign = 1;
       }

       a  = q1->0;
       b  = q1->1;
       c  = q1->2;
       d  = q1->3;

       if (q2->0 >= $80)
       {
               ! q2 is negative.
               __long_neg(q2, __long_temp1);
               q2 = __long_temp1;
               sign = ~~sign;
       }

       aa = q2->0;
       bb = q2->1;
       cc = q2->2;
       dd = q2->3;

       ! D column.

       t = d*dd;
       z->3 = t;
       @log_shift t 0-8 -> t;

       ! C column.

       t = t + c*dd + d*cc;
       z->2 = t;
       @log_shift t 0-8 -> t;

       ! B column.
       t = t + b*dd + c*cc + d*bb;
       z->1 = t;
       @log_shift t 0-8 -> t;

       ! A column.
       t = t + a*dd + b*cc + c*bb + d*aa;
       !t = t & $7F;
       z->0 = t;

       ! Apply sign bit.

       if (sign)
               __long_neg(z, z);

       ! LongMul can't use the same output as one of its inputs.
       !LongMul(__long_temp1, q1, q2);
       !@copy_table __long_temp1 z 4;
];

[ __long_compare q1 q2  a b;
       a = q1-->0;
       b = q2-->0;
       if (a == b)
       {
               a = q1-->1 - 32768;
               b = q2-->1 - 32768;
       }

       if (a > b)
               return 1;
       if (a < b)
               return -1;
       return 0;
];

[ __long_unsigned_compare q1 q2  a b;
       a = q1-->0 - 32768;
       b = q2-->0 - 32768;
       if (a == b)
       {
               a = q1-->1 - 32768;
               b = q2-->1 - 32768;
       }

       if (a > b)
               return 1;
       if (a < b)
               return -1;
       return 0;
];