/*
 *   MIRACL Karatsuba method for multiprecision multiplication combined with 
 *   Comba's method for high speed assembly language multiplication
 *   and Montgomery's method for modular multiplication - the KCM Method - 
 *   together yield near optimal speed for exponentiation cryptosystems.
 *
 *   mrkcm.tpl 
 *
 *   This approach is recommended for maximum speed where parameters 
 *   are fixed and compute resources are constrained. The processor must 
 *   support an unsigned multiply instruction, and should have a carry flag.
 *
 *   This file is a template. To fill in the gaps and create mrkcm.c
 *   you must run the mex.c program to insert the C or assembly language
 *   macros from the appropriate .mcs file. For use with C, MR_NOASM must
 *   be defined in mirdef.h
 *
 *   This method would appear to be particularly useful for implementing fast
 *   RSA/DSA/DH Cryptosystems.
 *
 *   The #define MR_KCM in mirdef.h affects the size of modulus that can
 *   be used. This *must* be determined at compile time. Then any modulus
 *   of size in words = MR_KCM*2^n can be used. For example if MR_KCM=8 
 *   a modulus of size 8,16,32,64 etc can be used. So if MR_KCM = 8 on a
 *   32 bit computer, then the modulus may be 256, 512, 1024, 2048 etc.
 *   bits in length 
 *
 *   Note that this module can generate a *lot* of code for larger values of 
 *   MR_KCM. This should have a maximum value of 8-16.
 *
 *   Note that on some processors it is *VITAL* that arrays be aligned on 
 *   4-byte boundaries
 *
 *   Inspiration from Wei Dai is acknowledged
 *
 *  **** This code does not like -fomit-frame-pointer using GCC  ***********
 *
 *   Copyright (c) 1988-2001 Shamus Software Ltd.
 */

#include "miracl.h"

#ifdef MR_KCM
  
/*
 * c_mul - fully unrolled, column-by-column (Comba-style) multiplication.
 *
 * Reads the words at byte offsets 4*0 .. 4*15 through $r2 and through $r3,
 * and writes the words at byte offsets 4*0 .. 4*31 through $r4.
 * NOTE(review): $r2/$r3/$r4 presumably hold the arguments a/b/c on entry;
 * the calling convention is not visible in this file - confirm against the
 * target ABI.
 *
 * For each output column k, every index pair (i,j) with i+j == k is handled
 * by the same seven-instruction group:
 *     ldw $r2(4*i) ; mov $r6,$data1   - presumably fetches word i via $data1
 *     ldw $r3(4*j)                    - presumably fetches word j into $data1
 *     mul $r6,$r6,$data1              - product; high half presumably in $msw
 *     add $r0,$r6 ; adc $r1,$msw,$r1 ; adc $r7,$r7,$0
 * so ($r7:$r1:$r0) is used as a three-word column accumulator.
 *
 * Each column ends with "stw $r4(4*k) ; mov $data0,$r0", followed by a shift
 * of the accumulator down by one word ($r0 <- $r1, $r1 <- $r7, $r7 <- 0).
 * NOTE(review): the mov to $data0 comes AFTER the stw, which suggests the
 * store takes its data from $data0 in a deferred/pipelined fashion - confirm
 * against the processor manual before reordering anything here.
 */
asm c_mul(mr_small *a,mr_small *b,mr_small *c)
{
/*** MULTIPLY ***/
  /* NOTE(review): meaning of $dspmode is target specific - not shown here */
  mov $dspmode,1
  /* clear the three accumulator registers */
  xor $r7,$r7
  xor $r1,$r1
  xor $r0,$r0
  /* column 0: pair (0,0); result to $r4(4*0) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*0)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 1: pairs (i,1-i), i = 0..1; result to $r4(4*1) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*1)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 2: pairs (i,2-i), i = 0..2; result to $r4(4*2) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*2)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 3: pairs (i,3-i), i = 0..3; result to $r4(4*3) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*3)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 4: pairs (i,4-i), i = 0..4; result to $r4(4*4) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*4)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 5: pairs (i,5-i), i = 0..5; result to $r4(4*5) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*5)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 6: pairs (i,6-i), i = 0..6; result to $r4(4*6) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*6)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 7: pairs (i,7-i), i = 0..7; result to $r4(4*7) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*7)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 8: pairs (i,8-i), i = 0..8; result to $r4(4*8) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*8)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 9: pairs (i,9-i), i = 0..9; result to $r4(4*9) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*9)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 10: pairs (i,10-i), i = 0..10; result to $r4(4*10) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*10)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 11: pairs (i,11-i), i = 0..11; result to $r4(4*11) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*11)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 12: pairs (i,12-i), i = 0..12; result to $r4(4*12) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*12)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 13: pairs (i,13-i), i = 0..13; result to $r4(4*13) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*13)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 14: pairs (i,14-i), i = 0..14; result to $r4(4*14) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*14)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 15: pairs (i,15-i), i = 0..15; result to $r4(4*15) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*15)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 16: pairs (i,16-i), i = 1..15; result to $r4(4*16) */
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*16)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 17: pairs (i,17-i), i = 2..15; result to $r4(4*17) */
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*17)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 18: pairs (i,18-i), i = 3..15; result to $r4(4*18) */
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*18)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 19: pairs (i,19-i), i = 4..15; result to $r4(4*19) */
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*19)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 20: pairs (i,20-i), i = 5..15; result to $r4(4*20) */
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*20)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 21: pairs (i,21-i), i = 6..15; result to $r4(4*21) */
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*21)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 22: pairs (i,22-i), i = 7..15; result to $r4(4*22) */
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*22)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 23: pairs (i,23-i), i = 8..15; result to $r4(4*23) */
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*23)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 24: pairs (i,24-i), i = 9..15; result to $r4(4*24) */
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*24)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 25: pairs (i,25-i), i = 10..15; result to $r4(4*25) */
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*25)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 26: pairs (i,26-i), i = 11..15; result to $r4(4*26) */
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*26)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 27: pairs (i,27-i), i = 12..15; result to $r4(4*27) */
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*27)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 28: pairs (i,28-i), i = 13..15; result to $r4(4*28) */
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*28)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 29: pairs (i,29-i), i = 14..15; result to $r4(4*29) */
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*29)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 30: pair (15,15); result to $r4(4*30) */
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*30)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* top word: no further products, remaining low accumulator word to $r4(4*31) */
  stw $r4(4*31)
  mov $data0,$r0
  ret
}

static void mr_comba_mul(mr_small *x,mr_small *y,mr_small *z)
{
    /* Multiply the two MR_KCM-word arrays x and y, placing the result in z.
       All of the work is done by the unrolled assembly routine c_mul; the
       operands are forwarded to it unchanged and in the same order. */
    c_mul(x,y,z);
}

/*
 * c_mulup - fully unrolled column-wise (Comba-style) multiplication that
 * produces only result words 0..15.
 *
 * For each column k = 0..15 the code loads word i through $r2 and word k-i
 * through $r3 (for i = 0..k), multiplies the pair and adds the product into
 * a running accumulator. The low accumulator word ($r0) is then stored
 * through $r4 as result word k.
 *
 * Columns 0..14 propagate carries with adc into $r1 and $r7, and finish by
 * shifting the accumulator down one word ($r0 <- $r1, $r1 <- $r7, $r7 <- 0).
 * Column 15 uses plain add only: no carry out of it is tracked and nothing
 * above result word 15 is stored.
 *
 * NOTE(review): presumably $r2/$r3/$r4 hold the arguments a/b/c on entry
 * (target calling convention) - confirm against the compiler documentation.
 * NOTE(review): every ldw is followed by a read of $data1 and every stw by a
 * write of $data0; presumably these are the load/store data registers of the
 * target - confirm.
 * NOTE(review): $msw is presumably the high word of the latest mul result,
 * and "mov $dspmode,1" presumably selects the multiplier mode this relies
 * on - confirm.
 */
asm c_mulup(mr_small *a,mr_small *b,mr_small *c)
{
/*** MULTUP ***/
  mov $dspmode,1
  /* clear the three-word accumulator $r7:$r1:$r0 */
  xor $r7,$r7
  xor $r1,$r1
  xor $r0,$r0
  /* column 0: ($r2 word 0)*($r3 word 0) */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  /* emit result word 0 from $r0, then shift the accumulator down one word */
  stw $r4(4*0)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 1: sum of ($r2 word i)*($r3 word 1-i), i = 0..1 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*1)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 2: sum of ($r2 word i)*($r3 word 2-i), i = 0..2 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*2)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 3: sum of ($r2 word i)*($r3 word 3-i), i = 0..3 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*3)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 4: sum of ($r2 word i)*($r3 word 4-i), i = 0..4 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*4)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 5: sum of ($r2 word i)*($r3 word 5-i), i = 0..5 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*5)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 6: sum of ($r2 word i)*($r3 word 6-i), i = 0..6 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*6)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 7: sum of ($r2 word i)*($r3 word 7-i), i = 0..7 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*7)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 8: sum of ($r2 word i)*($r3 word 8-i), i = 0..8 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*8)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 9: sum of ($r2 word i)*($r3 word 9-i), i = 0..9 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*9)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 10: sum of ($r2 word i)*($r3 word 10-i), i = 0..10 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*10)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 11: sum of ($r2 word i)*($r3 word 11-i), i = 0..11 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*11)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 12: sum of ($r2 word i)*($r3 word 12-i), i = 0..12 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*12)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 13: sum of ($r2 word i)*($r3 word 13-i), i = 0..13 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*13)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 14: sum of ($r2 word i)*($r3 word 14-i), i = 0..14 */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r4(4*14)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  /* column 15 (last stored word): add only, no adc - carries out of this column are not tracked */
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r3(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r3(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r3(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r3(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r3(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r3(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r3(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r3(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r3(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r3(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r3(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r3(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r3(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r3(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r3(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  ldw $r2(4*15)
  mov $r6,$data1
  ldw $r3(4*0)
  mul $r6,$r6,$data1
  add $r0,$r6
  /* emit result word 15; no accumulator shift is needed after the final column */
  stw $r4(4*15)
  mov $data0,$r0
  ret
}

static void mr_comba_halfm(mr_small *x,mr_small *y,mr_small *z)
{
    /* Multiply the arrays x and y but keep only the lower half of the
       product, written to z. The operands are handed straight to the
       unrolled assembly routine c_mulup, unchanged and in the same order. */
    c_mulup(x,y,z);
}

asm c_sqr(mr_small *a,mr_small *c)
{
/*** SQUARE ***/
  mov $dspmode,1
  xor $r7,$r7
  xor $r1,$r1
  xor $r0,$r0
  ldw $r2(4*0)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*0)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*1)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*1)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*2)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*2)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*3)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*4)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*3)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*5)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*6)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*4)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*7)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*8)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*5)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*9)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*10)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*6)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*11)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*12)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*7)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*13)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*14)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*0)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r2(4*8)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*15)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*1)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*16)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*2)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r2(4*9)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*17)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*3)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*18)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*4)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r2(4*10)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*19)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*5)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*20)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*6)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r2(4*11)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*21)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*7)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*22)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*8)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r2(4*12)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*23)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*9)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*24)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*10)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r2(4*13)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*25)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*11)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*26)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*12)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r2(4*14)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*27)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*13)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  ldw $r2(4*14)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*28)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*14)
  mov $r6,$data1
  ldw $r2(4*15)
  mul $r6,$r6,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*29)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  ldw $r2(4*15)
  mul $r6,$data1,$data1
  add $r0,$r6
  adc $r1,$msw,$r1
  adc $r7,$r7,$0
  stw $r3(4*30)
  mov $data0,$r0
  mov $r0,$r1
  mov $r1,$r7
  xor $r7,$r7
  stw $r3(4*31)
  mov $data0,$r0
  ret
}

static void mr_comba_sqr(mr_small *x,mr_small *z)
{ /* squaring of a single array of MR_KCM words: x and z go straight to c_sqr */
    c_sqr(x,z);
}

/*
 * c_sum - assembly routine that mr_addn() invokes as c_sum(a,b,c,n,&carry).
 *
 * Each pass of the k1 loop is unrolled into 16 load/load/adc groups that
 * cover word offsets 0..15 relative to $r3 and $r2.  After a pass both of
 * those registers are advanced by 4*16, and the `dec $r5,1` / `jeq k2` pair
 * decides whether another pass follows.  At k2 a final carry value is
 * formed in $r0 and written out with `stw $r6(0)` / `mov $data0,$r0`.
 *
 * NOTE(review): the register roles are inferred from usage only
 * ($r2=a, $r3=b, $r4=c, $r5=n, $r6=carry) - confirm against the calling
 * convention of the target this file was generated for.
 */
asm c_sum(mr_small *a,mr_small *b,mr_small *c,int n,mr_small *carry)
{
/*** SUMMATION ***/
/* presumably here to start with a clear carry flag - TODO confirm */
  add $r0,$0,$0 
/* NOTE(review): looks like this sets up $r4 as the target of the $data0
   writes below; $r4 is never advanced explicitly inside the loop, so the
   ",4" is presumably a per-store step - confirm the addressing mode */
  stw $r4,4
  k1:
  ldw $r3(4*0)
  mov $r0,$data1
  ldw $r2(4*0)
  adc $data0,$r0,$data1 /* 0 */
  ldw $r3(4*1)
  mov $r0,$data1
  ldw $r2(4*1)
  adc $data0,$r0,$data1 /* 1 */
  ldw $r3(4*2)
  mov $r0,$data1
  ldw $r2(4*2)
  adc $data0,$r0,$data1 /* 2 */
  ldw $r3(4*3)
  mov $r0,$data1
  ldw $r2(4*3)
  adc $data0,$r0,$data1 /* 3 */
  ldw $r3(4*4)
  mov $r0,$data1
  ldw $r2(4*4)
  adc $data0,$r0,$data1 /* 4 */
  ldw $r3(4*5)
  mov $r0,$data1
  ldw $r2(4*5)
  adc $data0,$r0,$data1 /* 5 */
  ldw $r3(4*6)
  mov $r0,$data1
  ldw $r2(4*6)
  adc $data0,$r0,$data1 /* 6 */
  ldw $r3(4*7)
  mov $r0,$data1
  ldw $r2(4*7)
  adc $data0,$r0,$data1 /* 7 */
  ldw $r3(4*8)
  mov $r0,$data1
  ldw $r2(4*8)
  adc $data0,$r0,$data1 /* 8 */
  ldw $r3(4*9)
  mov $r0,$data1
  ldw $r2(4*9)
  adc $data0,$r0,$data1 /* 9 */
  ldw $r3(4*10)
  mov $r0,$data1
  ldw $r2(4*10)
  adc $data0,$r0,$data1 /* 10 */
  ldw $r3(4*11)
  mov $r0,$data1
  ldw $r2(4*11)
  adc $data0,$r0,$data1 /* 11 */
  ldw $r3(4*12)
  mov $r0,$data1
  ldw $r2(4*12)
  adc $data0,$r0,$data1 /* 12 */
  ldw $r3(4*13)
  mov $r0,$data1
  ldw $r2(4*13)
  adc $data0,$r0,$data1 /* 13 */
  ldw $r3(4*14)
  mov $r0,$data1
  ldw $r2(4*14)
  adc $data0,$r0,$data1 /* 14 */
  ldw $r3(4*15)
  mov $r0,$data1
  ldw $r2(4*15)
  adc $data0,$r0,$data1 /* 15 */
/* one unrolled pass of 16 words is complete; count it off */
  dec $r5,1
  jeq k2
/* presumably parks the carry flag in $r0 while the pointer adds run */
  mov $r0,0
  adc $r0,$r0,$0
  add $r2,$r2,4*16
  add $r3,$r3,4*16
/* presumably re-creates the carry flag from $r0 (-1 plus $r0) - TODO confirm */
  mov $r1,-1
  add $r1,$r0  /* 16 */
  j k1
  k2:
/* form the outgoing carry in $r0 and write it out through $r6 */
  mov $r0,$0
  adc $r0,$r0,$0
  stw $r6(0)
  mov $data0,$r0
  ret
}

static int mr_addn(mr_small *x,mr_small *y,mr_small *z,int n)
{ /* add two arrays, each n*MR_KCM words long, by calling c_sum(x,y,z,n,..) */
    mr_small cy;

    c_sum(x,y,z,n,&cy);

    /* c_sum hands its carry back through the last argument */
    return (int)cy;
}

/*
 * c_inc - assembly routine that mr_incn() invokes as c_inc(a,b,n,&carry).
 *
 * Each pass of the k3 loop is unrolled into 16 load/load/adc groups that
 * cover word offsets 0..15 relative to $r2 and $r3.  After a pass both of
 * those registers are advanced by 4*16, and the `dec $r4,1` / `jeq k4` pair
 * decides whether another pass follows.  At k4 a final carry value is
 * formed in $r0 and written out with `stw $r5(0)` / `mov $data0,$r0`.
 *
 * NOTE(review): the register roles are inferred from usage only
 * ($r2=a, $r3=b, $r4=n, $r5=carry) - confirm against the calling
 * convention of the target this file was generated for.
 */
asm c_inc(mr_small *a,mr_small *b,int n,mr_small *carry)
{
/*** INCREMENTATION */
/* presumably here to start with a clear carry flag - TODO confirm */
  add $r0,$0,$0 
/* NOTE(review): looks like this makes $r2 (the first operand) the target of
   the $data0 writes below, i.e. the sums would land back in that array -
   confirm the addressing mode */
  stw $r2,4
  k3:
  ldw $r2(4*0)
  mov $r0,$data1
  ldw $r3(4*0)
  adc $data0,$r0,$data1 /* 0  */
  ldw $r2(4*1)
  mov $r0,$data1
  ldw $r3(4*1)
  adc $data0,$r0,$data1 /* 1  */
  ldw $r2(4*2)
  mov $r0,$data1
  ldw $r3(4*2)
  adc $data0,$r0,$data1 /* 2  */
  ldw $r2(4*3)
  mov $r0,$data1
  ldw $r3(4*3)
  adc $data0,$r0,$data1 /* 3  */
  ldw $r2(4*4)
  mov $r0,$data1
  ldw $r3(4*4)
  adc $data0,$r0,$data1 /* 4  */
  ldw $r2(4*5)
  mov $r0,$data1
  ldw $r3(4*5)
  adc $data0,$r0,$data1 /* 5  */
  ldw $r2(4*6)
  mov $r0,$data1
  ldw $r3(4*6)
  adc $data0,$r0,$data1 /* 6  */
  ldw $r2(4*7)
  mov $r0,$data1
  ldw $r3(4*7)
  adc $data0,$r0,$data1 /* 7  */
  ldw $r2(4*8)
  mov $r0,$data1
  ldw $r3(4*8)
  adc $data0,$r0,$data1 /* 8  */
  ldw $r2(4*9)
  mov $r0,$data1
  ldw $r3(4*9)
  adc $data0,$r0,$data1 /* 9  */
  ldw $r2(4*10)
  mov $r0,$data1
  ldw $r3(4*10)
  adc $data0,$r0,$data1 /* 10  */
  ldw $r2(4*11)
  mov $r0,$data1
  ldw $r3(4*11)
  adc $data0,$r0,$data1 /* 11  */
  ldw $r2(4*12)
  mov $r0,$data1
  ldw $r3(4*12)
  adc $data0,$r0,$data1 /* 12  */
  ldw $r2(4*13)
  mov $r0,$data1
  ldw $r3(4*13)
  adc $data0,$r0,$data1 /* 13  */
  ldw $r2(4*14)
  mov $r0,$data1
  ldw $r3(4*14)
  adc $data0,$r0,$data1 /* 14  */
  ldw $r2(4*15)
  mov $r0,$data1
  ldw $r3(4*15)
  adc $data0,$r0,$data1 /* 15  */
/* one unrolled pass of 16 words is complete; count it off */
  dec $r4,1
  jeq k4
/* presumably parks the carry flag in $r0 while the pointer adds run */
  mov $r0,0
  adc $r0,$r0,$0
  add $r2,$r2,4*16
  add $r3,$r3,4*16
/* presumably re-creates the carry flag from $r0 (-1 plus $r0) - TODO confirm */
  mov $r1,-1
  add $r1,$r0 
  j k3
  k4:
/* form the outgoing carry in $r0 and write it out through $r5 */
  mov $r0,$0
  adc $r0,$r0,$0
  stw $r5(0)
  mov $data0,$r0
  ret
}

static int mr_incn(mr_small *y,mr_small *z,int n)
{ /* add to an array of length n*MR_KCM.
     y and z are handed to c_inc in swapped order (z first, then y) together
     with the block count n; the carry word that c_inc stores is returned,
     converted to int.
     The MR_ITANIUM / MR_NOASM scratch declarations that used to sit here were
     never referenced in this function, and they declared `u` twice with
     different types whenever both macros were defined, so they are gone. */
    mr_small carry;

    c_inc(z,y,n,&carry);

    return (int)carry;
}

/*
 * c_dec - assembly routine that mr_decn() invokes as c_dec(a,b,n,&carry).
 *
 * Each pass of the k5 loop is unrolled into 16 load/load/sbb groups that
 * cover word offsets 0..15 relative to $r2 and $r3.  After a pass both of
 * those registers are advanced by 4*16, and the `dec $r4,1` / `jeq k6` pair
 * decides whether another pass follows.  At k6 a final flag value is
 * formed in $r0 and written out with `stw $r5(0)` / `mov $data0,$r0`.
 *
 * NOTE(review): the register roles are inferred from usage only
 * ($r2=a, $r3=b, $r4=n, $r5=carry) - confirm against the calling
 * convention of the target this file was generated for.
 *
 * NOTE(review): the flag is saved with adc and re-created with
 * "-1 plus $r0" exactly as in the addition routines, although the
 * arithmetic here is sbb.  Confirm that sbb leaves its borrow in the same
 * flag, with the same polarity, that this save/restore sequence assumes.
 */
asm c_dec(mr_small *a,mr_small *b,int n,mr_small *carry)
{
/*** DECREMENTATION */
/* presumably here to start with a clear carry/borrow flag - TODO confirm */
  add $r0,$0,$0 
/* NOTE(review): looks like this makes $r2 (the first operand) the target of
   the $data0 writes below, i.e. the differences would land back in that
   array - confirm the addressing mode */
  stw $r2,4
  k5:
  ldw $r2(4*0)
  mov $r0,$data1
  ldw $r3(4*0)
  sbb $data0,$r0,$data1 /* 0 */
  ldw $r2(4*1)
  mov $r0,$data1
  ldw $r3(4*1)
  sbb $data0,$r0,$data1 /* 1 */
  ldw $r2(4*2)
  mov $r0,$data1
  ldw $r3(4*2)
  sbb $data0,$r0,$data1 /* 2 */
  ldw $r2(4*3)
  mov $r0,$data1
  ldw $r3(4*3)
  sbb $data0,$r0,$data1 /* 3 */
  ldw $r2(4*4)
  mov $r0,$data1
  ldw $r3(4*4)
  sbb $data0,$r0,$data1 /* 4 */
  ldw $r2(4*5)
  mov $r0,$data1
  ldw $r3(4*5)
  sbb $data0,$r0,$data1 /* 5 */
  ldw $r2(4*6)
  mov $r0,$data1
  ldw $r3(4*6)
  sbb $data0,$r0,$data1 /* 6 */
  ldw $r2(4*7)
  mov $r0,$data1
  ldw $r3(4*7)
  sbb $data0,$r0,$data1 /* 7 */
  ldw $r2(4*8)
  mov $r0,$data1
  ldw $r3(4*8)
  sbb $data0,$r0,$data1 /* 8 */
  ldw $r2(4*9)
  mov $r0,$data1
  ldw $r3(4*9)
  sbb $data0,$r0,$data1 /* 9 */
  ldw $r2(4*10)
  mov $r0,$data1
  ldw $r3(4*10)
  sbb $data0,$r0,$data1 /* 10 */
  ldw $r2(4*11)
  mov $r0,$data1
  ldw $r3(4*11)
  sbb $data0,$r0,$data1 /* 11 */
  ldw $r2(4*12)
  mov $r0,$data1
  ldw $r3(4*12)
  sbb $data0,$r0,$data1 /* 12 */
  ldw $r2(4*13)
  mov $r0,$data1
  ldw $r3(4*13)
  sbb $data0,$r0,$data1 /* 13 */
  ldw $r2(4*14)
  mov $r0,$data1
  ldw $r3(4*14)
  sbb $data0,$r0,$data1 /* 14 */
  ldw $r2(4*15)
  mov $r0,$data1
  ldw $r3(4*15)
  sbb $data0,$r0,$data1 /* 15 */
/* one unrolled pass of 16 words is complete; count it off */
  dec $r4,1
  jeq k6
/* presumably parks the flag in $r0 while the pointer adds run */
  mov $r0,0
  adc $r0,$r0,$0
  add $r2,$r2,4*16
  add $r3,$r3,4*16
/* presumably re-creates the flag from $r0 (-1 plus $r0) - TODO confirm */
  mov $r1,-1
  add $r1,$r0 
  j k5
  k6:
/* form the outgoing flag value in $r0 and write it out through $r5 */
  mov $r0,$0
  adc $r0,$r0,$0
  stw $r5(0)
  mov $data0,$r0
  ret
}

static int mr_decn(mr_small *y,mr_small *z,int n)
{ /* subtract from an array of length n*MR_KCM.
     y and z are handed to c_dec in swapped order (z first, then y) together
     with the block count n; the word that c_dec stores through its last
     argument is returned, converted to int.
     The MR_ITANIUM / MR_NOASM scratch declarations that used to sit here were
     never referenced in this function, and they declared `u` twice with
     different types whenever both macros were defined, so they are gone. */
    mr_small carry;

    c_dec(z,y,n,&carry);

    return (int)carry;
}

static void mr_cpy(mr_small *x,mr_small *z,int n)
{ /* duplicate n*MR_KCM words of x into z, lowest index first */
    int words=n*MR_KCM;
    int k=0;

    while (k<words)
    {
        z[k]=x[k];
        k++;
    }
}

static void mr_karmul(int n,mr_small *t,mr_small *x,mr_small *y,mr_small *z)
{ /* Karatsuba step for the full product z=x*y of two n-word operands.
     t is scratch space; t[n] onwards is passed down to the recursive calls. */
    int half,blocks,half_blocks;
    mr_small cx,cy,carry;
    mr_small *x_hi,*y_hi,*z_mid,*z_hi,*t_mid,*t_deeper,*top;

    if (n==MR_KCM)
    { /* recursion ends here with a single Comba multiplication */
        mr_comba_mul(x,y,z);
        return;
    }

    half=n/2;
    blocks=n/MR_KCM;
    half_blocks=blocks/2;
    carry=0;

    x_hi=&x[half];
    y_hi=&y[half];
    z_mid=&z[half];
    z_hi=&z[n];
    t_mid=&t[half];
    t_deeper=&t[n];

    /* sums of the operand halves go to z and z[half]; cx, cy are their carries */
    cx=mr_addn(x,x_hi,z,half_blocks);
    cy=mr_addn(y,y_hi,z_mid,half_blocks);

    /* build (a0+a1).(b0+b1) in t, with any overflow counted in carry */
    mr_karmul(half,t_deeper,z,z_mid,t);
    if (cx!=0) carry+=mr_incn(z_mid,t_mid,half_blocks);
    if (cy!=0) carry+=mr_incn(z,t_mid,half_blocks);
    if ((cx&cy)!=0) carry++;

    /* a0.b0 is produced in z and taken off t */
    mr_karmul(half,t_deeper,x,y,z);
    carry-=mr_decn(z,t,blocks);

    /* a1.b1 is produced in z[n] and taken off t as well */
    mr_karmul(half,t_deeper,x_hi,y_hi,z_hi);
    carry-=mr_decn(z_hi,t,blocks);

    /* merge what remains in t into z, starting at word half */
    carry+=mr_incn(t,z_mid,blocks);

    /* the collected carry is applied at word n+half */
    top=&z[n+half];
    *top+=carry;
    if (*top<carry)
    { /* wrapped round - ripple upwards (seldom needed) */
        do
        {
            top++;
            (*top)++;
        } while (*top==0);
    }
}

static void mr_karsqr(int n,mr_small *t,mr_small *x,mr_small *z)
{ /* Recursive squaring z=x*x of an n-word operand; t is scratch space,
     with t[n] onwards passed down to the recursive calls. */
    int half,blocks;
    mr_small carry;
    mr_small *x_hi,*z_mid,*t_deeper,*top;

    if (n==MR_KCM)
    { /* smallest size: a single Comba squaring */
        mr_comba_sqr(x,z);
        return;
    }

    half=n/2;
    blocks=n/MR_KCM;
    x_hi=&x[half];
    z_mid=&z[half];
    t_deeper=&t[n];

    mr_karsqr(half,t_deeper,x,z);           /* low half squared, into z     */
    mr_karsqr(half,t_deeper,x_hi,&z[n]);    /* high half squared, into z[n] */
    mr_karmul(half,t_deeper,x,x_hi,t);      /* low half times high half, into t */

    /* t is added in twice, starting at word half */
    carry=mr_incn(t,z_mid,blocks);
    carry+=mr_incn(t,z_mid,blocks);

    /* the collected carry is applied at word n+half */
    top=&z[n+half];
    *top+=carry;
    if (*top<carry)
    { /* wrapped round - ripple upwards (seldom needed) */
        do
        {
            top++;
            (*top)++;
        } while (*top==0);
    }
}

static void mr_karmul_lower(int n,mr_small *t,mr_small *x,mr_small *y,mr_small *z)
{ /* Least significant (bottom) half of x*y; t is scratch space */
    int half,half_blocks;
    mr_small *z_mid,*t_deeper;

    if (n==MR_KCM)
    { /* base size: the Comba routine that forms only the bottom half */
        mr_comba_halfm(x,y,z);
        return;
    }

    half=n/2;
    half_blocks=(n/MR_KCM)/2;
    z_mid=&z[half];
    t_deeper=&t[n];

    /* full product of the two low halves goes to z */
    mr_karmul(half,t_deeper,x,y,z);

    /* bottom half of (high x)*(low y), added in at word half */
    mr_karmul_lower(half,t_deeper,&x[half],y,t);
    mr_incn(t,z_mid,half_blocks);

    /* bottom half of (low x)*(high y), added in at word half */
    mr_karmul_lower(half,t_deeper,x,&y[half],t);
    mr_incn(t,z_mid,half_blocks);
}

static void mr_karmul_upper(int n,mr_small *t,mr_small *x,mr_small *y,mr_small *z)
{ /* Most significant (upper) half of x*y, worked out from a z that already
     holds the lower part; t is scratch space. */
    int half,blocks,half_blocks;
    mr_small cx,cy,carry;
    mr_small *x_hi,*y_hi,*z_mid,*z_hi,*z_top,*t_mid,*t_deeper,*top;

    if (n==MR_KCM)
    {
        mr_comba_mul(x,y,z);
        return;
    }

    half=n/2;
    blocks=n/MR_KCM;
    half_blocks=blocks/2;
    carry=0;

    x_hi=&x[half];
    y_hi=&y[half];
    z_mid=&z[half];
    z_hi=&z[n];
    z_top=&z[n+half];
    t_mid=&t[half];
    t_deeper=&t[n];

    /* sums of the operand halves go to z[n] and z[n+half]; cx, cy are their carries */
    cx=mr_addn(x,x_hi,z_hi,half_blocks);
    cy=mr_addn(y,y_hi,z_top,half_blocks);

    /* build (a0+a1)(b0+b1) in t, with any overflow counted in carry */
    mr_karmul(half,t_deeper,z_hi,z_top,t);
    if (cx!=0) carry+=mr_incn(z_top,t_mid,half_blocks);
    if (cy!=0) carry+=mr_incn(z_hi,t_mid,half_blocks);
    if ((cx&cy)!=0) carry++;

    /* a1.b1 is produced in z[n] and taken off t */
    mr_karmul(half,t_deeper,x_hi,y_hi,z_hi);
    carry-=mr_decn(z_hi,t,blocks);

    /* rebuild a0.b0 in z before taking it off t too */
    mr_incn(z,z_mid,half_blocks);
    mr_decn(t,z_mid,half_blocks);
    carry-=mr_decn(z,t,blocks);
    carry+=mr_incn(t,z_mid,blocks);

    /* the collected carry is applied at word n+half */
    top=z_top;
    *top+=carry;
    if (*top<carry)
    { /* wrapped round - ripple upwards (seldom needed) */
        do
        {
            top++;
            (*top)++;
        } while (*top==0);
    }
}

void kcm_mul(_MIPD_ big x,big y,big z)
{ /* Karatsuba product of x and y at the word length of the current modulus,
     built in w0 (with w7 as workspace) and then copied to z */
    unsigned int k;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    int words=(int)mr_mip->modulus->len;
    big prod=mr_mip->w0;
    big work=mr_mip->w7;

    zero(prod);

    /* wipe any workspace words from index 2*words up to its recorded length */
    k=2*words;
    while (k<(work->len&MR_OBITS))
    {
        work->w[k]=0;
        k++;
    }

    mr_karmul(words,work->w,x->w,y->w,prod->w);

    work->len=2*words;
    prod->len=2*words;
    copy(prod,z);
}

void kcm_multiply(_MIPD_ int n,big x,big y,big z)
{ /* Karatsuba product of two n-word values, built in w0 and copied to z.
     n *must* be MR_KCM*2^m for m>=0 */
    unsigned int k;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    big prod=mr_mip->w0;
    big work=mr_mip->w7;

    zero(prod);

    /* wipe any workspace words from index 2*n up to its recorded length */
    k=2*n;
    while (k<(work->len&MR_OBITS))
    {
        work->w[k]=0;
        k++;
    }

    mr_karmul(n,work->w,x->w,y->w,prod->w);

    work->len=2*n;
    prod->len=2*n;
    copy(prod,z);
}

void kcm_square(_MIPD_ int n,big x,big z)
{ /* Square of an n-word value via mr_karsqr, built in w0 and copied to z.
     n *must* be MR_KCM*2^m for m>=0 */
    unsigned int k;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    big prod=mr_mip->w0;
    big work=mr_mip->w7;

    zero(prod);

    /* wipe any workspace words from index 2*n up to its recorded length */
    k=2*n;
    while (k<(work->len&MR_OBITS))
    {
        work->w[k]=0;
        k++;
    }

    mr_karsqr(n,work->w,x->w,prod->w);

    work->len=2*n;
    prod->len=2*n;
    copy(prod,z);
}

BOOL kcm_top(_MIPD_ int n,big x,big y,big z)
{ /* to support floating-point - see float.cpp and fmt function in big.cpp.
     Forms x*y in w0, shifts it by n minus its length, copies it to z, and
     returns TRUE when that length was below 2*n. */
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    unsigned int k;
    int digits;
    big prod=mr_mip->w0;

    zero(prod);

    if (n>=MR_KCM)
    { /* large enough for the Karatsuba routines, with w7 as workspace */
        big work=mr_mip->w7;

        k=2*n;
        while (k<(work->len&MR_OBITS))
        {
            work->w[k]=0;
            k++;
        }

        if (x==y)
        { /* same operand twice, so the squaring routine serves */
            mr_karsqr(n,work->w,x->w,prod->w);
        }
        else
        {
            mr_karmul(n,work->w,x->w,y->w,prod->w);
        }

        work->len=2*n;
        prod->len=2*n;
        mr_lzero(prod);
    }
    else
    { /* below MR_KCM words: ordinary multiply() with the check flag off around it */
        mr_mip->check=OFF;
        multiply(_MIPP_ x,y,prod);
        mr_mip->check=ON;
    }

    digits=mr_lent(prod);
    mr_shift(_MIPP_ prod,n-digits,prod);
    copy(prod,z);

    return (digits<2*n) ? TRUE : FALSE;
}

void kcm_sqr(_MIPD_ big x,big z)
{ /* Square of x at the word length of the current modulus via mr_karsqr,
     built in w0 (with w7 as workspace) and then copied to z */
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    unsigned int k;
    int words=(int)mr_mip->modulus->len;
    big prod=mr_mip->w0;
    big work=mr_mip->w7;

    zero(prod);

    /* wipe any workspace words from index 2*words up to its recorded length */
    k=2*words;
    while (k<(work->len&MR_OBITS))
    {
        work->w[k]=0;
        k++;
    }

    mr_karsqr(words,work->w,x->w,prod->w);

    work->len=2*words;
    prod->len=2*words;
    copy(prod,z);
}

void kcm_redc(_MIPD_ big z,big w)
{ /* fast karatsuba Montgomery reduction of z, result left in w */
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    int blocks,words=(int)mr_mip->modulus->len;
    unsigned int k;
    big acc=mr_mip->w0;
    big work=mr_mip->w7;
    big low=mr_mip->ws;
    big mod=mr_mip->modulus;

    blocks=words/MR_KCM;
    copy(z,acc);

    /* wipe any workspace words from index 2*words up to its recorded length */
    k=2*words;
    while (k<(work->len&MR_OBITS))
    {
        work->w[k]=0;
        k++;
    }

    /* w receives the words of the working copy from index words upwards */
    mr_cpy(&(acc->w[words]),w->w,blocks);

    /* ws = bottom half of (working copy * big_ndash) */
    mr_karmul_lower(words,work->w,acc->w,mr_mip->big_ndash->w,low->w);

    /* anything w held beyond its first `words` entries is cleared */
    k=words;
    while (k<(w->len&MR_OBITS))
    {
        w->w[k]=0;
        k++;
    }

    w->len=words;
    low->len=words;

    /* upper half of ws * modulus, computed over the working copy */
    mr_karmul_upper(words,work->w,low->w,mod->w,acc->w);
    work->len=2*words;
    acc->len=2*words;

    /* take that upper half off w; a non-zero result from mr_decn means the
       modulus has to be added back */
    if (mr_decn(&(acc->w[words]),w->w,blocks)!=0)
    {
        mr_incn(mod->w,w->w,blocks);
    }

    mr_lzero(w);
}

#endif

