//
// bmath.asm
//
// Copyright (C) 1996-7 by Leonard Janke (janke@unixg.ubc.ca)

// Bit Scan Reverse: returns the value the x86 BSRL instruction produces
// for x, i.e. the index of the highest set bit.
// NOTE(review): the instruction leaves its destination undefined when
// x == 0, so callers presumably never pass zero -- confirm.
inline int BMath::BSR(const unsigned int x)
{
  int highestSetBit;
  asm("bsrl %[value], %[index]\n\t"
      : [index] "=r" (highestSetBit)
      : [value] "rm" (x));
  return highestSetBit;
}

// Bit Scan Forward: returns the value the x86 BSFL instruction produces
// for x, i.e. the index of the lowest set bit.
// NOTE(review): the instruction leaves its destination undefined when
// x == 0, so callers presumably never pass zero -- confirm.
inline int BMath::BSF(const unsigned int x)
{
  int lowestSetBit;
  asm("bsfl %[value], %[index]\n\t"
      : [index] "=r" (lowestSetBit)
      : [value] "rm" (x));
  return lowestSetBit;
}

// Multi-word unsigned comparison: returns non-zero when x >= y.
//
// x, y   - arrays of `digits` 32-bit words.  Index 0 is compared first and
//          the first differing pair decides the result, so index 0 acts as
//          the most significant word.
// digits - number of words to compare; zero words compare as equal.
//
// i386 only: the instruction text names %esi/%edi/%ecx directly.
inline char BMath::GreaterThanOrEqualTo(const unsigned int* x,
					const unsigned int* y, 
					int digits)
{
  char result;
  asm ("cld\n\t"
       // Preset ZF=1/CF=0 ("equal").  REPE with a zero count performs no
       // compare at all, which would leave SETcc reading stale flags.
       "cmpl %%ecx, %%ecx\n\t"
       "repe\n\t"
       "cmpsl (%%esi), (%%edi)\n\t"
       "setaeb %0\n\t"
       // CMPS advances %esi/%edi and REPE counts %ecx down, so all three
       // are read-write operands; GCC does not allow an operand register
       // to be listed as a clobber as well.
       : "=a" (result), "+S" (x), "+D" (y), "+c" (digits)
       :
       : "cc", "memory"   // "memory": the asm reads x[] and y[]
       );
  return result;
}

// Multi-word unsigned comparison: returns non-zero when x > y.
//
// x, y   - arrays of `digits` 32-bit words.  Index 0 is compared first and
//          the first differing pair decides the result, so index 0 acts as
//          the most significant word.
// digits - number of words to compare; zero words compare as equal.
//
// i386 only: the instruction text names %esi/%edi/%ecx directly.
inline char BMath::GreaterThan(const unsigned int* x, 
			       const unsigned int* y, 
			       int digits)
{
  char result;
  asm ("cld\n\t"
       // Preset ZF=1/CF=0 ("equal").  REPE with a zero count performs no
       // compare at all, which would leave SETcc reading stale flags.
       "cmpl %%ecx, %%ecx\n\t"
       "repe\n\t"
       "cmpsl (%%esi), (%%edi)\n\t"
       "setab %0\n\t"
       // CMPS advances %esi/%edi and REPE counts %ecx down, so all three
       // are read-write operands; GCC does not allow an operand register
       // to be listed as a clobber as well.
       : "=a" (result), "+S" (x), "+D" (y), "+c" (digits)
       :
       : "cc", "memory"   // "memory": the asm reads x[] and y[]
       );
  return result;
}

// Multi-word unsigned comparison: returns non-zero when x <= y.
//
// x, y   - arrays of `digits` 32-bit words.  Index 0 is compared first and
//          the first differing pair decides the result, so index 0 acts as
//          the most significant word.
// digits - number of words to compare; zero words compare as equal.
//
// i386 only: the instruction text names %esi/%edi/%ecx directly.
inline char BMath::LessThanOrEqualTo(const unsigned int* x,
				     const unsigned int* y, 
				     int digits)
{
  char result;
  asm ("cld\n\t"
       // Preset ZF=1/CF=0 ("equal").  REPE with a zero count performs no
       // compare at all, which would leave SETcc reading stale flags.
       "cmpl %%ecx, %%ecx\n\t"
       "repe\n\t"
       "cmpsl (%%esi), (%%edi)\n\t"
       "setbeb %0\n\t"
       // CMPS advances %esi/%edi and REPE counts %ecx down, so all three
       // are read-write operands; GCC does not allow an operand register
       // to be listed as a clobber as well.
       : "=a" (result), "+S" (x), "+D" (y), "+c" (digits)
       :
       : "cc", "memory"   // "memory": the asm reads x[] and y[]
       );
  return result;
}

// Multi-word unsigned comparison: returns non-zero when x < y.
//
// x, y   - arrays of `digits` 32-bit words.  Index 0 is compared first and
//          the first differing pair decides the result, so index 0 acts as
//          the most significant word.
// digits - number of words to compare; zero words compare as equal.
//
// i386 only: the instruction text names %esi/%edi/%ecx directly.
inline char BMath::LessThan(const unsigned int* x, 
			    const unsigned int* y, 
			    int digits)
{
  char result;
  asm ("cld\n\t"
       // Preset ZF=1/CF=0 ("equal").  REPE with a zero count performs no
       // compare at all, which would leave SETcc reading stale flags.
       "cmpl %%ecx, %%ecx\n\t"
       "repe\n\t"
       "cmpsl (%%esi), (%%edi)\n\t"
       "setbb %0\n\t"
       // CMPS advances %esi/%edi and REPE counts %ecx down, so all three
       // are read-write operands; GCC does not allow an operand register
       // to be listed as a clobber as well.
       : "=a" (result), "+S" (x), "+D" (y), "+c" (digits)
       :
       : "cc", "memory"   // "memory": the asm reads x[] and y[]
       );
  return result;
}

// Multi-word comparison: returns non-zero when every one of the `digits`
// 32-bit words of x equals the corresponding word of y.
//
// x, y   - arrays of `digits` 32-bit words, scanned from index 0 upward
//          until the first mismatch.
// digits - number of words to compare; zero words compare as equal.
//
// i386 only: the instruction text names %esi/%edi/%ecx directly.
inline char BMath::EqualTo(const unsigned int* x, 
			   const unsigned int* y, 
			   int digits)
{
  char result;
  asm ("cld\n\t"
       // Preset ZF=1/CF=0 ("equal").  REPE with a zero count performs no
       // compare at all, which would leave SETcc reading stale flags.
       "cmpl %%ecx, %%ecx\n\t"
       "repe\n\t"
       "cmpsl (%%esi), (%%edi)\n\t"
       "seteb %0\n\t"
       // CMPS advances %esi/%edi and REPE counts %ecx down, so all three
       // are read-write operands; GCC does not allow an operand register
       // to be listed as a clobber as well.
       : "=a" (result), "+S" (x), "+D" (y), "+c" (digits)
       :
       : "cc", "memory"   // "memory": the asm reads x[] and y[]
       );
  return result;
}

// Multi-word comparison: returns non-zero when at least one of the
// `digits` 32-bit words of x differs from the corresponding word of y.
//
// x, y   - arrays of `digits` 32-bit words, scanned from index 0 upward
//          until the first mismatch.
// digits - number of words to compare; zero words compare as equal.
//
// i386 only: the instruction text names %esi/%edi/%ecx directly.
inline char BMath::NotEqualTo(const unsigned int* x, 
			      const unsigned int* y, 
			      int digits)
{
  char result;
  asm ("cld\n\t"
       // Preset ZF=1/CF=0 ("equal").  REPE with a zero count performs no
       // compare at all, which would leave SETcc reading stale flags.
       "cmpl %%ecx, %%ecx\n\t"
       "repe\n\t"
       "cmpsl (%%esi), (%%edi)\n\t"
       "setneb %0\n\t"
       // CMPS advances %esi/%edi and REPE counts %ecx down, so all three
       // are read-write operands; GCC does not allow an operand register
       // to be listed as a clobber as well.
       : "=a" (result), "+S" (x), "+D" (y), "+c" (digits)
       :
       : "cc", "memory"   // "memory": the asm reads x[] and y[]
       );
  return result;
}

// Multi-word addition: z[i] = x[i] + y[i] + carry for i = digits-1 down
// to 0, i.e. the highest index is the least significant word.
//
// Returns the carry out of word 0 (0 or 1).  With digits <= 0 nothing is
// touched and 0 is returned.
//
// i386 only: the instruction text names the registers directly.
inline char BMath::BasicAdd(const unsigned int* x, 
			    const unsigned int* y, 
			    unsigned int* z, 
			    int digits)
{
  // The loop below is a do/while and would otherwise process index -1.
  if ( digits <= 0 )
    return 0;

  char carry;
  asm volatile("clc\n\t"
	       // DECL leaves CF untouched, so the carry chain survives the
	       // index arithmetic between the ADCs.
	       "decl %%ecx\n\t"
	       ""
	       "0:\tmovl (%%esi,%%ecx,4), %%eax\n\t"
	       "adcl (%%ebx,%%ecx,4), %%eax\n\t"
	       "movl %%eax, (%%edi,%%ecx,4)\n\t"
	       "decl %%ecx\n\t"
	       "jns 0b\n\t"
	       "setcb %0\n\t"
	       // %ecx is consumed by the loop, hence read-write; it must not
	       // be repeated in the clobber list.
	       : "=qm" (carry), "+c" (digits) 
	       : "S" (x), "D" (z), "b" (y)
	       : "eax", "cc", "memory"
	       );
  return carry;
}

// Multi-word addition with carry ripple: z[i] = x[i] + y[i] + carry for
// i = digits-1 down to 0.  If a carry is left after word 0 it is added
// into z[-1], then z[-2], ... until it is absorbed.
//
// NOTE(review): the ripple loop has no lower bound, so the caller must
// guarantee that z sits inside a larger number whose higher-order words
// can absorb the carry -- confirm against the call sites.
//
// With digits <= 0 nothing is touched.
//
// i386 only: the instruction text names the registers directly.
inline void BMath::RippleAdd(const unsigned int* x, 
			     const unsigned int* y, 
			     unsigned int* z, 
			     int digits)
{
  // The loop below is a do/while and would otherwise process index -1.
  if ( digits <= 0 )
    return;

  // CLC: the first ADC must not consume whatever CF happened to hold on
  // entry (DECL does not reset it).
  asm volatile("clc\n\t"
	       "decl %%ecx\n\t"
	       ""
	       "0:\tmovl (%%esi,%%ecx,4), %%eax\n\t"
	       "adcl (%%ebx,%%ecx,4), %%eax\n\t"
	       "movl %%eax, (%%edi,%%ecx,4)\n\t"
	       "decl %%ecx\n\t"
	       "jns 0b\n\t"
	       "jnc 2f\n\t"
	       // %ecx is -1 here: ripple the carry into the words before z.
               "1:\tadcl $0, (%%edi,%%ecx,4)\n\t"
               "decl %%ecx\n\t"
               "jc 1b\n\t"
	       "2:\t\n"
	       : "+c" (digits) 
	       : "S" (x), "D" (z), "b" (y)
	       : "eax", "cc", "memory"
	       );
}


// Multi-word subtraction: z[i] = x[i] - y[i] - borrow for i = digits-1
// down to 0, i.e. the highest index is the least significant word.
//
// Returns the borrow out of word 0 (0 or 1).  With digits <= 0 nothing is
// touched and 0 is returned.
//
// i386 only: the instruction text names the registers directly.
inline char BMath::BasicSubtract(const unsigned int* x, 
				 const unsigned int* y,
				 unsigned int* z, 
				 int digits)
{
  // The loop below is a do/while and would otherwise process index -1.
  if ( digits <= 0 )
    return 0;

  char borrow;
  asm volatile("clc\n\t"
	       // DECL leaves CF untouched, so the borrow chain survives the
	       // index arithmetic between the SBBs.
	       "decl %%ecx\n\t"
	       "" 
	       "0:\tmovl (%%esi,%%ecx,4), %%eax\n\t"
	       "sbbl (%%edx,%%ecx,4), %%eax\n\t"
	       "movl %%eax, (%%ebx,%%ecx,4)\n\t"
	       "decl %%ecx\n\t"
	       "jns 0b\n\t"
	       "setcb %0\n\t"
	       // %ecx is consumed by the loop, hence read-write; it must not
	       // be repeated in the clobber list.
	       : "=qm" (borrow), "+c" (digits)
	       : "S" (x), "d" (y), "b" (z)
	       : "eax", "cc", "memory" 
	       );
  return borrow;
}

// Multi-word subtraction with borrow ripple: z[i] = x[i] - y[i] - borrow
// for i = digits-1 down to 0.  If a borrow is left after word 0 it is
// subtracted from z[-1], then z[-2], ... until it is absorbed.
//
// NOTE(review): the ripple loop has no lower bound, so the caller must
// guarantee that z sits inside a larger number whose higher-order words
// can absorb the borrow -- confirm against the call sites.
//
// With digits <= 0 nothing is touched.
//
// i386 only: the instruction text names the registers directly.
inline void BMath::RippleSubtract(const unsigned int* x, 
				  const unsigned int* y,
				  unsigned int* z, 
				  int digits)
{
  // The loop below is a do/while and would otherwise process index -1.
  if ( digits <= 0 )
    return;

  asm volatile("clc\n\t"
	       "decl %%ecx\n\t"
	       "" 
	       "0:\tmovl (%%esi,%%ecx,4), %%eax\n\t"
	       "sbbl (%%edx,%%ecx,4), %%eax\n\t"
	       "movl %%eax, (%%ebx,%%ecx,4)\n\t"
	       "decl %%ecx\n\t"
	       "jns 0b\n\t"
	       "jnc 2f\n\t"
	       // %ecx is -1 here: ripple the borrow into the words before z.
               "1:\tsbbl $0, (%%ebx,%%ecx,4)\n\t"
               "decl %%ecx\n\t"
               "jc 1b\n\t"
	       "2:\t\n"
	       : "+c" (digits)
	       : "S" (x), "d" (y), "b" (z)
	       : "eax", "cc", "memory" 
	       );
}

// Adds one to x[digits-1]; while that addition overflows, moves on to
// x[digits-2], x[digits-3], ... and adds one there.
//
// NOTE(review): the loop stops only when a word does not overflow; there
// is no check that the index stays >= 0, so the caller must guarantee the
// carry is absorbed -- confirm against the call sites.
//
// i386 only: the instruction text names %edi/%ecx directly.
inline void BMath::RippleIncrement(unsigned int* x, int digits)
{
  asm volatile("0:\tdecl %%ecx\n\t"
	       "addl $1, (%%edi,%%ecx,4)\n\t"
	       "jc 0b\n\t"
	       // %ecx is modified, hence a read-write operand rather than an
	       // input that is also listed as a clobber.
	       : "+c" (digits)
	       : "D" (x)
	       : "cc", "memory"
	       );
}

// Adds one to the multi-word number x, starting at x[digits-1] and moving
// toward x[0] for as long as the addition overflows.
//
// Returns 1 if the carry ran past x[0] (every word overflowed), else 0.
//
// NOTE(review): the loop body runs before the index is tested, so
// digits == 0 would touch x[-1]; callers presumably pass digits >= 1.
//
// i386 only: the instruction text names %edi/%ecx directly.
inline char BMath::Increment(unsigned int* x, int digits)
{
  char carry;
  asm volatile("decl %%ecx\n\t"
               "0:\taddl $1, (%%edi,%%ecx,4)\n\t"
               "jnc 1f\n\t"
               // DECL leaves CF untouched, so on loop exit SETC still sees
               // the carry of the last ADDL.
               "decl %%ecx\n\t"
               "jns 0b\n\t"
               "1:\tsetcb %0\n\t"
               // %ecx is modified, hence a read-write operand rather than
               // an input that is also listed as a clobber.
               : "=qm" (carry), "+c" (digits)
               : "D" (x)
               : "cc", "memory"
               );
  return carry;
}

// Subtracts one from x[digits-1]; while that subtraction borrows, moves
// on to x[digits-2], x[digits-3], ... and subtracts one there.
//
// NOTE(review): the loop stops only when a word does not borrow; there is
// no check that the index stays >= 0, so the caller must guarantee the
// borrow is absorbed -- confirm against the call sites.
//
// i386 only: the instruction text names %edi/%ecx directly.
inline void BMath::RippleDecrement(unsigned int* x, int digits)
{
  asm volatile("0:\tdecl %%ecx\n\t"
	       "subl $1, (%%edi,%%ecx,4)\n\t"
	       "jc 0b\n\t"
	       // %ecx is modified, hence a read-write operand rather than an
	       // input that is also listed as a clobber.
	       : "+c" (digits)
	       : "D" (x)
	       : "cc", "memory"
	       );
}

// Multiply-accumulate by a single word: adds x * y into z.
//
// x       - multiplicand, digitsX 32-bit words, highest index least
//           significant.
// y       - single-word multiplier.
// z       - accumulator.  Words z[1..digitsX] receive the low parts of the
//           partial products, the final carry word is added into z[0], and
//           any carry out of z[0] ripples into z[-1], z[-2], ...
// digitsX - number of words in x; with digitsX == 0 nothing is touched.
//
// NOTE(review): the final ripple has no lower bound, so the caller must
// guarantee that the words before z can absorb the carry.
//
// i386 only: the instruction text names the registers directly.  %ebp is
// used as the running carry word and is saved/restored around the asm by
// hand, which is why it does not appear in the clobber list.
inline void BMath::BasicMultiply(const unsigned int* x,
				 const unsigned int y,
				 unsigned int* z, 
				 int digitsX)
{
  // new multiplication algorithm taken from 
  // Knuth, _The_Art_of_Computer_Programming_, Volume II 
  // Pentium optimizations by Leonard Janke  
  //
  // last inspected January 30, 1997
  
  asm volatile("pushl %%ebp\n\t"
	       "movl $0, %%ebp\n\t"
               "decl %%ecx\n\t"
	       "js 2f\n\t"

               // edx:eax = x[i] * y
               "0:\tmovl (%%esi,%%ecx,4), %%eax\n\t"
               "mull %%ebx\n\t"

               // add the carry word of the previous step, then z[i+1]
	       "addl %%ebp, %%eax\n\t"
               "movl (%%edi,%%ecx,4), %%ebp\n\t"

               "adcl $0, %%edx\n\t"
               "addl %%ebp, %%eax\n\t"

               // new carry word = high half plus the second carry
	       "movl %%edx, %%ebp\n\t"
               "movl %%eax, (%%edi,%%ecx,4)\n\t"

               "adcl $0, %%ebp\n\t"
               "decl %%ecx\n\t"

	       "jns 0b\n\t"

               // %ecx is -1 here and %edi is z+1, so this targets z[0]
	       "addl %%ebp, (%%edi,%%ecx,4)\n\t"
	       "jnc 2f\n\t"

               "1:\tdecl %%ecx\n\t"
               "adcl $0, (%%edi,%%ecx,4)\n\t"
               "jc 1b\n\t"

               "2:\tpopl %%ebp\n\t"
               // Single-letter register constraints; %ecx is modified and
               // therefore read-write instead of being input + clobber.
               : "+c" (digitsX)
               : "S" (x), "D" (z+1), "b" (y)
               : "eax", "edx", "cc", "memory"
               );
}

// Multiplies two 2-word numbers into a 4-word product.
//
// x, y - factors, two 32-bit words each; index 0 is the high word.
// z    - receives the 4-word product, z[0] being the most significant
//        word.  All four words are overwritten.
//
// Uses three 32x32 multiplications: x[1]*y[1], x[0]*y[0] and
// |x[0]-x[1]| * |y[1]-y[0]|, the last one added or subtracted depending
// on the signs of the two differences.
//
// i386 only: the instruction text names the registers directly.  %ebp is
// used as scratch and is saved/restored around the asm by hand.
inline void BMath::MultDouble(const unsigned int* x, 
			      const unsigned int* y,
			      unsigned int* z)
{
  // multiply two qwords
  // algorithm from Knuth
  // Pentium optimizations by Leonard Janke  
  //
  // last inspected January 30, 1997

  asm volatile("pushl %%ebp\n\t"

               /*********************************/
               // low product x[1]*y[1], stored as lo, lo+hi, hi+carry
	       "movl 4(%%esi), %%eax\n\t"
	       "mull 4(%%ebx)\n\t"

	       "movl %%eax, 12(%%edi)\n\t"
	       "addl %%edx, %%eax\n\t"

	       "adcl $0, %%edx\n\t"
               "movl %%eax, 8(%%edi)\n\t"

               "movl %%edx, 4(%%edi)\n\t"

               /*********************************/
               // high product x[0]*y[0], added into z[2], z[1] and z[0]
	       "movl (%%esi), %%eax\n\t"
	       "mull (%%ebx)\n\t"

               "movl 8(%%edi), %%ebp\n\t"
               "movl 4(%%edi), %%ecx\n\t"
               
               "addl %%eax, %%ebp\n\t" 
               "nop\n\t"

               "adcl %%edx, %%eax\n\t"
               "movl %%ebp, 8(%%edi)\n\t"

               "adcl $0, %%edx\n\t"
               "addl %%eax, %%ecx\n\t"

               "adcl $0, %%edx\n\t"
               "movl %%ecx, 4(%%edi)\n\t"

               "movl %%edx, (%%edi)\n\t"
               "movl $0, %%ecx\n\t"

               /********************************/
               // cross term: %eax = |x[0]-x[1]|, %edx = |y[1]-y[0]|,
               // %ecx = 1 when exactly one of the differences was negative

	       "movl (%%esi), %%eax\n\t"
	       "movl 4(%%esi), %%ebp\n\t"
	       "subl %%ebp, %%eax\n\t"
	       "jae 1f\n\t"

	       "negl %%eax\n\t"
	       "movl $1, %%ecx\n\t"

	       "1:\tmovl 4(%%ebx), %%edx\n\t"
               "movl (%%ebx), %%ebp\n\t"
               "subl %%ebp, %%edx\n\t"
	       "jae 2f\n\t"

	       "negl %%edx\n\t"
	       "xorl $1, %%ecx\n\t"

	       "2:\tmull %%edx\n\t"
	       "jecxz 3f\n\t"

               // signs differ: subtract the cross product from z[2..0]
               "movl 8(%%edi), %%ebp\n\t"
               "nop\n\t"

 	       "subl %%eax, %%ebp\n\t"
               "movl 4(%%edi), %%ecx\n\t"

	       "sbbl %%edx, %%ecx\n\t"
               "movl %%ebp, 8(%%edi)\n\t"

               "movl (%%edi), %%ebp\n\t"
               "movl %%ecx, 4(%%edi)\n\t"
              
	       "sbbl $0, %%ebp\n\t"
               "movl %%ebp, (%%edi)\n\t"
	              
               "jmp 4f\n\t"

               // signs agree: add the cross product into z[2..0]
               "3:\tmovl 8(%%edi), %%ebp\n\t"
               "nop\n\t"

               "addl %%eax, %%ebp\n\t"
               "movl 4(%%edi), %%ecx\n\t"

	       "adcl %%edx, %%ecx\n\t"
               "movl %%ebp, 8(%%edi)\n\t"

               "movl (%%edi), %%ebp\n\t"
               "movl %%ecx, 4(%%edi)\n\t"
              
	       "adcl $0, %%ebp\n\t"
               "movl %%ebp, (%%edi)\n\t"

	       "4:popl %%ebp\n\t"
	       : 
	       // "b", not "ebx": GCC reads a constraint string letter by
	       // letter, and 'e' would let a constant address be matched
	       // without %ebx ever being loaded.
	       : "D" (z), "S" (x), "b" (y)
	       : "eax", "edx", "ecx", "cc", "memory"
	       );
}


// Squares the 2-word number x and stores the 4-word result in y.
//
// x - input; words are read at byte offsets 0 and 4.
// y - output; words are written at byte offsets 0, 4, 8 and 12.  Carries
//     and borrows propagate from offset 12 toward offset 0, so y[0] is the
//     most significant word.
//
// The instruction text names %esi/%edi and 32-bit registers directly.
// %ebp is used as scratch and is saved/restored by the push/pop pair.
inline void BMath::SquareDouble(const unsigned int* x,
				unsigned int* y)
{
  // square a qword
  // same algorithm as for multiplication but 
  // a few cycles can be saved
  // Pentium optimizations by Leonard Janke  
  //
  // written and tested vs MultDouble on January 30, 1997

  asm volatile("pushl %%ebp\n\t"

               /*********************************/
               // x[1]^2 = hi:lo, stored as y[3] = lo, y[2] = lo + hi,
               // y[1] = hi + carry
	       "movl 4(%%esi), %%eax\n\t"
	       "mull %%eax\n\t"

	       "movl %%eax, 12(%%edi)\n\t"
	       "addl %%edx, %%eax\n\t"

	       "adcl $0, %%edx\n\t"
               "movl %%eax, 8(%%edi)\n\t"

               "movl %%edx, 4(%%edi)\n\t"

               /*********************************/
               // x[0]^2 = hi:lo, added as y[2] += lo, y[1] += lo + hi,
               // y[0] = hi + carries
	       "movl (%%esi), %%eax\n\t"
	       "mull %%eax\n\t"

               "movl 8(%%edi), %%ebp\n\t"
               "movl 4(%%edi), %%ecx\n\t"
               
               "addl %%eax, %%ebp\n\t" 
               "nop\n\t"

               "adcl %%edx, %%eax\n\t"
               "movl %%ebp, 8(%%edi)\n\t"

               "adcl $0, %%edx\n\t"
               "addl %%eax, %%ecx\n\t"

               "adcl $0, %%edx\n\t"
               "movl %%ecx, 4(%%edi)\n\t"

               "movl %%edx, (%%edi)\n\t"

               /********************************/
               // %eax = |x[0] - x[1]|; its square is subtracted from
               // y[2], y[1], y[0] with borrow propagation

	       "movl (%%esi), %%eax\n\t"
	       "subl 4(%%esi), %%eax\n\t"
	       "jae 1f\n\t"

	       "negl %%eax\n\t"
	       "1:\tmull %%eax\n\t"

               "movl 8(%%edi), %%ebp\n\t"
               "nop\n\t"

 	       "subl %%eax, %%ebp\n\t"
               "movl 4(%%edi), %%ecx\n\t"

	       "sbbl %%edx, %%ecx\n\t"
               "movl %%ebp, 8(%%edi)\n\t"

               "movl (%%edi), %%ebp\n\t"
               "movl %%ecx, 4(%%edi)\n\t"
              
	       "sbbl $0, %%ebp\n\t"
               "movl %%ebp, (%%edi)\n\t"
	              
               // label 4 is not the target of any jump in this block
	       "4:popl %%ebp\n\t"
	       : 
	       : "D" (y), "S" (x)
	       : "eax", "edx", "ecx", "memory"
	       );
}

// Divides the 64-bit value dividendHigh:dividendLow by divisor with a
// single DIVL instruction.
//
// quotient  - receives the 32-bit quotient (%eax after DIVL).
// remainder - receives the 32-bit remainder (%edx after DIVL).
//
// DIVL raises a divide-error exception when divisor is zero or when the
// quotient does not fit in 32 bits, i.e. unless dividendHigh < divisor.
inline void BMath::BasicDivide(unsigned int dividendHigh, 
			       unsigned int dividendLow,
			       unsigned int divisor,
			       unsigned int& quotient,
			       unsigned int& remainder)
{
  // %eax/%edx are both inputs and outputs.  They are expressed purely as
  // operands: GCC forbids listing an operand register as a clobber.
  asm("divl %[divisor]\n\t"
      : "=a" (quotient), "=d" (remainder)
      : "d" (dividendHigh), "a" (dividendLow), [divisor] "rm" (divisor)
      : "cc"
      );
}

// Returns the multi-word number `dividend` modulo the single word
// `divisor`.
//
// dividend - `digits` 32-bit words, highest index least significant.
// digits   - number of words; with digits <= 0 the result is 0.
// divisor  - modulus.  Zero still raises the CPU divide error.
//
// Three stack slots are used as working storage:
//   8(%esp)  running remainder, starts at 0
//   4(%esp)  (2^32)^k mod divisor for the current word, starts at 1
//   0(%esp)  2^32 mod divisor
// For each word, from index digits-1 down to 0:
//   remainder = (remainder + (power * word mod divisor)) mod divisor
//   power     = power * (2^32 mod divisor) mod divisor
//
// i386 only: the instruction text names the registers directly.
inline unsigned int BMath::ModSmall(const unsigned int* dividend, 
				    int digits, 
				    const unsigned int divisor)
{
  // divisor == 1: the initial 2^32 / divisor does not fit in the 32-bit
  // quotient and DIVL would fault, although the answer is trivially 0.
  // digits <= 0: the do/while loop below would read dividend[-1].
  if ( divisor == 1 || digits <= 0 )
    return 0;

  unsigned int remainder;
  asm("pushl $0\n\t"
      "pushl $1\n\t"
      // %edx:%eax = 2^32, so %edx = 2^32 mod divisor after the DIVL
      "movl $1, %%edx\n\t"
      "movl $0, %%eax\n\t"
      "divl %%ebx\n\t"
      "pushl %%edx\n\t"

      "decl %%ecx\n\t"

      "0:\tmovl 4(%%esp), %%eax\n\t"
      "mull (%%esi,%%ecx,4)\n\t"
      "divl %%ebx\n\t"
      "movl %%edx, %%eax\n\t"
      "movl $0, %%edx\n\t"
      "addl 8(%%esp), %%eax\n\t"
      "adcl $0, %%edx\n\t"
      "divl %%ebx\n\t"
      "movl %%edx, 8(%%esp)\n\t"
      "movl 4(%%esp), %%eax\n\t"
      "mull (%%esp)\n\t"
      "divl %%ebx\n\t"
      "movl %%edx, 4(%%esp)\n\t"
      "decl %%ecx\n\t"
      "jns 0b\n\t"

      // the third pop leaves the running remainder in %eax
      "popl %%eax\n\t"
      "popl %%eax\n\t"
      "popl %%eax\n\t"
      // %ecx is modified, hence read-write; "memory" because the asm reads
      // dividend[] through %esi.
      : "=a" (remainder), "+c" (digits)
      : "S" (dividend), "b" (divisor)
      : "edx", "cc", "memory"
      );
  return remainder;
}

// Shifts the multi-word number x left by `distance` bits in place.
//
// x        - `digits` 32-bit words, highest index least significant.
// digits   - number of words; with digits <= 0 nothing is touched.
// distance - shift count.  It is passed in %cl to SHLD, which uses only
//            the low 5 bits, so this routine handles shifts of less than
//            one word.
//
// Zero bits enter at x[digits-1]; bits shifted out of x[0] are discarded.
//
// i386 only: the instruction text names the registers directly.
inline void BMath::ShortShiftLeft(unsigned int* x, 
				  int digits,
				  const char distance)
{
  // The loop below is a do/while and would otherwise process index -1.
  if ( digits <= 0 )
    return;

  asm volatile("movl $0, %%eax\n\t"
               "decl %%edx\n\t"
               ""
               // %eax holds the original value of the next lower word,
               // whose top bits are shifted into the current one.
               "0:\tmovl (%%esi,%%edx,4), %%ebx\n\t"
               "shldl %%cl, %%eax, (%%esi,%%edx,4)\n\t"
               "movl %%ebx, %%eax\n\t"
               "decl %%edx\n\t"
               "jns 0b\n\t"
               // "c"/"d" pin the operands to %ecx/%edx.  The previous
               // "cl"/"edx" strings were read letter by letter and did not
               // guarantee the shift count was in %cl.
               : "+d" (digits)
               : "S" (x), "c" (distance)
               : "eax", "ebx", "cc", "memory"
               );
}


// Shifts the multi-word number x right by `distance` bits in place.
//
// x        - `digits` 32-bit words, index 0 most significant.
// digits   - number of words; with digits <= 0 nothing is touched.
// distance - shift count.  It is passed in %cl to SHRD, which uses only
//            the low 5 bits, so this routine handles shifts of less than
//            one word.
//
// Zero bits enter at x[0]; bits shifted out of x[digits-1] are discarded.
//
// i386 only: the instruction text names the registers directly.
inline void BMath::ShortShiftRight(unsigned int* x, 
				   int digits,
				   const char distance)
{
  // The loop exits only when the index equals digits after at least one
  // pass, so a count <= 0 would run far past the array.
  if ( digits <= 0 )
    return;

  asm volatile("movl $0, %%eax\n\t"
               "movl $0, %%edi\n\t"
               // %eax holds the original value of the next higher word,
               // whose low bits are shifted into the current one.
               "0:\tmovl (%%esi,%%edi,4), %%ebx\n\t"
               "shrdl %%cl, %%eax, (%%esi,%%edi,4)\n\t"
               "movl %%ebx, %%eax\n\t"
               "incl %%edi\n\t"
               "cmpl %%edx, %%edi\n\t"
               "jne 0b\n\t"
               :
               // "c"/"d" pin the operands to %ecx/%edx.  The previous
               // "cl"/"edx" strings were read letter by letter and did not
               // guarantee the shift count was in %cl.
               : "S" (x), "d" (digits), "c" (distance)
               : "eax", "ebx", "edi", "cc", "memory"
               );
}

// Shifts the multi-word number x right by one bit in place (halves it).
//
// x      - `digits` 32-bit words, index 0 most significant.
// digits - number of words; with digits <= 0 nothing is touched.
//
// Each word is rotated right through the carry flag, so the bit that
// leaves one word enters the top of the next.  A zero bit enters x[0];
// the bit leaving x[digits-1] is discarded.
//
// i386 only: the instruction text names the registers directly.
inline void BMath::Div2(unsigned int* x, int digits)
{
  // The loop below is a do/while and would otherwise shift x[0] anyway.
  if ( digits <= 0 )
    return;

  asm volatile("clc\n\t"
               // DECL, INCL and MOVL leave CF untouched, so the bit carried
               // between words survives the loop bookkeeping.
               "decl %%edx\n\t"
               "movl $0, %%ecx\n\t"
               "0:\trcrl $1, (%%esi,%%ecx,4)\n\t"
               "incl %%ecx\n\t"
               "decl %%edx\n\t"
               "jns 0b\n\t"
               // %edx is counted down by the loop, hence a read-write
               // operand rather than an input that is also a clobber.
               : "+d" (digits)
               : "S" (x)
               : "ecx", "cc", "memory"
               );
}
