
/*
 * Size of every big number handled in this file.
 *
 * rsa_bit is defined outside this section. It has to be usable in a constant
 * expression, because these macros are used as immediate operands inside the
 * asm blocks and as array bounds.
 *
 * BN_BIT   - width of a big number in bits: rsa_bit plus 32 extra bits
 *            (presumably headroom for intermediate results - confirm).
 * BN_BYTE  - the same width in bytes.
 * BN_DWORD - the same width in 32-bit words.
 *
 * Each replacement list is fully parenthesized so that the macros keep their
 * value inside larger expressions such as n / BN_DWORD or n % BN_BYTE.
 */
#define BN_BIT          (rsa_bit+32)
#define BN_BYTE         (BN_BIT/8)
#define BN_DWORD        (BN_BIT/32)

typedef unsigned char BYTE;
/* The asm code in this file accesses DWORD elements as 32-bit quantities. */
typedef unsigned long DWORD;

/* Clear a big number: every one of the BN_DWORD words of x is set to zero. */
void bn_mov0(DWORD x[])
{
  asm {
    // destination pointer, word count and fill value for the string store
    mov     edi, x
    mov     ecx, BN_DWORD
    xor     eax, eax
    // make the string store walk upwards through memory
    cld
    rep     stosd
  }
}

/*
 * Load a one-word constant into a big number: word 0 of x receives c and the
 * remaining BN_DWORD-1 words are cleared.
 */
void bn_movc(DWORD x[], DWORD c)
{
  asm {
    mov     eax, c
    mov     edi, x
    mov     ecx, BN_DWORD-1
    cld
    // word 0 = c
    stosd
    // words 1 .. BN_DWORD-1 = 0
    xor     eax, eax
    rep     stosd
  }
}

/* Copy the BN_DWORD words of src into dest, lowest address first. */
void bn_copy(DWORD dest[], DWORD src[])
{
  asm {
    mov     esi, src
    mov     edi, dest
    mov     ecx, BN_DWORD
    // ascending addresses for the string move
    cld
    rep     movsd
  }
}

/*
 * Shift the big number x left by one bit, in place.
 *
 * Word 0 is the least significant word: the bit shifted out of each word is
 * carried into bit 0 of the next higher word. The bit shifted out of the top
 * word is lost.
 */
void bn_shl1(DWORD x[])
{
  asm{
    mov     edi, x
    // xor clears the carry flag, so a 0 bit is rotated into bit 0 of word 0
    xor     eax, eax
    mov     ecx, BN_DWORD
    // rcl puts the old carry into bit 0 and the old top bit into the carry.
    // The pointer is advanced with lea and the count with loop because
    // neither instruction changes the carry flag between two rcl steps.
@@1:rcl     dword ptr [edi], 1
    lea     edi, [edi+4]
    loop    @@1
  }
}

/*
 * Read one bit of a big number.
 *
 * x    - the number, treated as a bit string starting at bit 0 of word 0.
 * bitn - index of the bit to read.
 *
 * Returns 1 when the bit is set and 0 when it is clear.
 */
int bn_getbit(DWORD x[], DWORD bitn)
{
  asm {
    mov     ecx, bitn
    mov     edi, x
    // eax = 0 before the test; the test result arrives in the carry flag
    xor     eax, eax
    bt      [edi], ecx
    // eax = 0 + 0 + carry
    adc     eax, eax
  }
  return _EAX;
}

/*
 * dest += src, in place, over all BN_DWORD words.
 *
 * The addition starts at word 0 and propagates the carry upwards. The carry
 * out of the top word is not reported to the caller.
 */
void bn_add(DWORD dest[], DWORD src[])
{
  asm {
    mov     edi, dest
    mov     esi, src
    mov     ecx, BN_DWORD
    // xor clears the carry flag for the first adc; eax is reloaded right after
    xor     eax, eax
    // mov, lea and loop leave the carry flag untouched, so the carry produced
    // by one adc is still intact when the next word is added
@@2:mov     eax, [esi]
    adc     [edi], eax
    lea     esi, [esi+4]
    lea     edi, [edi+4]
    loop    @@2
  }
}

/*
 * dest -= src, in place, over all BN_DWORD words.
 *
 * The subtraction starts at word 0 and propagates the borrow upwards. The
 * borrow out of the top word is not reported to the caller.
 */
void bn_sub(DWORD dest[], DWORD src[])
{
  asm {
    mov     edi, dest
    mov     esi, src
    // xor clears the carry flag (no borrow) for the first sbb
    xor     eax, eax
    mov     ecx, BN_DWORD
    // mov, lea and loop leave the carry flag untouched, so the borrow produced
    // by one sbb is still intact when the next word is subtracted
@@3:mov     eax, [esi]
    sbb     [edi], eax
    lea     esi, [esi+4]
    lea     edi, [edi+4]
    loop    @@3
  }
}

/*
 * Unsigned comparison of two big numbers.
 *
 * The words are compared from index BN_DWORD-1 down to index 0 and the first
 * difference decides the result.
 *
 * Returns 1 when x > y, -1 when x < y and 0 when all words are equal.
 */
int bn_cmp(DWORD x[], DWORD y[])
{
  asm {
    mov     edi, y
    mov     esi, x
    // ecx = index of the word being compared, starting at the top word
    mov     ecx, BN_DWORD-1
@@0:mov     ebx, [edi+ecx*4]
    mov     eax, [esi+ecx*4]
    cmp     eax, ebx
    ja      @@aa
    jb      @@bb
    // words are equal: go on with the next lower word, if there is one
    dec     ecx
    jns     @@0
    xor     eax, eax
    jmp     @@rt
@@aa:
    mov     eax, 1
    jmp     @@rt
@@bb:
    mov     eax, -1
@@rt:
  }
  return _EAX;
}

/*
 * Find the highest set bit of a big number.
 *
 * Returns the index of the most significant 1 bit of x. When x is entirely
 * zero the result is 0, which is the same value returned when only bit 0 is
 * set, so the caller cannot tell these two cases apart from the result.
 */
int scanmaxbit(DWORD x[])
{
/*
  Reference C version of the assembly below:

  for (int i=BN_BIT-1; i>=0; i--)
    if (bn_getbit(x,i)!=0) return i;
  return 0;
*/
  asm {
    // esi -> top word of x
    mov     esi, x
    add     esi, BN_BYTE-4
    // eax is lowered by 32 per word, so after each step it holds the bit
    // index of bit 0 of the word that was just loaded into edx
    mov     eax, BN_BIT
    mov     ecx, BN_DWORD
@@4:mov     edx, [esi]
    sub     esi, 4
    sub     eax, 32
    // the two sub instructions changed the flags, so test edx again here;
    // loopz repeats while the word was zero and words remain
    or      edx, edx
    loopz   @@4
    // esi = bit offset inside the word; it stays 0 when every word was zero,
    // in which case bsr is skipped because it does not define its result
    // for a zero source
    xor     esi, esi
    or      edx, edx
    jz      @@5
    bsr     esi, edx
@@5:add     eax, esi
  }
  return _EAX;
}

/*
 * r = x * y by shift-and-add, keeping only the low BN_DWORD words.
 *
 * The bits of y are consumed from its highest set bit down to bit 0: r is
 * doubled for every bit and x is added when the bit is 1. r is cleared before
 * y is examined.
 */
void bn_mul(DWORD r[], DWORD x[], DWORD y[])
{
  int bit;

  bn_mov0(r);
  bit = scanmaxbit(y);
  while (bit >= 0)
  {
    bn_shl1(r);
    if (bn_getbit(y,bit) != 0)
    {
      bn_add(r,x);
    }
    bit--;
  }
}

/*
 * m = x mod y by bitwise long division.
 *
 * The bits of x are fed into m from the highest set bit down to bit 0. After
 * each bit, y is subtracted from m once if m is not smaller than y. m is
 * cleared before x is examined.
 */
void bn_mod(DWORD m[], DWORD x[], DWORD y[])
{
  int bit;

  bn_mov0(m);
  bit = scanmaxbit(x);
  while (bit >= 0)
  {
    // bring the next bit of x down into the running remainder
    bn_shl1(m);
    m[0] |= bn_getbit(x,bit);
    if (!(bn_cmp(m,y) < 0))
    {
      bn_sub(m,y);
    }
    bit--;
  }
}

/*
 * r = (x * y) mod m, computed by shift-and-add with a reduction after every
 * step. Declared with the pascal calling convention.
 *
 * The whole routine is one asm block that inlines the work of bn_mov0,
 * scanmaxbit, bn_shl1, bn_getbit, bn_add, bn_cmp and bn_sub. The commented C
 * code below is the equivalent reference implementation.
 *
 * r is cleared first and is rewritten on every step while x, y and m are
 * still being read.
 *
 * NOTE(review): each step is followed by at most one subtraction of m, so a
 * fully reduced result appears to require x < m - confirm against callers.
 * NOTE(review): ebx, esi and edi are modified here without an explicit
 * save/restore in the block - presumably the compiler preserves them; confirm.
 */
void pascal bn_mulmod(DWORD r[], DWORD x[], DWORD y[], DWORD m[])
{
/*
  Reference C version of the assembly below:

  bn_mov0(r);
  for (int i=scanmaxbit(y); i>=0; i--)
  {
    bn_shl1(r);
    if (bn_cmp(r,m)>=0) bn_sub(r,m);
    if (bn_getbit(y,i))
    {
      bn_add(r,x);
      if (bn_cmp(r,m)>=0) bn_sub(r,m);
    }
  }
*/

  asm {
//  bn_mov0(r);
    mov     edi, r
    xor     eax, eax
    mov     ecx, BN_DWORD
    cld
    rep     stosd

//  for (int i=scanmaxbit(y); i>=0; i--)
//  {

    // Inlined scanmaxbit(y): ebx ends up as the index of the highest set bit
    // of y, or 0 when y is entirely zero. ebx is the loop counter i from here
    // on and must survive everything below, including @@cmpsub.
    mov     esi, y
    add     esi, BN_BYTE-4
    mov     ebx, BN_BIT
    mov     ecx, BN_DWORD
@@q4:
    mov     edx, [esi]
    sub     esi, 4
    sub     ebx, 32
    or      edx, edx
    loopz   @@q4
    xor     esi, esi
    or      edx, edx
    jz      @@q5
    bsr     esi, edx
@@q5:
    add     ebx, esi

@@b:

//  bn_shl1(r);

    // xor clears the carry flag; lea and loop keep it intact between rcl steps
    mov     edi, r
    xor     eax, eax
    mov     ecx, BN_DWORD
@@a:rcl     dword ptr [edi], 1
    lea     edi, [edi+4]
    loop    @@a

    // reduce after the doubling
    call    @@cmpsub

//  if (bn_getbit(y,i))
//  {

    // bit ebx of y goes to the carry flag; skip the addition when it is 0
    mov     eax, y
    bt      [eax], ebx
    jnc     @@c

//  bn_add(r,x);

    // xor clears the carry flag for the first adc
    mov     esi, x
    mov     edi, r
    xor     eax, eax
    mov     ecx, BN_DWORD
@@d:mov     eax, [esi]
    adc     [edi], eax
    lea     esi, [esi+4]
    lea     edi, [edi+4]
    loop    @@d

    // reduce after the addition
    call    @@cmpsub

//    }
//  }

@@c:

    // next lower bit of y; the loop ends once ebx drops below 0
    dec     ebx
    jns     @@b

    // jump over the local subroutine that follows
    jmp     @@exit

    // Local subroutine, entered with call and left with retn:
    // subtract m from r once if r >= m. It uses eax, ecx, edx, esi and edi
    // and leaves ebx alone.
@@cmpsub:

//  if (bn_cmp(r,m)>=0)

    // compare from the top word down: r < m returns at once, r > m goes to
    // the subtraction, and all words equal falls through into it as well
    mov     esi, r
    mov     edi, m
    mov     ecx, BN_DWORD-1
@@g:mov     eax, [esi+ecx*4]
    mov     edx, [edi+ecx*4]
    cmp     eax, edx
    jb      @@h
    ja      @@h2
    dec     ecx
    jns     @@g
@@h2:

// bn_sub(r,m);

    // xor clears the carry flag (no borrow) for the first sbb
    mov     esi, m
    mov     edi, r
    xor     eax, eax
    mov     ecx, BN_DWORD
@@i:mov     eax, [esi]
    sbb     [edi], eax
    lea     esi, [esi+4]
    lea     edi, [edi+4]
    loop    @@i
@@h:
    retn

@@exit:
  }
}

/*
 * Bitwise long division: d = x / y and m = x mod y.
 *
 * The bits of x are processed from its highest set bit down to bit 0. On each
 * step the quotient and the running remainder are doubled, the bit of x is
 * brought into the remainder, and when the remainder is not smaller than y,
 * y is subtracted and a 1 is recorded in the quotient. d and m are cleared
 * before x is examined.
 */
void bn_divmod(DWORD d[], DWORD m[], DWORD x[], DWORD y[])
{
  int bit;

  bn_mov0(d);
  bn_mov0(m);
  bit = scanmaxbit(x);
  while (bit >= 0)
  {
    bn_shl1(d);
    bn_shl1(m);
    m[0] |= bn_getbit(x,bit);
    if (!(bn_cmp(m,y) < 0))
    {
      bn_sub(m,y);
      d[0] |= 1;
    }
    bit--;
  }
}

/*
 * x = (a ^ b) mod m, by binary exponentiation from the lowest exponent bit up.
 *
 * x - receives the result.
 * a - base. It is copied before x is written, so x and a may be the same
 *     buffer.
 * b - exponent. Its bits are read during the whole loop, so it must not
 *     share storage with x.
 * m - modulus. It is read by every bn_mulmod call, so it must not share
 *     storage with x.
 *
 * p holds a^(2^i) mod m for the current bit i and t is the scratch result of
 * each bn_mulmod call.
 *
 * A multiplication is performed only for the 1 bits of b, so the running
 * time depends on the value of the exponent.
 *
 * NOTE(review): bn_mulmod subtracts m at most once per step, so this appears
 * to assume a < m - confirm against callers.
 */
void bn_powermod(DWORD x[], DWORD a[], DWORD b[], DWORD m[])
{
  DWORD p[BN_DWORD], t[BN_DWORD];

  // Copy the base before x is overwritten, so an in-place call with x == a
  // still raises the original value of a.
  bn_copy(p,a);
  bn_movc(x,1);
  int maxbit=scanmaxbit(b);
  for (int i=0; i<=maxbit; i++)
  {
    if (bn_getbit(b,i))
    {
      bn_mulmod(t,x,p,m);
      bn_copy(x,t);
    }
    // The square is consumed only by the next exponent bit, so it is not
    // computed after the top bit.
    if (i<maxbit)
    {
      bn_mulmod(t,p,p,m);
      bn_copy(p,t);
    }
  }
}


