/**********************************************************************
 *
 *          Multiplierless Approximation of the DCT
 *
 *	    Trac D. Tran
 *	    The Johns Hopkins University, ECE Department
 *	    105 Barton Hall
 *	    3400 N. Charles Street
 *	    Baltimore MD 21218-2686
 *	    410-516-7416 Office; 410-516-5566 Fax
 *	    ttran@ece.jhu.edu
 *	    
 *	    Copyright (c) 1998 Trac D. Tran
 *
 *   This program is Copyright (c) by Trac D. Tran.
 *   It may not be redistributed without the consent of the copyright
 *   holders. In no circumstances may the copyright notice be removed.
 *   The program may not be sold for profit nor may they be incorporated
 *   in commercial programs without the written permission of the copyright
 *   holders. This program is provided as is, without any express or
 *   implied warranty, without even the warranty of fitness for a
 *   particular purpose.
 *
 **********************************************************************/

/**********************************************************************
 *
 *	Name:        fdct_1d  (file-local helper)
 *	Description:	In-place 8-point forward transform used for both
 *                      the row pass and the column pass of fdct().
 *
 *	Input:        v - 8 ints, overwritten with the 8 outputs
 *	Returns:	nothing
 *	Notes:        Every input is consumed by the first butterfly stage
 *                      before any output is stored, so in-place use is
 *                      safe.  The scale factors 3, 5 and 7 are written
 *                      as multiplications because left-shifting a
 *                      negative int is undefined behaviour in C.  The
 *                      right shifts of negative values rely on the
 *                      compiler performing an arithmetic shift
 *                      (implementation-defined in ISO C).
 *
 **********************************************************************/

static void fdct_1d(int v[8])
{
  int s[8];               /* first butterfly stage */
  int e0, e1, e2, e3;     /* even half after the second butterfly */
  int m5, m6;             /* middle odd terms after the pi/4 step */
  int o4, o5, o6, o7;     /* odd half after the odd butterfly */
  int c2, c5, c6;         /* outputs that feed a following lifting step */
  int j;

  /* Sums go to s[0..3], differences to s[7..4]. */
  for (j = 0; j < 4; j++) {
    s[j]     = v[j] + v[7 - j];
    s[7 - j] = v[j] - v[7 - j];
  }

  /* Even half */
  e0 = s[0] + s[3];
  e1 = s[1] + s[2];
  e2 = s[1] - s[2];
  e3 = s[0] - s[3];
  v[0] = (e0 + e1) >> 1;
  v[4] = e0 - e1;
  /* 3pi/8 = -3/8u 3/8d */
  c6 = e2 - ((3 * e3 + 4) >> 3);
  c2 = e3 + ((3 * c6 + 4) >> 3);
  v[6] = c6;
  v[2] = c2;

  /* Odd half */
  /* pi/4 = 3/8d 5/8u */
  m6 = s[6] + ((3 * s[5] + 4) >> 3);
  m5 = ((5 * m6 + 4) >> 3) - s[5];

  o4 = s[4] + m5;
  o7 = s[7] + m6;
  o5 = s[4] - m5;
  o6 = s[7] - m6;
  /* 7pi/16 = -1/8u */
  v[1] = o7;
  v[7] = o4 - ((o7 + 4) >> 3);
  /* 3pi/16 = 7/8u -1/2d */
  c5 = o5 + ((7 * o6 + 4) >> 3);
  v[5] = c5;
  v[3] = o6 - ((c5 + 1) >> 1);
}

/**********************************************************************
 *
 *	Name:        fdct - Multiplierless Approximation of the FDCT
 *	Description:	Forward transform of an 8x8 block using integer
 *                      adds, shifts and small constant factors only.
 *                      The 8-point transform is applied to each row of
 *                      the input and then to each column of the result.
 *                      No zigzag reordering is done here.
 *
 *	Input:        block - 64 ints read as 8 rows of 8 (row-major);
 *                              not modified
 *                      d     - 8 row pointers, each to at least 8 ints
 *	Output:        d[r][c] receives the coefficient with vertical
 *                      index r and horizontal index c
 *	Returns:	nothing
 *	Notes:        The DC path is halved once per 1-D pass and once
 *                      more at the end, so a constant block of value p
 *                      gives d[0][0] == 8*p.  idct() doubles the DC term
 *                      on entry as the counterpart of that final halving.
 *
 *	Date: 981225	Author: Trac D. Tran
 *
 **********************************************************************/

void fdct( int *block, int **d)
{
  int row, col, n;
  int v[8];

  /* Horizontal transform: one row of the input block at a time. */
  for (row = 0; row < 8; row++) {
    for (n = 0; n < 8; n++) {
      v[n] = block[8 * row + n];
    }
    fdct_1d(v);
    for (n = 0; n < 8; n++) {
      d[row][n] = v[n];
    }
  }

  /* Vertical transform: in place on each column of d. */
  for (col = 0; col < 8; col++) {
    for (n = 0; n < 8; n++) {
      v[n] = d[n][col];
    }
    fdct_1d(v);
    for (n = 0; n < 8; n++) {
      d[n][col] = v[n];
    }
  }

  /* Extra halving of the DC term only. */
  d[0][0] >>= 1;
}

/**********************************************************************
 *
 *	Name:        idct_1d  (file-local helper)
 *	Description:	In-place 8-point inverse transform used for both
 *                      the column pass and the row pass of idct().
 *                      The result is left unscaled; idct() applies the
 *                      final rounding shift itself.
 *
 *	Input:        v - 8 ints, overwritten with the 8 outputs
 *	Returns:	nothing
 *	Notes:        All intermediate values are held in locals and the
 *                      outputs are stored only in the last butterfly, so
 *                      in-place use is safe.  The scale factors 3, 5
 *                      and 7 are written as multiplications because
 *                      left-shifting a negative int is undefined
 *                      behaviour in C.  The right shifts of negative
 *                      values rely on the compiler performing an
 *                      arithmetic shift (implementation-defined in
 *                      ISO C).
 *
 **********************************************************************/

static void idct_1d(int v[8])
{
  int e, f, g, h;         /* odd half after undoing the last two steps */
  int p0, p1, p2, p3;     /* even half before its butterfly */
  int q5, q6;             /* middle odd terms before the pi/4 step */
  int t[8];               /* inputs of the final butterfly */
  int j;

  /* 7pi/16 = 1/8u */
  e = v[7] + ((v[1] + 4) >> 3);
  h = v[1];
  /* 3pi/16 = 1/2d -7/8u */
  g = v[3] + ((v[5] + 1) >> 1);
  f = v[5] - ((7 * g + 4) >> 3);

  p0 = v[0] + ((v[4] + 1) >> 1);
  p1 = p0 - v[4];
  /* 3pi/8 = -3/8d 3/8u */
  p3 = v[2] - ((3 * v[6] + 4) >> 3);
  p2 = v[6] + ((3 * p3 + 4) >> 3);

  t[4] = e + f;
  q5   = e - f;
  t[7] = h + g;
  q6   = h - g;
  /* pi/4 = -5/8u -3/8d */
  t[5] = ((5 * q6 + 4) >> 3) - q5;
  t[6] = q6 - ((3 * t[5] + 4) >> 3);

  t[0] = p0 + p3;
  t[1] = p1 + p2;
  t[2] = p1 - p2;
  t[3] = p0 - p3;

  /* Final butterfly: sums to v[0..3], differences to v[7..4]. */
  for (j = 0; j < 4; j++) {
    v[j]     = t[j] + t[7 - j];
    v[7 - j] = t[j] - t[7 - j];
  }
}

/**********************************************************************
 *
 *	Name:        idct
 *	Description:	Inverse of fdct(): copies 64 integer coefficients
 *                      into the 8x8 array d, doubles the DC term, then
 *                      applies the 8-point inverse transform to every
 *                      column and afterwards to every row.  The row
 *                      pass finishes with a rounding shift,
 *                      (x + 8) >> 4.  No zigzag reordering is done here.
 *
 *	Input:        coeff - 64 ints read as 8 rows of 8 (row-major);
 *                              not modified
 *                      d     - 8 row pointers, each to at least 8 ints
 *	Output:        d[r][c] receives the reconstructed sample at
 *                      row r, column c
 *	Returns:	nothing
 *	Side effects:	none besides writing d.  The DC doubling is applied
 *                      to the working copy in d, so the same coeff
 *                      buffer can be decoded repeatedly.
 *
 *	Date: 981225	Author: Trac D. Tran, ttran@ece.jhu.edu
 *
 **********************************************************************/

void idct(int *coeff, int **d)
{
  int row, col, n;
  int v[8];

  /* Copy the coefficients into the working array. */
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      d[row][col] = coeff[8 * row + col];
    }
  }
  /* Counterpart of the final "d[0][0] >>= 1" in fdct(). */
  d[0][0] *= 2;

  /* Vertical: each column, unscaled. */
  for (col = 0; col < 8; col++) {
    for (n = 0; n < 8; n++) {
      v[n] = d[n][col];
    }
    idct_1d(v);
    for (n = 0; n < 8; n++) {
      d[n][col] = v[n];
    }
  }

  /* Horizontal: each row, followed by the rounding shift. */
  for (row = 0; row < 8; row++) {
    for (n = 0; n < 8; n++) {
      v[n] = d[row][n];
    }
    idct_1d(v);
    for (n = 0; n < 8; n++) {
      d[row][n] = (v[n] + 8) >> 4;
    }
  }
}
