/************************************************************************ 
 * 
 *  idct.c, inverse fast DCT for tmndecode (H.263 decoder) 
 *  Copyright (C) 1995, 1996  Telenor R&D, Norway 
 * 
 *  Contacts: 
 *  Robert Danielsen                  <Robert.Danielsen@nta.no> 
 * 
 *  Telenor Research and Development  http://www.nta.no/brukere/DVC/ 
 *  P.O.Box 83                        tel.:   +47 63 84 84 00 
 *  N-2007 Kjeller, Norway            fax.:   +47 63 81 00 76 
 * 
 *  Copyright (C) 1997  University of BC, Canada 
 *  Modified by: Michael Gallant <mikeg@ee.ubc.ca> 
 *               Guy Cote <guyc@ee.ubc.ca> 
 *               Berna Erol <bernae@ee.ubc.ca> 
 * 
 *  Contacts: 
 *  Michael Gallant                   <mikeg@ee.ubc.ca> 
 * 
 *  UBC Image Processing Laboratory   http://www.ee.ubc.ca/image 
 *  2356 Main Mall                    tel.: +1 604 822 4051 
 *  Vancouver BC Canada V6T1Z4        fax.: +1 604 822 5949 
 * 
 ************************************************************************/ 
 
/* Disclaimer of Warranty 
 *  
 * These software programs are available to the user without any license fee 
 * or royalty on an "as is" basis. The University of British Columbia 
 * disclaims any and all warranties, whether express, implied, or 
 * statuary, including any implied warranties or merchantability or of 
 * fitness for a particular purpose.  In no event shall the 
 * copyright-holder be liable for any incidental, punitive, or 
 * consequential damages of any kind whatsoever arising from the use of 
 * these programs. 
 *  
 * This disclaimer of warranty extends to the user of these programs and 
 * user's customers, employees, agents, transferees, successors, and 
 * assigns. 
 *  
 * The University of British Columbia does not represent or warrant that the 
 * programs furnished hereunder are free of infringement of any 
 * third-party patents. 
 *  
 * Commercial implementations of H.263, including shareware, are subject to 
 * royalty fees to patent holders.  Many of these patents are general 
 * enough such that they are unavoidable regardless of implementation 
 * design. 
 *  
 */ 
 
 
 
/* based on mpeg2decode, (C) 1994, MPEG Software Simulation Group and 
 * mpeg2play, (C) 1994 Stefan Eckart <stefan@lis.e-technik.tu-muenchen.de> 
 *  
 */ 
 
 
/**********************************************************/ 
/* inverse two dimensional DCT, Chen-Wang algorithm       */ 
/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984)             */ 
/* 32-bit integer arithmetic (8 bit coefficients)         */ 
/* 11 mults, 29 adds per DCT                              */ 
/* sE, 18.8.91       */ 
/**********************************************************/ 
/* coefficients extended to 12 bit for IEEE1180-1990      */ 
/* compliance                           sE,  2.1.94       */ 
/**********************************************************/ 
 
/* this code assumes >> to be a two's-complement arithmetic */ 
/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2               */ 
 
#include "config.h" 
 
#define W1 2841                 /* 2048*sqrt(2)*cos(1*pi/16) */ 
#define W2 2676                 /* 2048*sqrt(2)*cos(2*pi/16) */ 
#define W3 2408                 /* 2048*sqrt(2)*cos(3*pi/16) */ 
#define W5 1609                 /* 2048*sqrt(2)*cos(5*pi/16) */ 
#define W6 1108                 /* 2048*sqrt(2)*cos(6*pi/16) */ 
#define W7 565                  /* 2048*sqrt(2)*cos(7*pi/16) */ 
 
/* global declarations */ 
void init_idct _ANSI_ARGS_ ((void)); 
void idct _ANSI_ARGS_ ((short *block)); 
 
/* private data */ 
static short iclip[1024];       /* clipping table */ 
static short *iclp; 
 
/* private prototypes */ 
static void idctrow _ANSI_ARGS_ ((short *blk)); 
static void idctcol _ANSI_ARGS_ ((short *blk)); 
 
/* row (horizontal) IDCT 
 *  
 * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- * 
 * ( k + - ) * l ) l=0                      8          2 
 *  
 * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */ 
 
static void idctrow (short *blk) 
{ 
  int x0, x1, x2, x3, x4, x5, x6, x7, x8; 
 
  /* shortcut */ 
  if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) | 
        (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) 
  { 
    blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3; 
    return; 
  } 
  x0 = (blk[0] << 11) + 128;    /* for proper rounding in the fourth stage */ 
 
  /* first stage */ 
  x8 = W7 * (x4 + x5); 
  x4 = x8 + (W1 - W7) * x4; 
  x5 = x8 - (W1 + W7) * x5; 
  x8 = W3 * (x6 + x7); 
  x6 = x8 - (W3 - W5) * x6; 
  x7 = x8 - (W3 + W5) * x7; 
 
  /* second stage */ 
  x8 = x0 + x1; 
  x0 -= x1; 
  x1 = W6 * (x3 + x2); 
  x2 = x1 - (W2 + W6) * x2; 
  x3 = x1 + (W2 - W6) * x3; 
  x1 = x4 + x6; 
  x4 -= x6; 
  x6 = x5 + x7; 
  x5 -= x7; 
 
  /* third stage */ 
  x7 = x8 + x3; 
  x8 -= x3; 
  x3 = x0 + x2; 
  x0 -= x2; 
  x2 = (181 * (x4 + x5) + 128) >> 8; 
  x4 = (181 * (x4 - x5) + 128) >> 8; 
 
  /* fourth stage */ 
  blk[0] = (x7 + x1) >> 8; 
  blk[1] = (x3 + x2) >> 8; 
  blk[2] = (x0 + x4) >> 8; 
  blk[3] = (x8 + x6) >> 8; 
  blk[4] = (x8 - x6) >> 8; 
  blk[5] = (x0 - x4) >> 8; 
  blk[6] = (x3 - x2) >> 8; 
  blk[7] = (x7 - x1) >> 8; 
} 
 
/* column (vertical) IDCT 
 *  
 * 7                         pi         1 dst[8*k] = sum c[l] * src[8*l] * 
 * cos( -- * ( k + - ) * l ) l=0                        8          2 
 *  
 * where: c[0]    = 1/1024 c[1..7] = (1/1024)*sqrt(2) */ 
static void idctcol (short *blk) 
{ 
  int x0, x1, x2, x3, x4, x5, x6, x7, x8; 
 
  /* shortcut */ 
  if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) | 
        (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) | (x7 = blk[8 * 3]))) 
  { 
    blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3] = blk[8 * 4] = blk[8 * 5] = blk[8 * 6] = blk[8 * 7] = 
      iclp[(blk[8 * 0] + 32) >> 6]; 
    return; 
  } 
  x0 = (blk[8 * 0] << 8) + 8192; 
 
  /* first stage */ 
  x8 = W7 * (x4 + x5) + 4; 
  x4 = (x8 + (W1 - W7) * x4) >> 3; 
  x5 = (x8 - (W1 + W7) * x5) >> 3; 
  x8 = W3 * (x6 + x7) + 4; 
  x6 = (x8 - (W3 - W5) * x6) >> 3; 
  x7 = (x8 - (W3 + W5) * x7) >> 3; 
 
  /* second stage */ 
  x8 = x0 + x1; 
  x0 -= x1; 
  x1 = W6 * (x3 + x2) + 4; 
  x2 = (x1 - (W2 + W6) * x2) >> 3; 
  x3 = (x1 + (W2 - W6) * x3) >> 3; 
  x1 = x4 + x6; 
  x4 -= x6; 
  x6 = x5 + x7; 
  x5 -= x7; 
 
  /* third stage */ 
  x7 = x8 + x3; 
  x8 -= x3; 
  x3 = x0 + x2; 
  x0 -= x2; 
  x2 = (181 * (x4 + x5) + 128) >> 8; 
  x4 = (181 * (x4 - x5) + 128) >> 8; 
 
  /* fourth stage */ 
  blk[8 * 0] = iclp[(x7 + x1) >> 14]; 
  blk[8 * 1] = iclp[(x3 + x2) >> 14]; 
  blk[8 * 2] = iclp[(x0 + x4) >> 14]; 
  blk[8 * 3] = iclp[(x8 + x6) >> 14]; 
  blk[8 * 4] = iclp[(x8 - x6) >> 14]; 
  blk[8 * 5] = iclp[(x0 - x4) >> 14]; 
  blk[8 * 6] = iclp[(x3 - x2) >> 14]; 
  blk[8 * 7] = iclp[(x7 - x1) >> 14]; 
} 
 
/* two dimensional inverse discrete cosine transform */ 
void oldidct (short *block) 
{ 
  int i; 
 
  for (i = 0; i < 8; i++) 
    idctrow (block + 8 * i); 
 
  for (i = 0; i < 8; i++) 
    idctcol (block + i); 
} 
 
void init_idct () 
{ 
  int i; 
 
  iclp = iclip + 512; 
  for (i = -512; i < 512; i++) 
    iclp[i] = (i < -256) ? -256 : ((i > 255) ? 255 : i); 
} 


/**********************************************************************
 *
 *          Multiplierless Approximation of the DCT
 *
 *      Trac D. Tran
 *      The Johns Hopkins University, ECE Department
 *      105 Barton Hall
 *      3400 N. Charles Street
 *      Baltimore MD 21218-2686
 *      410-516-7416 Office; 410-516-5566 Fax
 *      ttran@ece.jhu.edu
 *
 *      Copyright (c) 1998 Trac D. Tran
 *
 *   This program is Copyright (c) by Trac D. Tran.
 *   It may not be redistributed without the consent of the copyright
 *   holders. In no circumstances may the copyright notice be removed.
 *   The program may not be sold for profit nor may they be incorporated
 *   in commercial programs without the written permission of the copyright
 *   holders. This program is provided as is, without any express or
 *   implied warranty, without even the warranty of fitness for a
 *   particular purpose.
 *
 **********************************************************************/
/**********************************************************************
 *
 *  Name:        idct
 *  Description:    Descans zigzag-scanned coefficients and does
 *        inverse dct on 64 coefficients
 *                      single precision floats
 *
 *  Input:        64 coefficients, block for 64 pixels
 *  Returns:        0
 *  Side effects:
 *
 *  Date: 981225    Author: Trac D. Tran, ttran@ece.jhu.edu
 *
 **********************************************************************/


void idct(short *coeff)
{
  int                j1, i, j, k, t1, t2;
  int e, f, g, h;
  int b[8], b1[8];
  int d[8][8];

  coeff[0] >>= 1;
  /* Descan coefficients */
  for (i = 0, k=0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      d[i][j] = coeff[k++];
    }
  }  

  /* Vertical */

  for (i = 0; i < 8; i++) {
    /* get col */
    for (j = 0; j < 8; j++) {
      b[j] = d[j][i];
    }  
    /* to make it semi-compatible to the original IDCT */
    b[6]=-b[6]; b[7]=-b[7];

    /* 7pi/16 = 1/8u */
    e = b[7] + ((b[1] + 4) >> 3);
    h = b[1];
    /* 3pi/16 = -3/4d 1/2u -1/8d */
    b[3] += ((b[5] + 4) >> 3);
    f = b[5] - ((b[3] + 1) >> 1);
    g = b[3] + (((f<<1) + f + 2) >> 2);
    /* 3pi/16 = 1/2d -7/8u */
    /* g = b[3] + ((b[5] + 1) >> 1);
       f = b[5] - (((g << 2) + (g << 1) + g + 4) >> 3);*/

    b1[0] = b[0] + ((b[4]+1) >> 1);
    b1[1] = b1[0] - b[4];    
    /* 3pi/8 = -3/8d 3/8u */
    b1[3] = b[2] - (((b[6] << 1) + b[6] + 4) >> 3);
    b1[2] = b[6] + (((b1[3] << 1) + b1[3] + 4) >> 3);

    b[4] =  e + f;
    b1[5] = e - f;
    b[7] =  h + g;
    b1[6] = h - g;
    /* pi/4 = -5/8u -3/8d */
    b[5] = (((b1[6]<<2) + b1[6] + 4) >> 3) - b1[5];
    b[6] = b1[6] - (((b[5]<<1) + b[5] + 4) >> 3);

    b[0] = b1[0] + b1[3];
    b[1] = b1[1] + b1[2];
    b[2] = b1[1] - b1[2];
    b[3] = b1[0] - b1[3];

    for (j = 0; j < 4; j++) {
      j1 = 7 - j;
      d[j][i] =  b[j] + b[j1];
      d[j1][i] = b[j] - b[j1];
    }
  }

  /* Horizontal */

  for (i = 0; i < 8; i++) {
    /* get row */
    for (j = 0; j < 8; j++) {
      b[j] = d[i][j];
    }  
    /* to make it semi-compatible to the original IDCT */
    b[6]=-b[6]; b[7]=-b[7];
    
    /* 7pi/16 = 1/8u */
    e = b[7] + ((b[1] + 4) >> 3);
    h = b[1];
    /* 3pi/16 = -3/4d 1/2u -1/8d */
    b[3] += ((b[5] + 4) >> 3);
    f = b[5] - ((b[3] + 1) >> 1);
    g = b[3] + (((f<<1) + f + 2) >> 2);
    /* 3pi/16 = 1/2d -7/8u */
    /* g = b[3] + ((b[5] + 1) >> 1);
       f = b[5] - (((g << 2) + (g << 1) + g + 4) >> 3);*/

    b1[0] = b[0] + ((b[4]+1) >> 1);
    b1[1] = b1[0] - b[4];    
    /* 3pi/8 = -3/8d 3/8u */
    b1[3] = b[2] - (((b[6] << 1) + b[6] + 4) >> 3);
    b1[2] = b[6] + (((b1[3] << 1) + b1[3] + 4) >> 3);

    b[4] =  e + f;
    b1[5] = e - f;
    b[7] =  h + g;
    b1[6] = h - g;
    /* pi/4 = -5/8u -3/8d */
    b[5] = (((b1[6]<<2) + b1[6] + 4) >> 3) - b1[5];
    b[6] = b1[6] - (((b[5]<<1) + b[5] + 4) >> 3);

    b[0] = b1[0] + b1[3];
    b[1] = b1[1] + b1[2];
    b[2] = b1[1] - b1[2];
    b[3] = b1[0] - b1[3];

    for (j = 0; j < 4; j++) {
      j1 = 7 - j;
      d[i][j] =  (b[j] + b[j1] + 2) >> 2;
      d[i][j1] = (b[j] - b[j1] + 2) >> 2;
    }
  }

  for(i=0; i<8; i++)
    for(j=0; j<8; j++)
      coeff[i*8+j] = d[i][j];


}
