/* In background motion estimation, an upsampled previous frame is generated.

In full_search(), if upframe[i] == NULL, the interpolation is done inside the motion estimation.

If overlap mode is used for HVSBM, fmv->mad may not be the true mean absolute MCP error. So
in split_block(), even though the child blocks have the same motion vector as their parent,
the sum of their mad values may be smaller than that of their parent (they have a different
overlap size from their parent).
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "basic.h" 
#define EXTERN extern
#include "coderN.h"
#include "bmeN.h"
#include "miscN.h"
#include <time.h>

#define   FixOL     0    /* Overlap mode for FSBM */
#define   VarOL     1    /* Overlap mode for HVSBM */

void write_highband(YUVimage frame, videoinfo info, char *inname, int level);
void print_time(double sc);
/*
 * getINTRABLOCKmad
 * Mean of the absolute sample values of one block of fr1 (the MAD figure used
 * for blocks in INTRABLOCK mode).
 * fr1     : frame samples, row-major with a row length of hor.
 * (cx,cy) : upper left corner of the block.
 * xblock  : nominal block width.
 * yblock  : nominal block height.
 * hor,ver : frame width and height; the block is clipped against them.
 * Returns 0 when the clipped block is empty.
 */
float getINTRABLOCKmad(float *fr1, int cx, int cy, int xblock, int yblock, int hor, int ver)
{
  int row, col, width, height, base;
  float value, total;

  /* clip the block at the right and bottom frame borders */
  width = xblock;
  if(cx+xblock > hor) width = hor-cx;
  height = yblock;
  if(cy+yblock > ver) height = ver-cy;

  if(width<=0 || height<=0) return 0;

  base  = cy*hor+cx;   /* index of the block's first sample */
  total = 0.;
  for(row=0 ; row<height ; row++){
    for(col=0 ; col<width ; col++){
      value = fr1[base + row*hor + col];
      if(value < 0) total -= value;   /* accumulate |value| */
      else          total += value;
    }
  }

  total /= width*height;
  return total;
}
/**********************************************************************
 *             stop_merge_loop()
 * Walk the motion vector tree of one block and mark every internal node
 * as mergeable or not: a node whose subtree contains a leaf in
 * INTRABLOCK or REVERSE mode gets merge = NO, every other internal node
 * gets merge = YES.
 * fmv    : motion vector tree node of the block.
 * fr1    : current frame (only passed down the recursion).
 * (cx,cy): upper left corner of the block.
 * xblock : block width.
 * yblock : block height.
 * hor    : frame width.
 * ver    : frame height.
 * Returns 1 when the subtree holds such a leaf inside the frame, else 0.
 **********************************************************************/
int stop_merge_loop(vector_ptr fmv, float *fr1, int cx, int cy, int xblock, int yblock, int hor, int ver)
{
  int half_w, half_h, blocked;

  if(!fmv->child){
    /* leaf: blocks starting outside the frame never stop a merge */
    if(cx>=hor || cy>=ver) return 0;

    return (fmv->mode == INTRABLOCK || fmv->mode == REVERSE)? 1 : 0;
  }

  half_w = xblock/2;
  half_h = yblock/2;

  /* All four children are visited unconditionally (no short-circuit),
   * because the recursion also sets the merge flag of their own subtrees. */
  blocked  = stop_merge_loop(fmv->child0, fr1, cx,        cy,        half_w, half_h, hor, ver);
  blocked |= stop_merge_loop(fmv->child1, fr1, cx+half_w, cy,        half_w, half_h, hor, ver);
  blocked |= stop_merge_loop(fmv->child2, fr1, cx,        cy+half_h, half_w, half_h, hor, ver);
  blocked |= stop_merge_loop(fmv->child3, fr1, cx+half_w, cy+half_h, half_w, half_h, hor, ver);

  if(blocked == 1){
    fmv->merge = NO;
    return 1;
  }

  fmv->merge = YES;
  return 0;
}

/*
 * stop_merge
 * Run stop_merge_loop() on the motion vector tree of every top-level block.
 * fmv  : array of info.xnum*info.ynum tree roots, stored row by row.
 * fr1  : current frame, forwarded to stop_merge_loop().
 * info : supplies frame size (ywidth, yheight), block size (xblk, yblk)
 *        and block counts (xnum, ynum).
 */
void stop_merge(vector_ptr fmv, float *fr1, videoinfo info)
{
  int row, col;
  int width  = info.ywidth;
  int height = info.yheight;
  int blk_w  = info.xblk;
  int blk_h  = info.yblk;
  int cols   = info.xnum;
  int rows   = info.ynum;

  for(row=0 ; row<rows ; row++){
    for(col=0 ; col<cols ; col++){
      stop_merge_loop(&fmv[row*cols+col], fr1, col*blk_w, row*blk_h,
                      blk_w, blk_h, width, height);
    }
  }
}
/*
 * MCP_MSE
 * Mean squared motion compensated prediction error of one block.
 * frame1 : current frame.
 * frame0 : reference frame.
 * (cx,cy): upper left corner of the block in frame1.
 * mvx    : horizontal motion vector.
 * mvy    : vertical motion vector.
 * xblk   : real (already clipped) block width.
 * yblk   : real (already clipped) block height.
 * hor    : frame width.
 * ver    : frame height.
 * The reference sample for current pixel (cx+x, cy+y) is read through
 * interpolate() at (cx-mvx+x, cy-mvy+y).
 * The caller must pass a non-empty block: the sum is divided by xblk*yblk.
 */
float MCP_MSE(float *frame1, int cx, int cy, float *frame0, float mvx, float mvy, int xblk, int yblk, int hor, int ver)
{
  int y, x, m;
  float px, py;
  float diff, ptemp, sum;

  px = cx-mvx; py = cy-mvy;   /* displaced block origin in the reference */
  m = cy*hor+cx;              /* running index into the current frame */

  sum = 0.;
  for(y=0 ; y<yblk ; y++){
    for(x=0 ; x<xblk ; x++){
      ptemp = interpolate(px+x, py+y, frame0, hor, ver, TYPE);
      diff = frame1[m] - ptemp;
      sum += diff*diff;
      m++;
    } /* x */
    m += hor-xblk;            /* jump to the start of the next block row */
  } /* y */

  sum /= xblk*yblk;
  return sum;
}



/**********************************************************************
 *             find_MSE()
 * Compute the MCP mean squared error and the variance of one block and
 * decide its mode from them.
 * fmv    : motion vector of the block; mse, var and mode are written.
 * frame1 : current frame.
 * frame0 : reference frame.
 * (cx,cy): upper left corner of the block.
 * xblock : nominal block width.
 * yblock : nominal block height.
 * hor    : frame width.
 * ver    : frame height.
 * t_level: temporal decomposition level (begins with 0).
 *
 * The mode becomes DEFAULT when the MSE is below both the variance of the
 * current block and the variance of the block at the displaced position
 * in the reference, or when it is below NOISE_VAR*2^(t_level/2);
 * otherwise it becomes INTRABLOCK. Empty (clipped away) blocks are left
 * untouched.
 *********************************************************************/

void find_MSE(vector_ptr fmv, float *frame1, float *frame0, int cx, int cy, int xblock, int yblock, 
              int hor, int ver, int t_level)
{
  int width, height, ref_x, ref_y, matched;
  float disp_x, disp_y, ref_var;

  /* real block size after clipping at the frame borders */
  width  = xblock;
  if(cx+xblock > hor) width = hor-cx;
  height = yblock;
  if(cy+yblock > ver) height = ver-cy;

  if(width<=0 || height<=0) return;

  fmv->mse = MCP_MSE(frame1, cx, cy, frame0, fmv->mvx, fmv->mvy, width, height, hor, ver);
  fmv->var = variance(frame1+cy*hor+cx, width, height, hor);

  /* position() yields the integer sample position (ref_x, ref_y) used as the
   * origin of the reference block -- presumably kept inside the frame; confirm
   * against its definition. */
  disp_x = cx-fmv->mvx;
  disp_y = cy-fmv->mvy;
  position(&ref_x, &ref_y, disp_x, disp_y, fmv->mvx, fmv->mvy, hor, ver);
  ref_var = variance(frame0+ref_y*hor+ref_x, width, height, hor);

  matched = (fmv->mse < fmv->var && fmv->mse < ref_var);
  if(!matched){
    /* noise floor grows with the temporal level */
    matched = (fmv->mse < NOISE_VAR*pow(2, (float)t_level/2.));
  }

  if(matched) fmv->mode = DEFAULT;
  else        fmv->mode = INTRABLOCK;
}



/**********************************************************************
 *             MSE_loop()
 * Visit every node of a block's motion vector tree and run find_MSE()
 * on each node that is currently in DEFAULT mode, to detect unmatched
 * blocks.
 * fmv    : motion vector tree node of the block.
 * fr1    : current frame.
 * fr0    : reference frame.
 * (cx,cy): upper left corner of the block.
 * xblock : block width.
 * yblock : block height.
 * hor    : frame width.
 * ver    : frame height.
 * level  : temporal decomposition level, forwarded to find_MSE().
 **********************************************************************/
void MSE_loop(vector_ptr fmv, float *fr1, float *fr0, int cx, int cy, int xblock, int yblock, int hor, int ver, int level)
{
  int half_w, half_h;

  /* nothing to do for blocks that start outside the frame */
  if(cx>=hor || cy>=ver) return;

  if(fmv->mode == DEFAULT)
    find_MSE(fmv, fr1, fr0, cx, cy, xblock, yblock, hor, ver, level);

  if(!fmv->child) return;

  half_w = xblock/2;
  half_h = yblock/2;
  MSE_loop(fmv->child0, fr1, fr0, cx,        cy,        half_w, half_h, hor, ver, level);
  MSE_loop(fmv->child1, fr1, fr0, cx+half_w, cy,        half_w, half_h, hor, ver, level);
  MSE_loop(fmv->child2, fr1, fr0, cx,        cy+half_h, half_w, half_h, hor, ver, level);
  MSE_loop(fmv->child3, fr1, fr0, cx+half_w, cy+half_h, half_w, half_h, hor, ver, level);
}



/*****************************************************************************
 *                         find_unmatched_block()
 * Run MSE_loop() on the motion vector tree of every top-level block to find
 * unmatched blocks in the hierarchical tree.
 * fmv     : array of info.xnum*info.ynum tree roots, stored row by row.
 * fr1     : current frame (its Y plane is used).
 * fr0     : reference frame (its Y plane is used).
 * t_level : temporal decomposition level, forwarded to MSE_loop().
 * info    : frame size, block size and block counts.
 *****************************************************************************/
void find_unmatched_block(vector_ptr fmv, YUVimage fr1, YUVimage fr0, int t_level, videoinfo info)
{
  int row, col;
  int width  = info.ywidth;
  int height = info.yheight;
  int blk_w  = info.xblk;
  int blk_h  = info.yblk;
  int cols   = info.xnum;
  int rows   = info.ynum;

  for(row=0 ; row<rows ; row++){
    for(col=0 ; col<cols ; col++){
      MSE_loop(&fmv[row*cols+col], fr1.Y, fr0.Y, col*blk_w, row*blk_h,
               blk_w, blk_h, width, height, t_level);
    }
  }
}





/*****************************************************************************/
/*                           rec_full_search()                               */
/* Re-estimate the motion of every tree node that is not in DEFAULT mode.    */
/* frame1-- current  frame0-- reference                                      */
/*                                                                           */
/* Children are processed before their parent. For a node in REVERSE or      */
/* INTRABLOCK mode a non-overlapped full search centred on (0,0) is run;     */
/* if mad*REVERSE_WEIGHTING beats the stored fmv->mad the new vector and mad */
/* are stored and the node ends in REVERSE mode, otherwise it ends in        */
/* INTRABLOCK mode. Any other non-DEFAULT mode aborts the program.           */
/*****************************************************************************/
void rec_full_search(vector_ptr fmv, float *frame1, float *frame0, int x, int y, int xblock, int yblock, 				     
                     int maxx, int maxy, int hor, int ver, int t_level, float **upframe, int *uphor, int *upver, int subpel)
{
  int half_w, half_h;
  float mvx, mvy, mad;

  /* the nominal sizes xblock/yblock are passed on; clipping is left to full_search */
  if(fmv->child){
    half_w = xblock/2;
    half_h = yblock/2;
    rec_full_search(fmv->child0, frame1, frame0, x,        y,        half_w, half_h, maxx, maxy, hor, ver, t_level, upframe, uphor, upver, subpel);
    rec_full_search(fmv->child1, frame1, frame0, x+half_w, y,        half_w, half_h, maxx, maxy, hor, ver, t_level, upframe, uphor, upver, subpel);
    rec_full_search(fmv->child2, frame1, frame0, x,        y+half_h, half_w, half_h, maxx, maxy, hor, ver, t_level, upframe, uphor, upver, subpel);
    rec_full_search(fmv->child3, frame1, frame0, x+half_w, y+half_h, half_w, half_h, maxx, maxy, hor, ver, t_level, upframe, uphor, upver, subpel);
  }

  if(fmv->mode == DEFAULT) return;

  full_search(&mvx, &mvy, &mad, frame1, frame0, x, y, xblock, yblock,
              maxx, maxy, hor, ver, 0, 0, upframe, uphor, upver, subpel, FixOL); /* nonoverlap */

  if(fmv->mode != REVERSE && fmv->mode != INTRABLOCK){
    printf("fmv->mode is wrong (bmeN.c)\n");
    exit(1);
  }

  if(mad*REVERSE_WEIGHTING < fmv->mad){
    /* the new vector wins: keep it and (re)mark the node REVERSE */
    fmv->mvx  = mvx;
    fmv->mvy  = mvy;
    fmv->mad  = mad;
    fmv->mode = REVERSE;
  }
  else{
    /* no improvement: a REVERSE node falls back to INTRABLOCK, an
     * INTRABLOCK node stays as it is */
    fmv->mode = INTRABLOCK;
  }
}

/*
 *    Back_block_matching
 * Run rec_full_search() on every top-level block, i.e. redo the motion
 * estimation for the tree nodes that are in REVERSE or INTRABLOCK mode.
 * fmv     : array of info.xnum*info.ynum tree roots, stored row by row.
 * fr1     : current frame (Y plane used).
 * fr0     : reference frame (Y plane used).
 * info    : frame size, block size, block counts, search range, subpel depth.
 * t_level : temporal decomposition level, forwarded to rec_full_search().
 * dist    : scales the search range info.maxx / info.maxy.
 *
 * No interpolated reference frames are prepared here: every upframe[i] is
 * NULL, only the size tables uphor/upver are filled.
 */
void Back_block_matching(vector_ptr fmv, YUVimage fr1, YUVimage fr0, videoinfo info, int t_level, int dist)
{
  int   i, row, col, levels;
  int   width, height, blk_w, blk_h, cols, rows, range_x, range_y;
  int   *hor_base, *ver_base, *uphor, *upver;
  float **upframe;

  width = info.ywidth;  height = info.yheight;
  blk_w = info.xblk;    blk_h  = info.yblk;
  cols  = info.xnum;    rows   = info.ynum;

  range_x = (dist*info.maxx*2+1+maxx_refine)*2+1+maxx_refine;
  range_y = (dist*info.maxy*2+1+maxy_refine)*2+1+maxy_refine;

  levels = MY_MAX(info.subpel,1);

  upframe  = (float **)getarray(levels, sizeof(float *), "upframe(bmeN.c)");
  hor_base = (int *)getarray(levels+1, sizeof(int), "uphor(bmeN.c)");
  ver_base = (int *)getarray(levels+1, sizeof(int), "upver(bmeN.c)");

  /* The tables are shifted by one so that index -1 holds the full-pel frame
   * size; this simplifies full_search. */
  uphor = hor_base+1;  uphor[-1] = width;
  upver = ver_base+1;  upver[-1] = height;
  for(i=0 ; i<levels ; i++){
    uphor[i]   = uphor[i-1]*2-1;
    upver[i]   = upver[i-1]*2-1;
    upframe[i] = NULL;
  }

  /* fixed size block matching */
  for(row=0 ; row<rows ; row++){
    for(col=0 ; col<cols ; col++){
      rec_full_search(&fmv[row*cols+col], fr1.Y, fr0.Y, col*blk_w, row*blk_h, blk_w, blk_h,
                      range_x, range_y, width, height, t_level, upframe, uphor, upver, info.subpel);
    }
  }

  free(upframe);
  free(hor_base);
  free(ver_base);
}


/*****************************************************************************/
/*                              decide_split()                               */
/* Decide whether a block should be split into its four children.           */
/* pmad : mad of the parent block.                                           */
/* mad  : mad of the four children (mad[0..3]).                              */
/* Returns 1 when pmad exceeds penalty * (average child mad), else 0.        */
/* cx, cy, xblk, yblk and layer are not used by the decision.                */
/*****************************************************************************/
int decide_split(int cx, int cy, int xblk, int yblk, int layer, float pmad, float *mad)
{
  float penalty, child_avg;

  (void)cx; (void)cy; (void)xblk; (void)yblk; (void)layer;

  penalty   = 1.0;
  child_avg = (mad[0]+mad[1]+mad[2]+mad[3])/4;

  if(pmad > penalty * child_avg)
    return 1;
  return 0;
}



/*****************************************************************************/
/*                               split_block()                               */
/* Estimate a motion vector for each of the four quadrants of a block and    */
/* attach them to fmv as children.                                           */
/*                                                                           */
/* fmv        : tree node of the block to split.                             */
/* fr1, fr0   : current and reference frame.                                 */
/* (cx,cy)    : upper left corner of the block.                              */
/* xblk, yblk : block size; the quadrants are xblk/2 by yblk/2.              */
/* pmvx, pmvy : centre of the search window, passed to full_search.          */
/* maxx, maxy : search range.                                                */
/* small      : no split is attempted when xblk<=small or yblk<=small.       */
/* Remaining arguments are forwarded unchanged to full_search().             */
/*                                                                           */
/* With PREVENT_SPLIT defined the children are created only when at least    */
/* one quadrant vector differs from the parent vector and decide_split()     */
/* agrees; otherwise they are always created.                                */
/*****************************************************************************/
void split_block(vector_ptr fmv,  float *fr1,  float *fr0, int cx, int cy, int xblk, int yblk, int pmvx, int pmvy, 
                 int maxx, int maxy, int hor, int ver,
                 float **upframe, int *uphor, int *upver, 
                 int subpel, int layer, int small)
{
  int   qx, qy, q, half_w, half_h;
  float mvx0, mvx[4], mvy0, mvy[4], mad0, mad[4];
#ifdef PREVENT_SPLIT
  int dflag, i;
#endif

  if(xblk<=small || yblk<=small) return;

  half_w = xblk/2;
  half_h = yblk/2;

  /* Exactly 2x2 quadrants are searched. Stepping a pixel coordinate by
   * xblk/2 up to cx+xblk (as done previously) gives three steps for an odd
   * size, which indexes past the 4-element arrays, and never ends when the
   * half size is 0. For even sizes both forms visit the same quadrants. */
  for(qy=0 ; qy<2 ; qy++){
    for(qx=0 ; qx<2 ; qx++){
      q = qy*2+qx;
      full_search(&mvx0, &mvy0, &mad0, fr1, fr0, cx+qx*half_w, cy+qy*half_h, half_w, half_h,
                  maxx, maxy, hor, ver, pmvx, pmvy, upframe, uphor, upver, subpel, VarOL);
      mvx[q]=mvx0; mvy[q]=mvy0; mad[q]=mad0;
    }
  }

#ifdef PREVENT_SPLIT
  /* prevent splitting the same vectors or sending the same vectors */
  dflag = 0;
  for(i=0 ; i<4 ; i++) if(fmv->mvx != mvx[i]  || fmv->mvy != mvy[i]) {dflag=1; break;}
  if(!dflag) return;

  if(decide_split(cx, cy, xblk, yblk, layer, fmv->mad, mad)){
    generate_child(fmv, mvx, mvy, mad);
  }
#else
  generate_child(fmv, mvx, mvy, mad);
#endif

  return;
}

/*
 * split
 * Two-step subdivision of a block: split the block itself and, if that
 * produced children, try to split each of the four children once more.
 * The same search window centre (pmvx, pmvy) is used for both steps.
 * All arguments have the same meaning as in split_block().
 */
void split(vector_ptr fmv, float *fr1, float *fr0, int cx, int cy, int xblk, int yblk, int pmvx, int pmvy, int maxx,
           int maxy, int hor, int ver, float **upframe, int *uphor, int *upver, 
           int subpel, int layer, int small)
{
  vector_ptr kid[4];
  int k, half_w, half_h;

  /* divide and decide at child level */
  split_block(fmv, fr1, fr0, cx, cy, xblk, yblk, pmvx, pmvy,
              maxx, maxy, hor, ver, upframe, uphor, upver, subpel, layer, small);
  if(!(fmv->child)) return;

  half_w = xblk/2;
  half_h = yblk/2;
  kid[0] = fmv->child0;   /* upper left  */
  kid[1] = fmv->child1;   /* upper right */
  kid[2] = fmv->child2;   /* lower left  */
  kid[3] = fmv->child3;   /* lower right */

  /* divide and decide at grandchildren level */
  for(k=0 ; k<4 ; k++){
    split_block(kid[k], fr1, fr0, cx+(k%2)*half_w, cy+(k/2)*half_h, half_w, half_h,
                pmvx, pmvy, maxx, maxy, hor, ver, upframe, uphor, upver, subpel, layer, small);
  }
}

/*****************************************************************************/
/*                               grand_search()                              */
/* Update the vector of one block with an overlapped (VarOL) full search     */
/* centred on twice the negated current vector, then subdivide the block     */
/* through split() using the same window centre.                             */
/* layer: hierarchical layer                                                 */
/* half : sub-pel depth handed to full_search() and split()                  */
/*****************************************************************************/
void grand_search(vector_ptr fmv, float *fr1, float *fr0, int cx, int cy, int xblk, int yblk, int maxx, int maxy, 
                  int hor, int ver, float **upframe, int *uphor, int *upver, 
                  int half, int layer, int small)
{
  int   centre_x, centre_y;
  float best_x, best_y, best_mad;

  /* origin of the search region */
  centre_x = -(int)(2.*fmv->mvx);
  centre_y = -(int)(2.*fmv->mvy);

  full_search(&best_x, &best_y, &best_mad, fr1, fr0, cx, cy, xblk, yblk,
              maxx, maxy, hor, ver, centre_x, centre_y, upframe, uphor, upver, half, VarOL);
  fmv->mvx = best_x;
  fmv->mvy = best_y;
  fmv->mad = best_mad;

  split(fmv, fr1, fr0, cx, cy, xblk, yblk, centre_x, centre_y,
        maxx, maxy, hor, ver, upframe, uphor, upver, half, layer, small);
}

/*
 *                             child_loop()
 * Refine the motion vector tree produced at the previous pyramid level and
 * subdivide its leaves.
 * (maxx1,maxy1): search region of the 1st pyramid level, used to derive the
 *                exhaustive search region.
 * fr1-- current  fr0-- reference
 *
 * Every node is first refined with a +-maxx_refine/maxy_refine search centred
 * on twice the negated vector of the previous level. Internal nodes then
 * recurse into their children. For a leaf, find_MSE() decides how its
 * children are searched in split():
 *   - INTRABLOCK (poor match): the mode is reset to DEFAULT and an exhaustive
 *     search centred on (0,0) is used, with a range that depends on layer
 *     (2 or 3; any other value aborts the program);
 *   - otherwise: a refinement search around the parent's window centre.
 * Leaves that start outside the frame are skipped.
 */
void child_loop(vector_ptr fmv, float *fr1, float *fr0, int cx, int cy, int xblk, int yblk, int maxx1, int maxy1, int hor, 
                int ver, float **upframe, int *uphor, int *upver, int subpel, int layer, int small, int level)
{
  int   pmvx, pmvy, newmaxx, newmaxy, half_w, half_h;
  float mvx0, mvy0, mad0;

  if(!fmv->child && (cx>=hor || cy>=ver)) return;

  /* refine the motion vector obtained at the previous level */
  pmvx = -(int)(2.*fmv->mvx);   /* origin of the search region */
  pmvy = -(int)(2.*fmv->mvy);
  full_search(&mvx0, &mvy0, &mad0, fr1, fr0, cx, cy, xblk, yblk,
              maxx_refine, maxy_refine, hor, ver, pmvx, pmvy, upframe, uphor, upver, subpel, VarOL);
  fmv->mvx = mvx0;
  fmv->mvy = mvy0;
  fmv->mad = mad0;

  if(fmv->child){
    half_w = xblk/2;
    half_h = yblk/2;
    child_loop(fmv->child0, fr1, fr0, cx,        cy,        half_w, half_h,
               maxx1, maxy1, hor, ver, upframe, uphor, upver, subpel, layer, small, level);
    child_loop(fmv->child1, fr1, fr0, cx+half_w, cy,        half_w, half_h,
               maxx1, maxy1, hor, ver, upframe, uphor, upver, subpel, layer, small, level);
    child_loop(fmv->child2, fr1, fr0, cx,        cy+half_h, half_w, half_h,
               maxx1, maxy1, hor, ver, upframe, uphor, upver, subpel, layer, small, level);
    child_loop(fmv->child3, fr1, fr0, cx+half_w, cy+half_h, half_w, half_h,
               maxx1, maxy1, hor, ver, upframe, uphor, upver, subpel, layer, small, level);
    return;
  }

  /* leaf: decide whether the current vector is a usable start point for the
   * motion estimation of the child blocks */
  find_MSE(fmv, fr1, fr0, cx, cy, xblk, yblk, hor, ver, level);

  if(fmv->mode != INTRABLOCK){
    /* refinement */
    newmaxx = maxx_refine;
    newmaxy = maxy_refine;
  }
  else{
    /* exhaustive search instead of starting from the parent's vector */
    fmv->mode = DEFAULT;   /* still set to be DEFAULT mode */

    if(layer != 2 && layer != 3){
      printf("layer is wrong (bmeN.c)\n");
      exit(1);
    }

    pmvx = 0;
    pmvy = 0;
    newmaxx = (maxx1*2+1)+maxx_refine;
    newmaxy = (maxy1*2+1)+maxy_refine;
    if(layer == 3){
      newmaxx = newmaxx*2+1+maxx_refine;
      newmaxy = newmaxy*2+1+maxy_refine;
    }
  }

  split(fmv, fr1, fr0, cx, cy, xblk, yblk, pmvx, pmvy,
        newmaxx, newmaxy, hor, ver, upframe, uphor, upver, subpel, layer, small);
}



/*
 * FSBM
 * Fixed size block matching: one non-overlapped (FixOL) full search per
 * top-level block, centred on (0,0).
 * fmv  : array of info.xnum*info.ynum vectors, stored row by row; mvx, mvy
 *        and mad of every entry are written.
 * fr1  : current frame (Y plane used).
 * fr0  : reference frame (Y plane used).
 * info : frame size, block size, block counts and search range.
 * dist : scales the search range info.maxx / info.maxy.
 * half : sub-pel depth handed to full_search().
 *
 * full_search() reads uphor[i-1], upver[i-1] and upframe[i] for every
 * sub-pel level i < half, so the tables are allocated and filled here the
 * same way Back_block_matching() does it: index -1 holds the full-pel frame
 * size, each further level is 2*previous-1, and no interpolated frame is
 * supplied (upframe[i] == NULL). Previously these three pointers were passed
 * uninitialized.
 */
void FSBM(vector_ptr fmv, YUVimage fr1, YUVimage fr0, videoinfo info, int dist, int half)

{
  int        i, levels, hor, ver, xnum, ynum, xblk, yblk, maxx, maxy, *uphor, *upver;
  int        x, y, X, Y, pos;
  float      mvx, mvy, mad;
  float      **upframe;


  hor  = info.ywidth; ver = info.yheight;
  xblk = info.xblk;   yblk = info.yblk;
  xnum = info.xnum;   ynum = info.ynum;
  maxx = dist*(info.maxx);  maxy = dist*(info.maxy);

  levels = MY_MAX(half,1);
  upframe = (float **)getarray(levels, sizeof(float *), "upframe(bmeN.c)");
  uphor = (int *)getarray(levels+1, sizeof(int), "uphor(bmeN.c)");
  upver = (int *)getarray(levels+1, sizeof(int), "upver(bmeN.c)");

  uphor++; uphor[-1]=hor;  /* to simplify full_search */
  upver++; upver[-1]=ver;
  for(i=0 ; i<levels ; i++){
    uphor[i] = uphor[i-1]*2-1;
    upver[i] = upver[i-1]*2-1;
    upframe[i] = NULL;
  }

  for(y=0, Y=0 ; Y<ynum ; y+=yblk, Y++){
    for(x=0, X=0 ; X<xnum ; x+=xblk, X++){
      pos = Y*xnum+X;
      fmv[pos].mvx=0.; fmv[pos].mvy=0.;

      full_search(&mvx, &mvy, &mad, fr1.Y, fr0.Y, x, y, xblk, yblk,
                  maxx, maxy, hor, ver, 0, 0, upframe, uphor, upver, half, FixOL);
      fmv[pos].mvx=mvx; fmv[pos].mvy=mvy; fmv[pos].mad = mad;
    }
  }

  free(upframe);
  free(&uphor[-1]);
  free(&upver[-1]);
}

/*
 * hvsbm
 * Hierarchical variable size block matching on a three-layer pyramid.
 * fr1-- current  fr0-- reference
 * fmv     : array of info.xnum*info.ynum tree roots, stored row by row.
 * info    : frame size, block size, block counts, search range, tree depth.
 * t_level : temporal decomposition level, forwarded to child_loop().
 * dist    : scales the search range info.maxx / info.maxy.
 * subpel  : sub-pel depth used at the full resolution layer.
 *
 * Layer 1 works on the twice-reduced frames (size/4), layer 2 on the
 * once-reduced frames (size/2), layer 3 on the full frames. Frame width and
 * height must be multiples of 4, otherwise the program exits.
 *
 * uphor/upver are shifted by one so that index -1 holds the frame size of
 * the layer being processed; this simplifies full_search.
 */
void hvsbm(vector_ptr fmv, YUVimage fr1, YUVimage fr0, videoinfo info, int t_level, int dist, int subpel)
{
  int        i, hor, ver, xnum, ynum, xblk, yblk, maxx, maxy, *uphor, *upver;
  int        x, y, X, Y, pos, small, itemp;
  float      *fr1LL, *fr0LL, *fr1LLLL, *fr0LLLL, **upframe;

  hor  = info.ywidth; ver = info.yheight;
  xblk = info.xblk;   yblk = info.yblk;
  xnum = info.xnum;   ynum = info.ynum;

  maxx = dist*(info.maxx);  maxy = dist*(info.maxy);

  /* smallest block size: halve xblk (info.level-1) times. The bound itemp>1
   * keeps the loop finite when info.level is smaller than 1. */
  small = xblk;
  for(itemp=info.level ; itemp>1 ; itemp--) small /= 2;

  if(hor%4 || ver%4){ printf("special consideration needed here!\n"); exit(1);}

  fr1LL   = (float *) getarray(hor/2*ver/2, sizeof(float), "fr1LL");
  fr0LL   = (float *) getarray(hor/2*ver/2, sizeof(float), "fr0LL");
  fr1LLLL = (float *) getarray(hor/4*ver/4, sizeof(float), "fr1LLLL");
  fr0LLLL = (float *) getarray(hor/4*ver/4, sizeof(float), "fr0LLLL");

  /* build the reduced-resolution versions of both frames */
  pyramid(fr1LL,   fr1.Y, hor, ver);
  pyramid(fr1LLLL, fr1LL, hor/2, ver/2);
  pyramid(fr0LL,   fr0.Y, hor, ver);
  pyramid(fr0LLLL, fr0LL, hor/2, ver/2);

  upframe = (float **)getarray(MY_MAX(subpel,1), sizeof(float *), "upframe(bmeN.c)");
  for(i=0; i<MY_MAX(subpel,1); i++) upframe[i] = NULL;

  uphor = (int *)getarray(MY_MAX(subpel,1)+1, sizeof(int), "uphor(bmeN.c)");
  upver = (int *)getarray(MY_MAX(subpel,1)+1, sizeof(int), "upver(bmeN.c)");
  uphor++;
  upver++;

  /* 1st layer
   * the start motion vector is (0,0), so exhaustive search is used for any
   * block size of this level
   */
  hor /=4; ver/=4; xblk/=4; yblk/=4;

  uphor[-1]=hor;  /* to simplify full_search */
  upver[-1]=ver;
  /* a pre-interpolated reference saves much computation for half-pixel accuracy ME */
  Interpolate_frame(fr0LLLL, hor, ver, &upframe[0], &uphor[0], &upver[0], 1);

  for(y=0, Y=0 ; Y<ynum ; y+=yblk, Y++){
    for(x=0, X=0 ; X<xnum ; x+=xblk, X++){
      pos = Y*xnum+X;
      fmv[pos].mvx = fmv[pos].mvy = 0.; /* start motion vector */

      /* always half pixel accuracy */
      grand_search(&fmv[pos], fr1LLLL, fr0LLLL,  x,  y, xblk, yblk,
                   maxx, maxy, hor, ver, upframe, uphor, upver, 1, 1, small);
    }
  }
  free(upframe[0]); upframe[0] = NULL;

  /* 2nd layer
   * Refine the motion vector tree generated in layer 1;
   * subdivide the leaf blocks of layer 1 and, based on MSE and variance of
   * those blocks (find_MSE), choose exhaustive search or refinement to find
   * motion vectors for the newly generated child blocks.
   */
  hor *=2; ver *=2; xblk*=2; yblk*=2;
  uphor[-1]=hor;  /* to simplify full_search */
  upver[-1]=ver;
  Interpolate_frame(fr0LL, hor, ver, &upframe[0], &uphor[0], &upver[0], 1);

  for(y=0, Y=0 ; Y<ynum ; y+=yblk, Y++){
    for(x=0, X=0 ; X<xnum ; x+=xblk, X++){
      pos = Y*xnum+X;
      /* always half pixel accuracy */
      child_loop(&fmv[pos], fr1LL, fr0LL, x, y, xblk, yblk,
                 maxx, maxy, hor, ver, upframe, uphor, upver, 1, 2, small, t_level);
    }
  }
  free(upframe[0]); upframe[0] = NULL;

  /* 3rd layer
   * similar to layer 2, at full resolution and with the requested sub-pel depth
   */
  hor *=2; ver *=2;  xblk*=2; yblk*=2;
  uphor[-1]=hor;  /* to simplify full_search */
  upver[-1]=ver;

  if(subpel >= 1){
    /* only the first three sub-pel levels get a pre-interpolated frame */
    for(i=0; i< MY_MIN(subpel,3); i++){
      Interpolate_frame(fr0.Y, hor, ver, &upframe[i], &uphor[i], &upver[i], i+1);
    }
    /* Deeper levels are interpolated on the fly inside full_search
     * (upframe[i] == NULL), because pre-interpolating them is time-consuming.
     * full_search still reads uphor[i-1]/upver[i-1] for those levels, so the
     * size tables must be filled here; the 2*previous-1 recurrence is the one
     * Back_block_matching() uses (assumed to agree with the sizes reported by
     * Interpolate_frame -- confirm). */
    for(i=MY_MIN(subpel,3); i<subpel; i++){
      upframe[i] = NULL;
      uphor[i] = uphor[i-1]*2-1;
      upver[i] = upver[i-1]*2-1;
    }
  }

  for(y=0, Y=0 ; Y<ynum ; y+=yblk, Y++){
    for(x=0, X=0 ; X<xnum ; x+=xblk, X++){
      pos = Y*xnum+X;
      child_loop(&fmv[pos], fr1.Y, fr0.Y, x, y, xblk, yblk,
                 maxx, maxy, hor, ver, upframe, uphor, upver, subpel, 3, small, t_level);
    }
  }

  if(subpel >= 1){
    for(i=0; i< MY_MIN(subpel,3); i++){
      free(upframe[i]);
    }
  }

  free(upframe);
  free(&uphor[-1]);
  free(&upver[-1]);

  free(fr1LL);   free(fr0LL);
  free(fr1LLLL); free(fr0LLLL);
}





/*
 *                            block_matching()
 * Block-based motion estimation; dispatches on info.ME.
 * fr1-- current  fr0-- reference
 * dist   : temporal distance between fr1 and fr0
 * t_level: temporal decomposition level (begins with 0)
 * subpel : sub-pel depth, passed on to the selected estimator
 *
 * info.ME == 0 : fixed size block matching (FSBM)
 * info.ME == 1 : hierarchical variable size block matching (hvsbm)
 * any other value prints a message and exits.
 */
void block_matching(vector_ptr fmv, YUVimage fr1, YUVimage fr0, videoinfo info, int t_level, int dist, int subpel)
{
  if(info.ME == 0){
    FSBM(fmv, fr1, fr0, info, dist, subpel);
    return;
  }

  if(info.ME == 1){
    hvsbm(fmv, fr1, fr0, info, t_level, dist, subpel);
    return;
  }

  if(info.ME == 2)
    printf("not available (bmeN.c)!\n");
  else
    printf("error in blockmatching()\n");
  exit(1);
}

/*****************************************************************************/
/*                                full_search()                              */
/* frame1-- current  frame0-- reference                                      */
/*                                                                           */
/* Exhaustive block matching for the block at (cx,cy) of nominal size        */
/* xblock x yblock (clipped at the frame borders).                           */
/*                                                                           */
/* mvx, mvy, mad : outputs -- best motion vector and its normalized error.   */
/* maxx, maxy    : search range.                                             */
/* disx, disy    : centre of the search window, i.e. the displacements       */
/*                 dx in [disx-maxx, disx+maxx], dy in [disy-maxy, disy+maxy] */
/*                 are tested; the vector is stored as (-dx, -dy).           */
/* hor, ver      : frame width and height.                                   */
/* upframe       : per sub-pel level, an interpolated reference frame or     */
/*                 NULL (interpolation is then done on the fly).             */
/* uphor, upver  : sizes per sub-pel level; index -1 is the full-pel frame.  */
/* subpel        : number of sub-pel refinement levels.                      */
/* overlap       : non-zero enlarges the matched area by overlap_size()      */
/*                 samples on each side where the frame allows it.           */
/*                                                                           */
/* An empty block yields mvx = mvy = mad = 0. When no candidate lies inside  */
/* the frame the vector is (0,0) and mad stays at HUGE_VAL.                  */
/*****************************************************************************/
void full_search(float *mvx, float *mvy, float *mad, float *frame1, float *frame0, int cx, int cy, 
                 int xblock, int yblock, int maxx, int maxy, int hor, int ver, int disx, int disy, 
                 float **upframe, int *uphor, int *upver, 
                 int subpel, int overlap)
{
  int   i, scale, dx, dy, hx, hy, px, py , px2, py2, m, n, xblk, yblk;
  int   leftx, rightx, lefty, righty, overx, overy, oxblk=0, oyblk=0;
  float px1, py1, sum, hmvx=0., hmvy=0., hmad;

  /***************************************************************/
  /* find (dx, dy) such that minimize                            */
  /*                                                             */
  /*     y=cy+yblk  x=cx+xblk                                    */ 
  /*       ----      -----                                       */
  /*       \         \      | frame1[x,y] - frame0[x-dx,y-dy] |  */
  /*       /         /                                           */
  /*       ----      -----                                       */
  /*       y=cy      x=cx                                        */
  /*                                                             */
  /* within  -maxx<=dx<=maxx, -maxy<=dy<=maxy                    */
  /***************************************************************/

  xblk = (cx+xblock<=hor)? xblock : hor-cx;
  yblk = (cy+yblock<=ver)? yblock : ver-cy;

  if(xblk<=0 || yblk<=0){   /* if block size is null, then return */
    *mvx = *mvy = *mad=0.;  /* will be used to decide whether to split */
    return;
  }

  /* full pixel accurate estimation */
  overx = (overlap)? overlap_size(xblk) : 0;
  overy = (overlap)? overlap_size(yblk) : 0;
  *mad = (float)HUGE_VAL;
  /* Defined fallback: without it the outputs stay unset when every candidate
   * lies outside the frame, and they are read below and by the callers. */
  *mvx = *mvy = 0.;

  for(dy=-maxy+disy ; dy<=maxy+disy ; dy++){
    for(dx=-maxx+disx ; dx<=maxx+disx ; dx++){
      py = cy+dy;  px = cx+dx;

      if(px>=0 && px<=(hor-xblk) && py>=0 && py<=(ver-yblk)){ /* boundary */

        /* block size change and initial point shift: the overlap border is
         * used on a side only if it fits inside the frame for both the
         * current block and the candidate block */
        leftx  = (px      -overx >= 0   && cx      -overx >= 0)?   overx : 0;
        rightx = (px+xblk +overx <= hor && cx+xblk +overx <= hor)? overx : 0;
        lefty  = (py      -overy >= 0   && cy      -overy >= 0 )?  overy : 0;
        righty = (py+yblk +overy <= ver && cy+yblk +overy <= ver)? overy : 0;

        oxblk = xblk + leftx + rightx; oyblk = yblk + lefty + righty; /* actual block size */
        m = (cy-lefty)*hor+(cx-leftx); n = (py-lefty)*hor+(px-leftx); 

        sum = MCP_Error(frame1, m, frame0, n, oxblk, oyblk, hor);

        /* normalize per candidate: the matched area varies with the overlap */
        sum = sum/(oxblk*oyblk);
        if(sum<*mad){*mad=sum; *mvx=-dx; *mvy=-dy;}  /* update */

      }
    }
  }

  /************************************/
  /* sub-pixel search, one level per  */
  /* iteration: the 8 neighbours and  */
  /* the centre at the finer grid     */
  /*          -------------           */
  /*          | 1 | 2 | 3 |           */
  /*          | 4 |   | 5 |           */
  /*          | 6 | 7 | 8 |           */
  /*          -------------           */
  /************************************/

  for(i=0; i< subpel; i++){
    scale = 1 << i;          /* 2^i, computed exactly */
    hmad = HUGE_VAL;
    px1 = cx-*mvx;      py1 = cy-*mvy;
    px2 = (int)(px1*scale);  py2 = (int)(py1*scale); /* integer pixel positions in the upsampled frame upframe[i-1] */

    for(hy=-1 ; hy<=1 ; hy++){
      for(hx=-1 ; hx<=1 ; hx++){

        if(px2+hx>=0 && px2+hx<=(uphor[i-1]-xblk*scale) && py2+hy>=0 && py2+hy<=(upver[i-1]-yblk*scale)){  /* search inside upframe[i-1] */

          m = cy*hor+cx;

          /* Subpel_MCP_Error and Subpel_MCP_Error2 are equivalent */
          if(upframe[i] == NULL) /* no interpolated frame available, so do the interpolation inside Subpel_MCP_Error */
            sum = Subpel_MCP_Error(frame1, m, frame0, px1, (float)hx/2./scale, py1, (float)hy/2./scale, xblk, yblk, hor, ver);
          else
            sum = Subpel_MCP_Error2(frame1, m, upframe[i], px2, hx, py2, hy, xblk, yblk, hor, ver, uphor[i], upver[i], scale*2);  /* search inside upframe[i] */

          if(sum<hmad){hmad=sum; hmvx=-hx; hmvy=-hy;}
        }
      } /* hx */
    }  /* hy */
    hmad /= xblk*yblk;   /* normalize the estimation error */

    if(hmad<*mad){
      /* the step at level i is 1/(2*scale) of a pixel */
      *mvx += hmvx/2./(float)scale;
      *mvy += hmvy/2./(float)scale;
      *mad =  hmad;
    }
  }

  return;
}



/*****************************************************************************/
/*                           overlap_size()                                  */
/* Overlap border width for a block dimension blk:                           */
/*   6, 12, 24, 48 -> blk/6                                                  */
/*   8, 16, 32, 64 -> blk/8                                                  */
/*   3, 4          -> 1                                                      */
/*   anything else -> 0 (no overlap)                                         */
/*****************************************************************************/
int overlap_size(int blk)
{
  switch(blk){
  case 6:  case 12: case 24: case 48:
    return blk/6;
  case 8:  case 16: case 32: case 64:
    return blk/8;
  case 3:  case 4:
    return 1;
  default:
    return 0;
  }
}

/*****************************************************************************/
/*                          generate_child()                                 */
/*                                                                           */
/* Split the node fmv into four children.                                    */
/*   fmv           : node to split; its child flag is set to 1 and           */
/*                   child0..child3 are allocated with getarray().           */
/*   mvx, mvy, mad : arrays of (at least) four entries; entry k supplies the */
/*                   motion vector and estimation error of child k.          */
/* Every new child is a leaf (child = 0) in DEFAULT mode with merge = YES    */
/* and has its parent pointer set to fmv.                                    */
/*****************************************************************************/
void generate_child(vector_ptr fmv, float *mvx, float *mvy, float *mad)
{
  vector_ptr kid[4];
  int k;

  fmv->child = 1;

  /* allocate the four children in order child0 .. child3 */
  kid[0] = fmv->child0 = (vector_ptr) getarray(1, sizeof(vector), "fmv->child0");
  kid[1] = fmv->child1 = (vector_ptr) getarray(1, sizeof(vector), "fmv->child1");
  kid[2] = fmv->child2 = (vector_ptr) getarray(1, sizeof(vector), "fmv->child2");
  kid[3] = fmv->child3 = (vector_ptr) getarray(1, sizeof(vector), "fmv->child3");

  /* initialise each child as a leaf carrying its own vector and error */
  for(k=0 ; k<4 ; k++){
    kid[k]->parent = fmv;
    kid[k]->child  = 0;
    kid[k]->mode   = DEFAULT;
    kid[k]->merge  = YES;
    kid[k]->mvx    = mvx[k];
    kid[k]->mvy    = mvy[k];
    kid[k]->mad    = mad[k];
  }
}

/*****************************************************************************/
/*                                  pyramid()                                */
/*                                                                           */
/* Build one pyramid level: filter `frame` with a 7-tap low-pass kernel and  */
/* keep every second sample in both directions.                              */
/*   frameLL : output; sample (x/2, y/2) is stored at (y/2)*hor/2 + x/2      */
/*   frame   : input frame, hor*ver samples, row stride hor                  */
/*   hor,ver : input frame width and height                                  */
/* The input is copied to a work buffer first, so `frame` is not modified    */
/* by this function. All work buffers are released before returning.         */
/* NOTE(review): the filtering itself is done by filter_2d(), defined        */
/* elsewhere; the meaning of its trailing arguments (0, 7, 1) should be      */
/* checked against that function.                                            */
/*****************************************************************************/
void pyramid(float *frameLL, float *frame, int hor, int ver)
{
  float lpf[7];
  float *ibuf, *obuf, *buf1, *buf2;
  int row, col, hrow, hcol;

  /* symmetric low-pass kernel, [-29 0 88 138 88 0 -29]//256 */
  lpf[0] = lpf[6] = -0.11;
  lpf[1] = lpf[5] =  0.;
  lpf[2] = lpf[4] =  0.34;
  lpf[3] = 0.54;

  ibuf = (float*) getarray(hor*ver, sizeof(float), "ibuf");
  obuf = (float*) getarray(hor*ver, sizeof(float), "obuf");
  buf1 = (float*) getarray(ver, sizeof(float), "buf1");
  buf2 = (float*) getarray(ver, sizeof(float), "buf2");

  /* work on a private copy of the input */
  for(row=0 ; row<ver ; row++){
    for(col=0 ; col<hor ; col++){
      ibuf[row*hor+col] = frame[row*hor+col];
    }
  }

  /* LL band: low-pass filter ... */
  filter_2d(obuf, ibuf, buf1, buf2, hor, ver, lpf, 0, 7, 1);

  /* ... then keep the samples at even rows and even columns */
  for(hrow=0 ; 2*hrow<ver ; hrow++){
    for(hcol=0 ; 2*hcol<hor ; hcol++){
      frameLL[hrow*hor/2+hcol] = obuf[(2*hrow)*hor+(2*hcol)];
    }
  }

  free(ibuf);
  free(obuf);
  free(buf1);
  free(buf2);
}



  /******************************************/
  /*   o: original position                 */
  /*   x: interpolated position             */
  /*          ---------------------         */
  /*          | o | x | o | x | o |         */
  /*          | x | x | x | x | x |         */
  /*          | o | x | o | x | o |         */
  /*          | x | x | x | x | x |         */
  /*          | o | x | o | x | o |         */
  /*          ---------------------         */
  /******************************************/







/******************************************************************
          Compute MCP error with sub-pixel accuracy,
          interpolating the reference frame on the fly

   frame1:   current frame
   m:        index of the block's first sample in frame1
   frame0:   previous (reference) frame
   (px, py): position of the block in frame0
   (hx, hy): sub-pixel offset added to (px, py)
   xblk:     block width
   yblk:     block height
   hor:      frame width (row stride of frame1)
   ver:      frame height

   Each reference sample is obtained with
   interpolate(px+hx+x, py+hy+y, frame0, hor, ver, TYPE).
   Returns the SUM of absolute differences over the block; it is
   not divided by the block area here.
*******************************************************************/
float Subpel_MCP_Error(float *frame1, int m, float *frame0, float px, float hx, float py, float hy, int xblk, int yblk, int hor, int ver)
{
  int row, col;
  float pred, err, acc;

  acc = 0.;
  for(row=0 ; row<yblk ; row++){
    for(col=0 ; col<xblk ; col++){
      /* predicted sample at the displaced, possibly fractional, position */
      pred = interpolate(px+hx+col,py+hy+row,frame0,hor,ver,TYPE);

      err = frame1[m + row*hor + col] - pred;

      /* accumulate |err| */
      if(err < 0) acc = acc - err;
      else        acc = acc + err;
    }
  }

  return acc;
}




/******************************************************************
          Compute MCP error with sub-pixel accuracy,
          reading the reference from an upsampled frame

   frame1:   current frame
   m:        index of the block's first sample in frame1
   upframe:  upsampled previous frame
   (px, py): block position; it is doubled to address upframe
   (hx, hy): sub-pixel offset, in upframe samples
   xblk:     block width
   yblk:     block height
   hor:      frame1 width (row stride of frame1)
   ver:      frame1 height (not used here)
   uphor:    upsampled frame width (row stride of upframe)
   upver:    upsampled frame height (not used here)
   step:     distance in upframe between neighbouring block samples
             (2 for once upsampled image, 4 for twice upsampled image)

   Block sample (x, y) is compared with
   upframe[(2*py + y*step + hy)*uphor + (2*px + x*step) + hx].
   Returns the SUM of absolute differences; it is not normalised.
*******************************************************************/
float Subpel_MCP_Error2(float *frame1, int m, float *upframe, int px, int hx, int py, int hy, int xblk, int yblk, int hor, int ver, int uphor, int upver, int step)
{
  int row, col, cur, ref;
  float err, acc;

  acc = 0.;
  for(row=0 ; row<yblk ; row++){
    for(col=0 ; col<xblk ; col++){
      cur = m + row*hor + col;                                       /* in frame1  */
      ref = (py*2+row*step+hy)*uphor+(px*2+col*step)+hx;             /* in upframe */

      err = frame1[cur] - upframe[ref];

      /* accumulate |err| */
      if(err < 0) acc = acc - err;
      else        acc = acc + err;
    }
  }

  return acc;
}


/******************************************************************
                  Compute MCP error with integer accuracy

   frame1: current frame
   m:      index of the block's first sample in frame1
   frame0: previous (reference) frame
   n:      index of the block's first sample in frame0
   oxblk:  block width
   oyblk:  block height
   hor:    image width (row stride of both frames)

   Returns the SUM of absolute differences over the block; it is
   not divided by the block area here.
*******************************************************************/
float MCP_Error(float *frame1, int m, float *frame0, int n, int oxblk, int oyblk, int hor)
{
  int row, col, off;
  float err, acc;

  acc = 0.;
  for(row=0 ; row<oyblk ; row++){
    for(col=0 ; col<oxblk ; col++){
      off = row*hor + col;            /* same offset inside both frames */
      err = frame1[m+off] - frame0[n+off];

      /* accumulate |err| */
      if(err < 0) acc = acc - err;
      else        acc = acc + err;
    }
  }

  return acc;
}













/*****************************************************************************/
/*                           rec_set        ()                               */
/*                                                                           */
/* Walk the motion-vector tree rooted at fmv and switch every node whose     */
/* mode is DEFAULT to REVERSE; nodes in any other mode are left untouched.   */
/*   fmv            : root of the (sub)tree                                  */
/*   (x, y)         : upper left corner of the block                         */
/*   xblock, yblock : block width and height; halved for each child          */
/*   hor, ver       : frame width and height                                 */
/* The geometry arguments are only passed down the recursion; they do not    */
/* influence which nodes are changed.                                        */
/*****************************************************************************/
void rec_set(vector_ptr fmv, int x, int y, int xblock, int yblock, int hor, int ver)
{
  int halfx, halfy;

  if(fmv->mode == DEFAULT)
    fmv->mode = REVERSE;

  if(!fmv->child)     /* leaf: nothing below */
    return;

  halfx = xblock/2;
  halfy = yblock/2;

  rec_set(fmv->child0, x,         y,         halfx, halfy, hor, ver);  /* upper left  */
  rec_set(fmv->child1, x + halfx, y,         halfx, halfy, hor, ver);  /* upper right */
  rec_set(fmv->child2, x,         y + halfy, halfx, halfy, hor, ver);  /* lower left  */
  rec_set(fmv->child3, x + halfx, y + halfy, halfx, halfy, hor, ver);  /* lower right */
}

/*
 *   set_bi()
 * If using motion estimation in the REVERSE direction for the whole frame,
 * we need to change the mode from DEFAULT to REVERSE.
 *
 * fmv  : array of info.xnum * info.ynum top-level blocks in raster order
 * info : supplies the frame size (ywidth, yheight), the top-level block
 *        size (xblk, yblk) and the block counts (xnum, ynum)
 *
 * rec_set() is applied to every top-level block.
 */
void set_bi(vector_ptr fmv, videoinfo info)
{
  int brow, bcol;

  for(brow=0 ; brow<info.ynum ; brow++){
    for(bcol=0 ; bcol<info.xnum ; bcol++){
      rec_set(&fmv[brow*info.xnum+bcol],
              bcol*info.xblk, brow*info.yblk,   /* upper left corner */
              info.xblk, info.yblk,
              info.ywidth, info.yheight);
    }
  }
}





/*
 *                               position1D()
 * find the matching 1D integer position for sub-pixel accurate temporal filtering
 *
 * pfx : sub-pixel accurate position
 * mvx : motion vector component; only its sign is used, and only when pfx
 *       lies exactly on a half-pixel position
 *
 * Result:
 *   pfx is an integer        -> pfx itself
 *   pfx is an exact half     -> ceil(pfx) if mvx > 0, floor(pfx) otherwise
 *   any other fraction       -> nint(pfx)  (nint is defined elsewhere)
 */

int position1D(float pfx, float mvx)
{
  /* integer pixel */
  if((int)pfx == pfx)
    return (int)pfx;

  /* half pixel: the tie is resolved by the direction of the motion vector */
  if(((int)(pfx*2))/2. == pfx){
    if(mvx>0.)
      return (int)ceil(pfx);
    return (int)floor(pfx);
  }

  /* otherwise */
  return nint(pfx);
}
/*
 *                               position()
 * find the matching integer position for sub-pixel accurate temporal filtering
 *
 * px, py   : output integer position
 * pfx, pfy : sub-pixel accurate position
 * mvx, mvy : motion vector, forwarded to position1D() for each axis
 * hor, ver : frame width and height
 *
 * Each coordinate is converted with position1D() and then clipped to
 * [0, hor-1] and [0, ver-1] respectively.
 */
void position(int *px, int *py, float pfx, float pfy, float mvx, float mvy, int hor, int ver)
{
  int ix, iy;

  ix = position1D(pfx, mvx);
  iy = position1D(pfy, mvy);

  /* clip to the frame; the lower bound is tested first */
  *px = (ix<0)? 0 : ((ix>hor-1)? hor-1 : ix);
  *py = (iy<0)? 0 : ((iy>ver-1)? ver-1 : iy);
}





/*****************************************************************************/
/*                                  inbound()                                */
/*                                                                           */
/* Return 1 when (x, y) lies inside the frame, i.e. 0 <= x <= hor-1 and      */
/* 0 <= y <= ver-1 (both limits inclusive); return 0 otherwise.              */
/*****************************************************************************/
int inbound(float x, float y, int hor, int ver)
{
  int x_inside, y_inside;

  x_inside = (x>=0. && x<=(float)(hor-1));
  y_inside = (y>=0. && y<=(float)(ver-1));

  return x_inside && y_inside;
}

/*****************************************************************************/
/*                               get_cvector                                 */
/*****************************************************************************/
void get_cvector(float *cmvx, float *cmvy, float *ymvx, float *ymvy, int yhor, int yver, int chor, int cver, videoinfo info)
{
  int x, y, pos;
  float dx, dy;
  int t1, t2, scale;
  
  scale = 1 << info.subpel;


  /* get the motion vector for the chrominance component */
  /* from the luminance vectors */
  /* half */
  /* yfv    dx     t1    dx    cfv */
  /* +-0.5  +-.25    0     0.  0.  */
  /* +-1.0  +-.5     0   +-.5  +-0.5 */
  /* +-1.5  +-.75    0   +-.5  +-0.5 */
  /* +-2.0  +-1.0  +-1   0.    +-1.0 */
  /* +-2.5  +-1.25 +-1   0.    +-1.0 */
  /* +-3.0  +-1.50 +-1   +-.5  +-1.5 */

  /* quater */
  /* yfv    dx     t1    dx    cfv */
  /* 0.25   0.125   0    0         */
  /* 0.5    0.25    0    0.25      */
  /* 0.75   0.375   0    0.25      */
  /* 1      0.5     0    0.5       */
  /* 1.25   0.625   0    0.5       */
  /* 1.5    0.75    0    0.75      */
  /* 1.75   0.875   0    0.75      */



  if(yver == 2*cver && yhor == 2*chor){
    for(y=0 ; y<cver ; y++){
      for(x=0 ; x<chor ; x++){

         dx = ymvx[(2*y)*yhor+(2*x)];

        if(dx == Infinity)
          cmvx[y*chor+x] = dx;
        else{
          dx /= 2.0;
          
		  t1 =(int)dx;

          /*if (dx > 0.0) dx =(dx < t1+0.5)? (float)0.0 : (float)0.5;
          else          dx = (dx > t1-0.5)? (float)0.0 : (float)-0.5;*/
         
		  dx = (float)((int)(dx*scale)%scale) / (float)scale;

          cmvx[y*chor+x] = t1 + dx;

		}

        dy = ymvy[(2*y)*yhor+(2*x)];
        if(dy == Infinity)
          cmvy[y*chor+x] = dy;
        else{
          dy /= 2.0;
          
		  t2 = (int)dy;

          /*if (dy > 0.0) dy = (dy < t2+0.5)? (float)0.0 : (float)0.5;
          else          dy = (dy > t2-0.5)? (float)0.0 : (float)-0.5;*/

		  dy = (float)((int)(dy*scale)%scale) / (float)scale;

          cmvy[y*chor+x] = t2 + dy;



		}
	  }
	}
  }//420 
  else
	if(yver == cver && yhor == chor){ // 444
      for(y=0 ; y<cver ; y++){
        for(x=0 ; x<chor ; x++){
		  pos = y*chor+x;
          cmvx[pos] = ymvx[pos];
          cmvy[pos] = ymvy[pos];
		}
	  }
	}
	else{
		printf("can not handle this case (mctfN.c)\n");
		exit(1);
	}

}








/*****************************************************************************
 *                         block2pixel2()
 * get dense motion field from block-based motion vectors,
 * keeping only the blocks whose mode equals block_mode.
 *
 * mvx, mvy   : output pixel-based motion field, row stride hor
 * fmv        : motion vector tree of the block
 * (cx, cy)   : upper left corner of the block
 * xblk, yblk : block width and height; halved for each child
 * hor, ver   : frame width and height
 * block_mode : mode to keep
 *
 * The tree is descended down to its leaves. Every pixel of a leaf receives
 * the leaf's vector when fmv->mode == block_mode and Infinity otherwise.
 * Leaves are cropped at the right/bottom frame border; leaves lying
 * completely outside the frame are skipped.
 *****************************************************************************/
void block2pixel2(float *mvx, float *mvy, vector_ptr fmv, int cx, int cy, int xblk, int yblk, int hor, int ver, enum BlockMode block_mode)
{
  int row, col, wid, hgt, halfx, halfy;
  float vx, vy;

  /* inner node: hand the four quadrants to the children */
  if(fmv->child){
    halfx = xblk/2;
    halfy = yblk/2;
    block2pixel2(mvx,mvy,fmv->child0,cx,      cy,      halfx,halfy,hor,ver, block_mode);
    block2pixel2(mvx,mvy,fmv->child1,cx+halfx,cy,      halfx,halfy,hor,ver, block_mode);
    block2pixel2(mvx,mvy,fmv->child2,cx,      cy+halfy,halfx,halfy,hor,ver, block_mode);
    block2pixel2(mvx,mvy,fmv->child3,cx+halfx,cy+halfy,halfx,halfy,hor,ver, block_mode);
    return;
  }

  /* leaf: crop the block at the frame border */
  wid = xblk;
  if(cx+xblk > hor) wid = hor-cx;
  hgt = yblk;
  if(cy+yblk > ver) hgt = ver-cy;

  if(wid<=0 || hgt<=0)
    return;

  /* value written to every pixel of the leaf */
  if(fmv->mode == block_mode){
    vx = fmv->mvx;
    vy = fmv->mvy;
  }
  else{
    vx = Infinity;
    vy = Infinity;
  }

  for(row=cy ; row<cy+hgt ; row++){
    for(col=cx ; col<cx+wid ; col++){
      mvx[row*hor+col] = vx;
      mvy[row*hor+col] = vy;
    }
  }
}



/*****************************************************************************/
/*                              block2pixel3()                               */
/*                                                                           */
/* Change the structure of the motion vectors from block-based to            */
/* pixel-based, whatever the block mode is.                                  */
/*   mvx, mvy   : output pixel-based motion field, row stride hor            */
/*   fmv        : motion vector tree of the block                            */
/*   (cx, cy)   : upper left corner of the block                             */
/*   xblk, yblk : block width and height; halved for each child              */
/*   hor, ver   : frame width and height                                     */
/* The tree is descended down to its leaves and every pixel of a leaf        */
/* receives the leaf's vector. Leaves are cropped at the right/bottom frame  */
/* border; leaves lying completely outside the frame are skipped.            */
/*****************************************************************************/
void block2pixel3(float *mvx, float *mvy, vector_ptr fmv, int cx, int cy, int xblk, int yblk, int hor, int ver)
{
  int row, col, wid, hgt, halfx, halfy;

  /* inner node: hand the four quadrants to the children */
  if(fmv->child){
    halfx = xblk/2;
    halfy = yblk/2;
    block2pixel3(mvx,mvy,fmv->child0,cx,      cy,      halfx,halfy,hor,ver);
    block2pixel3(mvx,mvy,fmv->child1,cx+halfx,cy,      halfx,halfy,hor,ver);
    block2pixel3(mvx,mvy,fmv->child2,cx,      cy+halfy,halfx,halfy,hor,ver);
    block2pixel3(mvx,mvy,fmv->child3,cx+halfx,cy+halfy,halfx,halfy,hor,ver);
    return;
  }

  /* leaf: crop the block at the frame border */
  wid = xblk;
  if(cx+xblk > hor) wid = hor-cx;
  hgt = yblk;
  if(cy+yblk > ver) hgt = ver-cy;

  if(wid<=0 || hgt<=0)
    return;

  for(row=cy ; row<cy+hgt ; row++){
    for(col=cx ; col<cx+wid ; col++){
      mvx[row*hor+col] = fmv->mvx;
      mvy[row*hor+col] = fmv->mvy;
    }
  }
}


/*****************************************************************************/
/*                           rec_weighMV    ()                               */
/*                                                                           */
/* Multiply the motion vector of fmv and of every node below it by weight.   */
/*   fmv            : root of the (sub)tree                                  */
/*   weight         : factor applied to both mvx and mvy                     */
/*   (x, y)         : upper left corner of the block                         */
/*   xblock, yblock : block width and height; halved for each child          */
/*   hor, ver       : frame width and height                                 */
/* The scaled vector is not clipped to the frame: the geometry arguments     */
/* are only passed down the recursion.                                       */
/*****************************************************************************/
void rec_weighMV(vector_ptr fmv, float weight, int x, int y, int xblock, int yblock, int hor, int ver)
{
  int halfx, halfy;

  fmv->mvx *= weight;
  fmv->mvy *= weight;

  if(!fmv->child)     /* leaf: nothing below */
    return;

  halfx = xblock/2;
  halfy = yblock/2;

  rec_weighMV(fmv->child0, weight, x,         y,         halfx, halfy, hor, ver);  /* upper left  */
  rec_weighMV(fmv->child1, weight, x + halfx, y,         halfx, halfy, hor, ver);  /* upper right */
  rec_weighMV(fmv->child2, weight, x,         y + halfy, halfx, halfy, hor, ver);  /* lower left  */
  rec_weighMV(fmv->child3, weight, x + halfx, y + halfy, halfx, halfy, hor, ver);  /* lower right */
}

/*
 *   weighMV()
 * Weigh motion vectors by a constant
 *
 * fmv    : array of info.xnum * info.ynum top-level blocks in raster order
 * weight : factor applied to every motion vector of every block tree
 * info   : supplies the frame size (ywidth, yheight), the top-level block
 *          size (xblk, yblk) and the block counts (xnum, ynum)
 *
 * rec_weighMV() is applied to every top-level block.
 */
void weighMV(vector_ptr fmv, float weight, videoinfo info)
{
  int brow, bcol;

  for(brow=0 ; brow<info.ynum ; brow++){
    for(bcol=0 ; bcol<info.xnum ; bcol++){
      rec_weighMV(&fmv[brow*info.xnum+bcol], weight,
                  bcol*info.xblk, brow*info.yblk,   /* upper left corner */
                  info.xblk, info.yblk,
                  info.ywidth, info.yheight);
    }
  }
}
