/*****************************************************************************
 * $Id: vop-deint-lb.c,v 1.5 2004/09/18 16:46:55 alainjj Exp $
 * Program under GNU General Public License (see ../COPYING)
 * Deinterlace routines for xine by Miguel Freitas
 * based on the DScaler project sources (deinterlace.sourceforge.net)
 *****************************************************************************/
#include <stdio.h>
#include "config.h"
#include "colorspace.h"
#include "vop.h"
#include "memcpy.h"
#include "cpu_accel.h"
/* Debug level, defined outside this file; the MMX path of the filter below
 * logs its arguments to stderr when it is greater than 1. */
extern int debug;

/* Deinterlace linear blend filter */

/*
 * Linear-blend deinterlacer.
 *
 * The frame is handled as `height` lines of `width * 2` bytes each (the
 * filter is registered in this file for VIDEO_YUYV input and output).
 * The first and the last line are copied unchanged; every other
 * destination line is computed byte by byte as
 *
 *     (line above + 2 * current line + line below) / 4
 *
 * from the three neighbouring source lines.
 *
 *   v       unused by this filter
 *   dest    output frame, height * width * 2 bytes
 *   src     input frame, same layout as dest
 *   width   pixels per line
 *   height  number of lines
 *
 * Always returns 1.  When width or height is not positive nothing is
 * read or written; a one-line frame is simply copied.
 */
static int deinterlace_linearblend_yuv
(vop2 *v,  unsigned char *dest, unsigned char *src, 
 int width, int height) {
  int LineLength = width*2;   /* bytes per line */
  int Line;
  int x;
#ifdef ARCH_X86
  uint64_t *YVal1;
  uint64_t *YVal2;
  uint64_t *YVal3;
  uint64_t *Dest;
  unsigned char *Tail1, *Tail2, *Tail3, *TailDest;
  int n;
#else
  unsigned char *l0, *l1, *l2, *l3;
#endif

  (void)v;

#ifdef ARCH_X86
  if(debug>1)
    fprintf(stderr, "deinterlace_linearblend_yuv_mmx %p %p %d %d\n",
            (void *)dest, (void *)src, width, height);
#endif

  /* Degenerate frame: there is no line to copy or blend. */
  if (width <= 0 || height <= 0)
    return 1;

  /* Copy first line */
  fast_memcpy(dest, src, LineLength);

  /* A single-line frame has no separate last line to copy. */
  if (height == 1)
    return 1;

#ifdef ARCH_X86
  for (Line = 1; Line < height - 1; ++Line)
  {
    YVal1 = (uint64_t *)(src + (Line - 1) * LineLength);
    YVal2 = (uint64_t *)(src + (Line) * LineLength);
    YVal3 = (uint64_t *)(src + (Line + 1) * LineLength);
    Dest = (uint64_t *)(dest + Line * LineLength);

    /* Blend 8 bytes per iteration. */
    n = LineLength >> 3;
    while( n-- )
    {
      /* load data from 3 lines */
      movq_m2r (*YVal1++, mm0);
      movq_m2r (*YVal2++, mm1);
      movq_m2r (*YVal3++, mm2);
      
      /* expand bytes to words
       *
       * NOTE(review): assuming the MMX macros take (source, destination)
       * operands, each source byte lands in the HIGH byte of a word and
       * the low byte keeps whatever the destination register held (for
       * mm3..mm5 that is left over from the previous iteration).  After
       * the shifts below a few stray low-order bits can remain, so the
       * result may differ by one from the exact average -- confirm.
       */
      punpckhbw_r2r (mm0, mm3);
      punpckhbw_r2r (mm1, mm4);
      punpckhbw_r2r (mm2, mm5);
      punpcklbw_r2r (mm0, mm0);
      punpcklbw_r2r (mm1, mm1);
      punpcklbw_r2r (mm2, mm2);
      
      /* 
       * deinterlacing:
       * deint_line = (line0 + 2*line1 + line2) / 4
       *
       * With the byte in the high half of each word, >>7 yields 2*byte
       * and >>6 yields 4*byte; the sum 2*l0 + 4*l1 + 2*l2 is then
       * divided by 8 with the final >>3.
       */
      psrlw_i2r (07, mm0);
      psrlw_i2r (06, mm1);
      psrlw_i2r (07, mm2);
      psrlw_i2r (07, mm3);
      psrlw_i2r (06, mm4);
      psrlw_i2r (07, mm5);
      paddw_r2r (mm1, mm0);
      paddw_r2r (mm2, mm0);
      paddw_r2r (mm4, mm3);
      paddw_r2r (mm5, mm3);
      psrlw_i2r (03, mm0);
      psrlw_i2r (03, mm3);

      /* pack 8 words to 8 bytes in mm0 */
      packuswb_r2r (mm3, mm0);
      
      movq_r2m ( mm0, *Dest++ );
    }

    /* Blend the bytes left over when the line length is not a multiple
     * of 8; the quadword loop above does not reach them. */
    Tail1 = (unsigned char *)YVal1;
    Tail2 = (unsigned char *)YVal2;
    Tail3 = (unsigned char *)YVal3;
    TailDest = (unsigned char *)Dest;
    for (x = 0; x < (LineLength & 7); ++x) {
      TailDest[x] = (Tail1[x] + (Tail2[x]<<1) + Tail3[x]) >> 2;
    }
  }

  /* Copy last line (Line == height - 1 here) */
  fast_memcpy(dest + Line * LineLength, 
              src + Line * LineLength, LineLength);
                   
  /* clear out the MMX registers ready for doing floating point
   * again
   */
  emms();
#else
  l0 = dest + LineLength;   /* target line */
  l1 = src;                 /* source line above the target */
  l2 = l1 + LineLength;     /* source line at the target position */
  l3 = l2 + LineLength;     /* source line below the target */

  for (Line = 1; Line < height - 1; ++Line) {
    /* computes avg of: l1 + 2*l2 + l3 */
    for (x = 0; x < LineLength; ++x) {
      l0[x] = (l1[x] + (l2[x]<<1) + l3[x]) >> 2;
    }

    /* slide the three-line window down by one line */
    l1 = l2;
    l2 = l3;
    l3 += LineLength;
    l0 += LineLength;
  }

  /* Copy the last line: after the loop l2 (not l1) is source line
   * height - 1, matching what the MMX path copies. */
  fast_memcpy(l0, l2, LineLength);
#endif
  return 1;
}

/*
 * Filter descriptor that exposes the linear-blend deinterlacer under the
 * name "deintlb".  The fields are initialised positionally; the `vop`
 * type is declared outside this file, so the per-field notes below are
 * the only description of their meaning available here.
 */
vop vop_deint_lb = {
  "deintlb",  /* name */
  1,           /* only 1 image is needed */
  VIDEO_YUYV,  /* input format  */
  VIDEO_YUYV,  /* output format */
  deinterlace_linearblend_yuv,  /* The TREATMENT function */
  NULL,        /* No reinitialization function */
  0,           /* the width of the input is equal to the output width */
  0,           /* idem for the height */
  -1,          /* the destination height is 480 for ntsc, 576 for pal */
  1            /* preferably the last treatment */
};
