/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/********************** MMX accelerated quantization *************************/

/*
 * quantize - quantize DCT coefficients with MMX word arithmetic.
 *
 * block   : destination; receives the quantized values
 * cache   : source coefficients (only read)
 * qmatrix : per-coefficient multipliers, applied with pmulhw (only read)
 * round   : per-coefficient rounding terms, added to the magnitude (only read)
 *
 * For every 16-bit lane the instruction stream computes
 *
 *     m        = |cache[i]| + round[i]                  (wrapping 16-bit add)
 *     q        = high 16 bits of the signed product m * qmatrix[i]
 *     block[i] = (cache[i] < 0) ? -q : q
 *
 * The absolute value and the final sign restoration both use the sign-mask
 * trick: with s = (v < 0) ? 0xffff : 0, (v ^ s) - s negates v exactly when
 * s is all ones.
 *
 * The four QUANTIZE_STEP expansions cover byte offsets 0x00..0x7f of each
 * array, i.e. 128 bytes per call.
 * NOTE(review): presumably dct_t is a 16-bit integer, making this one
 * 64-coefficient (8x8) block -- confirm against the dct_t typedef.
 *
 * NOTE(review): all of mm0..mm7 are overwritten, yet only "memory" appears in
 * the clobber list, and no emms is issued here. Presumably the callers deal
 * with the MMX/x87 state -- confirm.
 */
static void inline quantize(dct_t *block,
			    dct_t *cache,
			    dct_t *qmatrix,
			    dct_t *round)
{
/*block[i] = (short) ((cache[i]+sign0round(cache[i], round[i]))*qmatrix[i]); */

/*
 * QUANTIZE_STEP(x,y) processes four quadwords (32 bytes) of each array: the
 * two at byte offsets 0x<x>0 and 0x<x>8 and the two at 0x<y>0 and 0x<y>8.
 * Register roles: mm0/mm1/mm4/mm5 carry the data, mm2/mm3/mm6/mm7 carry the
 * matching per-lane sign masks. The four independent chains are interleaved
 * instruction by instruction.
 * Operand numbers: %0 = cache, %1 = qmatrix, %2 = block, %3 = round.
 */
#define QUANTIZE_STEP(x,y) \
                "movq 0x" #x "0(%0), %%mm0\n"	/* mm0 = 1st quadword of cache */ \
		"pxor %%mm2, %%mm2\n"		/* mm2 = 0             */ \
		"pxor %%mm6, %%mm6\n"		/* mm6 = 0             */ \
                "movq 0x" #y "0(%0), %%mm4\n"	/* mm4 = 3rd quadword of cache */ \
		"pcmpgtw %%mm0, %%mm2\n"	/* mm2 = (mm0<0)?0xffff:0 */ \
		"pxor %%mm3, %%mm3\n"		/* mm3 = 0             */ \
		"movq 0x" #x "8(%0), %%mm1\n"	/* mm1 = 2nd quadword of cache */ \
		"pcmpgtw %%mm4, %%mm6\n"	/* mm6 = (mm4<0)?0xffff:0 */ \
		"pxor %%mm7, %%mm7\n"		/* mm7 = 0             */ \
		"movq 0x" #y "8(%0), %%mm5\n"	/* mm5 = 4th quadword of cache */ \
		"pcmpgtw %%mm1, %%mm3\n"	/* mm3 = (mm1<0)?0xffff:0 */ \
		"pcmpgtw %%mm5, %%mm7\n"	/* mm7 = (mm5<0)?0xffff:0 */ \
                "pxor %%mm2, %%mm0\n"	        /* mm0 ^= mm2: |mm0|-(mm0<0) */ \
		"pxor %%mm3, %%mm1\n"	        /* mm1 ^= mm3: |mm1|-(mm1<0) */ \
                "paddw 0x" #x "0(%3), %%mm0\n"	/* mm0 += round (1st quadword) */ \
                "pxor %%mm6, %%mm4\n"	        /* mm4 ^= mm6: |mm4|-(mm4<0) */ \
		"paddw 0x" #x "8(%3), %%mm1\n"	/* mm1 += round (2nd quadword) */ \
		"pxor %%mm7, %%mm5\n"	        /* mm5 ^= mm7: |mm5|-(mm5<0) */ \
                "paddw 0x" #y "0(%3), %%mm4\n"	/* mm4 += round (3rd quadword) */ \
                "psubw %%mm2, %%mm0\n"	        /* mm0 = |1st data| + round */ \
		"paddw 0x" #y "8(%3), %%mm5\n"	/* mm5 += round (4th quadword) */ \
		"psubw %%mm3, %%mm1\n"	        /* mm1 = |2nd data| + round */ \
		"pmulhw 0x" #x "0(%1), %%mm0\n"	/* mm0 = high word of mm0*qmatrix */	\
                "psubw %%mm6, %%mm4\n"	        /* mm4 = |3rd data| + round */ \
		"pmulhw 0x" #x "8(%1), %%mm1\n"	/* mm1 = high word of mm1*qmatrix */	\
		"psubw %%mm7, %%mm5\n"	        /* mm5 = |4th data| + round */ \
		"pmulhw 0x" #y "0(%1), %%mm4\n"	/* mm4 = high word of mm4*qmatrix */	\
                "pxor %%mm2, %%mm0\n"	        /* mm0 ^= sign mask (negate, step 1) */ \
		"pmulhw 0x" #y "8(%1), %%mm5\n"	/* mm5 = high word of mm5*qmatrix */	\
                "psubw %%mm2, %%mm0\n"	        /* mm0 -= sign mask (negate, step 2) */ \
		"pxor %%mm3, %%mm1\n"	        /* mm1 ^= sign mask (negate, step 1) */ \
		"movq %%mm0, 0x" #x "0(%2)\n"	/* block 1st quadword = mm0 */ \
		"psubw %%mm3, %%mm1\n"	        /* mm1 -= sign mask (negate, step 2) */ \
                "pxor %%mm6, %%mm4\n"	        /* mm4 ^= sign mask (negate, step 1) */ \
		"movq %%mm1, 0x" #x "8(%2)\n"   /* block 2nd quadword = mm1 */ \
                "psubw %%mm6, %%mm4\n"	        /* mm4 -= sign mask (negate, step 2) */ \
		"pxor %%mm7, %%mm5\n"	        /* mm5 ^= sign mask (negate, step 1) */ \
		"movq %%mm4, 0x" #y "0(%2)\n"	/* block 3rd quadword = mm4 */ \
		"psubw %%mm7, %%mm5\n"	        /* mm5 -= sign mask (negate, step 2) */ \
		"movq %%mm5, 0x" #y "8(%2)\n"   /* block 4th quadword = mm5 */


  /* Four steps of 32 bytes each: byte offsets 0x00-0x1f, 0x20-0x3f,
     0x40-0x5f and 0x60-0x7f of every array. */
  asm volatile (
		QUANTIZE_STEP(0,1)
		QUANTIZE_STEP(2,3)
		QUANTIZE_STEP(4,5)
		QUANTIZE_STEP(6,7)
		/* The pointers are declared as outputs tied to identical inputs,
		   although the instructions above only use them as base addresses
		   and never modify them. */
		: "=r"(cache), "=r"(qmatrix), "=r"(block), "=r"(round)
		: "0"(cache), "1"(qmatrix), "2"(block), "3"(round)
		/* "memory": the asm reads and writes through the pointers. */
		: "memory");
}
