| 
				
				
					
				
				
				 | 
			
			 | 
			@@ -38,6 +38,10 @@ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			                                   const uint8_t *src, x86_reg stride, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			                                   int rnd, int64_t shift); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			void ff_vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			                                   const int16_t *src, int rnd); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			void ff_vc1_avg_hor_16b_shift2_mmxext(uint8_t *dst, x86_reg stride, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			                                      const int16_t *src, int rnd); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#define OP_PUT(S,D) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t" | 
		
		
	
	
		
			
				| 
				
					
				
				
					
				
				
				 | 
			
			 | 
			@@ -70,55 +74,6 @@ void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			     "punpcklwd %%mm7, %%mm7           \n\t"    \ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			     "punpckldq %%mm7, %%mm7           \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			/** | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 * Data is already unpacked, so some operations can directly be made from | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 * memory. | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 */ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#define VC1_HOR_16b_SHIFT2(OP, OPNAME)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			                                             const int16_t *src, int rnd)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			{\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    int h = 8;\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    src -= 1;\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile(\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        LOAD_ROUNDER_MMX("%4")\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq      "MANGLE(ff_pw_128)", %%mm6\n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq      "MANGLE(ff_pw_9)", %%mm5 \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "1:                                \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq      2*0+0(%1), %%mm1        \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq      2*0+8(%1), %%mm2        \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq      2*1+0(%1), %%mm3        \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq      2*1+8(%1), %%mm4        \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "paddw     2*3+0(%1), %%mm1        \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "paddw     2*3+8(%1), %%mm2        \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "paddw     2*2+0(%1), %%mm3        \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "paddw     2*2+8(%1), %%mm4        \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "pmullw    %%mm5, %%mm3            \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "pmullw    %%mm5, %%mm4            \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "psubw     %%mm1, %%mm3            \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "psubw     %%mm2, %%mm4            \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        NORMALIZE_MMX("$7")\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        /* Remove bias */\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "paddw     %%mm6, %%mm3            \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "paddw     %%mm6, %%mm4            \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        TRANSFER_DO_PACK(OP)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "add       $24, %1                 \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "add       %3, %2                  \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "decl      %0                      \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "jnz 1b                            \n\t"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        : "+r"(h), "+r" (src),  "+r" (dst)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        : "r"(stride), "m"(rnd)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			          NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        : "memory"\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    );\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			} | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			VC1_HOR_16b_SHIFT2(OP_PUT, put_) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			VC1_HOR_16b_SHIFT2(OP_AVG, avg_) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			/** | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 * Purely vertical or horizontal 1/2 shift interpolation. | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 * Sacrifice mm6 for the *9 factor. | 
		
		
	
	
		
			
				| 
				
					
				
				
					
				
				
				 | 
			
			 | 
			@@ -380,14 +335,14 @@ typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 * @param  vmode   Vertical filter. | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 * @param  rnd     Rounding bias. | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			 */ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#define VC1_MSPEL_MC(OP)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#define VC1_MSPEL_MC(OP, INSTR)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			                               int hmode, int vmode, int rnd)\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			{\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         { NULL, vc1_put_ver_16b_shift1_mmx, ff_vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         { NULL, OP ## vc1_hor_16b_shift1_mmx, OP ## vc1_hor_16b_shift2_mmx, OP ## vc1_hor_16b_shift3_mmx };\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         { NULL, OP ## vc1_hor_16b_shift1_mmx, ff_vc1_ ## OP ## hor_16b_shift2_ ## INSTR, OP ## vc1_hor_16b_shift3_mmx };\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			         { NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			\ | 
		
		
	
	
		
			
				| 
				
					
				
				
					
				
				
				 | 
			
			 | 
			@@ -428,8 +383,8 @@ static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			} | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			VC1_MSPEL_MC(put_) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			VC1_MSPEL_MC(avg_) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			VC1_MSPEL_MC(put_, mmx) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			VC1_MSPEL_MC(avg_, mmxext) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			/** Macro to ease bicubic filter interpolation functions declarations */ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#define DECLARE_FUNCTION(a, b)                                          \ | 
		
		
	
	
		
			
				| 
				
					
				
				
				
				 | 
			
			 | 
			
  |