|  |  | @@ -25,37 +25,76 @@ | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #include "libavutil/x86/asm.h" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | /* A single TEMPLATE_PP_* should be defined (to 1) when this template is | 
		
	
		
			
			|  |  |  | * included. The following macros will define its dependencies to 1 as well | 
		
	
		
			
			|  |  |  | * (like MMX2 depending on MMX), and will define to 0 all the others. Every | 
		
	
		
			
			|  |  |  | * TEMPLATE_PP_* needs to be #undef'd at the end. */ | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #ifdef TEMPLATE_PP_C | 
		
	
		
			
			|  |  |  | #   define RENAME(a) a ## _C | 
		
	
		
			
			|  |  |  | #else | 
		
	
		
			
			|  |  |  | #   define TEMPLATE_PP_C 0 | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #ifdef TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | #   define RENAME(a) a ## _altivec | 
		
	
		
			
			|  |  |  | #else | 
		
	
		
			
			|  |  |  | #   define TEMPLATE_PP_ALTIVEC 0 | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #ifdef TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | #   define RENAME(a) a ## _MMX | 
		
	
		
			
			|  |  |  | #else | 
		
	
		
			
			|  |  |  | #   define TEMPLATE_PP_MMX 0 | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #ifdef TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #   undef  TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | #   define TEMPLATE_PP_MMX 1 | 
		
	
		
			
			|  |  |  | #   define RENAME(a) a ## _MMX2 | 
		
	
		
			
			|  |  |  | #else | 
		
	
		
			
			|  |  |  | #   define TEMPLATE_PP_MMXEXT 0 | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #ifdef TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | #   undef  TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | #   define TEMPLATE_PP_MMX 1 | 
		
	
		
			
			|  |  |  | #   define RENAME(a) a ## _3DNow | 
		
	
		
			
			|  |  |  | #else | 
		
	
		
			
			|  |  |  | #   define TEMPLATE_PP_3DNOW 0 | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #undef REAL_PAVGB | 
		
	
		
			
			|  |  |  | #undef PAVGB | 
		
	
		
			
			|  |  |  | #undef PMINUB | 
		
	
		
			
			|  |  |  | #undef PMAXUB | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if   HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if   TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" | 
		
	
		
			
			|  |  |  | #elif HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #elif TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | #define PAVGB(a,b)  REAL_PAVGB(a,b) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if   HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if   TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" | 
		
	
		
			
			|  |  |  | #elif HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #elif TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | #define PMINUB(b,a,t) \ | 
		
	
		
			
			|  |  |  | "movq " #a ", " #t " \n\t"\ | 
		
	
		
			
			|  |  |  | "psubusb " #b ", " #t " \n\t"\ | 
		
	
		
			
			|  |  |  | "psubb " #t ", " #a " \n\t" | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if   HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if   TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" | 
		
	
		
			
			|  |  |  | #elif HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #elif TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | #define PMAXUB(a,b) \ | 
		
	
		
			
			|  |  |  | "psubusb " #a ", " #b " \n\t"\ | 
		
	
		
			
			|  |  |  | "paddb " #a ", " #b " \n\t" | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | //FIXME? |255-0| = 1 (should not be a problem ...) | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | /** | 
		
	
		
			
			|  |  |  | * Check if the middle 8x8 Block in the given 8x16 block is flat | 
		
	
		
			
			|  |  |  | */ | 
		
	
	
		
			
				|  |  | @@ -135,7 +174,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ | 
		
	
		
			
			|  |  |  | "psubusb %%mm3, %%mm4                   \n\t" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | "                                       \n\t" | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "pxor %%mm7, %%mm7                      \n\t" | 
		
	
		
			
			|  |  |  | "psadbw %%mm7, %%mm0                    \n\t" | 
		
	
		
			
			|  |  |  | #else | 
		
	
	
		
			
				|  |  | @@ -169,16 +208,16 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ | 
		
	
		
			
			|  |  |  | return 2; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | /** | 
		
	
		
			
			|  |  |  | * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) | 
		
	
		
			
			|  |  |  | * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | #if !HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #if !TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | src+= stride*3; | 
		
	
		
			
			|  |  |  | __asm__ volatile(        //"movv %0 %1 %2\n\t" | 
		
	
		
			
			|  |  |  | "movq %2, %%mm0                         \n\t"  // QP,..., QP | 
		
	
	
		
			
				|  |  | @@ -305,7 +344,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) | 
		
	
		
			
			|  |  |  | : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_c | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | const int l1= stride; | 
		
	
		
			
			|  |  |  | const int l2= stride + l1; | 
		
	
		
			
			|  |  |  | const int l3= stride + l2; | 
		
	
	
		
			
				|  |  | @@ -344,9 +383,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | src++; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | /** | 
		
	
		
			
			|  |  |  | * Experimental Filter 1 | 
		
	
	
		
			
				|  |  | @@ -357,7 +396,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | src+= stride*3; | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
	
		
			
				|  |  | @@ -443,7 +482,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) | 
		
	
		
			
			|  |  |  | : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_c | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | const int l1= stride; | 
		
	
		
			
			|  |  |  | const int l2= stride + l1; | 
		
	
	
		
			
				|  |  | @@ -477,13 +516,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | src++; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if !HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #if !TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | /* | 
		
	
		
			
			|  |  |  | uint8_t tmp[16]; | 
		
	
		
			
			|  |  |  | const int l1= stride; | 
		
	
	
		
			
				|  |  | @@ -764,7 +803,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | #elif HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #elif TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars | 
		
	
		
			
			|  |  |  | src+= stride*4; | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
	
		
			
				|  |  | @@ -872,7 +911,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | 
		
	
		
			
			|  |  |  | "movq (%3), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3 | 
		
	
		
			
			|  |  |  | "movq 8(%3), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "movq %%mm7, %%mm6                      \n\t" // 0 | 
		
	
		
			
			|  |  |  | "psubw %%mm0, %%mm6                     \n\t" | 
		
	
		
			
			|  |  |  | "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7| | 
		
	
	
		
			
				|  |  | @@ -904,7 +943,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | 
		
	
		
			
			|  |  |  | "psubw %%mm6, %%mm3                     \n\t" // |2H0 - 5H1 + 5H2 - 2H3| | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "pminsw %%mm2, %%mm0                    \n\t" | 
		
	
		
			
			|  |  |  | "pminsw %%mm3, %%mm1                    \n\t" | 
		
	
		
			
			|  |  |  | #else | 
		
	
	
		
			
				|  |  | @@ -968,7 +1007,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | 
		
	
		
			
			|  |  |  | "pand %%mm2, %%mm4                      \n\t" | 
		
	
		
			
			|  |  |  | "pand %%mm3, %%mm5                      \n\t" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "pminsw %%mm0, %%mm4                    \n\t" | 
		
	
		
			
			|  |  |  | "pminsw %%mm1, %%mm5                    \n\t" | 
		
	
		
			
			|  |  |  | #else | 
		
	
	
		
			
				|  |  | @@ -995,7 +1034,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | 
		
	
		
			
			|  |  |  | : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) | 
		
	
		
			
			|  |  |  | : "%"REG_a | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | const int l1= stride; | 
		
	
		
			
			|  |  |  | const int l2= stride + l1; | 
		
	
		
			
			|  |  |  | const int l3= stride + l2; | 
		
	
	
		
			
				|  |  | @@ -1033,14 +1072,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | src++; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if !HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #if !TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | DECLARE_ALIGNED(8, uint64_t, tmp)[3]; | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "pxor %%mm6, %%mm6                      \n\t" | 
		
	
	
		
			
				|  |  | @@ -1060,7 +1099,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #undef REAL_FIND_MIN_MAX | 
		
	
		
			
			|  |  |  | #undef FIND_MIN_MAX | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #define REAL_FIND_MIN_MAX(addr)\ | 
		
	
		
			
			|  |  |  | "movq " #addr ", %%mm0                  \n\t"\ | 
		
	
		
			
			|  |  |  | "pminub %%mm0, %%mm7                    \n\t"\ | 
		
	
	
		
			
				|  |  | @@ -1087,7 +1126,7 @@ FIND_MIN_MAX((%0, %1, 8)) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | "movq %%mm7, %%mm4                      \n\t" | 
		
	
		
			
			|  |  |  | "psrlq $8, %%mm7                        \n\t" | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "pminub %%mm4, %%mm7                    \n\t" // min of pixels | 
		
	
		
			
			|  |  |  | "pshufw $0xF9, %%mm7, %%mm4             \n\t" | 
		
	
		
			
			|  |  |  | "pminub %%mm4, %%mm7                    \n\t" // min of pixels | 
		
	
	
		
			
				|  |  | @@ -1112,7 +1151,7 @@ FIND_MIN_MAX((%0, %1, 8)) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | "movq %%mm6, %%mm4                      \n\t" | 
		
	
		
			
			|  |  |  | "psrlq $8, %%mm6                        \n\t" | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "pmaxub %%mm4, %%mm6                    \n\t" // max of pixels | 
		
	
		
			
			|  |  |  | "pshufw $0xF9, %%mm6, %%mm4             \n\t" | 
		
	
		
			
			|  |  |  | "pmaxub %%mm4, %%mm6                    \n\t" | 
		
	
	
		
			
				|  |  | @@ -1266,7 +1305,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, | 
		
	
		
			
			|  |  |  | : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | int y; | 
		
	
		
			
			|  |  |  | int min=255; | 
		
	
		
			
			|  |  |  | int max=0; | 
		
	
	
		
			
				|  |  | @@ -1383,9 +1422,9 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, | 
		
	
		
			
			|  |  |  | //        src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | /** | 
		
	
		
			
			|  |  |  | * Deinterlace the given block by linearly interpolating every second line. | 
		
	
	
		
			
				|  |  | @@ -1395,7 +1434,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | src+= 4*stride; | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "lea (%0, %1), %%"REG_a"                \n\t" | 
		
	
	
		
			
				|  |  | @@ -1448,7 +1487,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | src+= stride*3; | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "lea (%0, %1), %%"REG_a"                \n\t" | 
		
	
	
		
			
				|  |  | @@ -1490,7 +1529,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc, | 
		
	
		
			
			|  |  |  | : : "r" (src), "r" ((x86_reg)stride) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d, "%"REG_c | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | int x; | 
		
	
		
			
			|  |  |  | src+= stride*3; | 
		
	
		
			
			|  |  |  | for(x=0; x<8; x++){ | 
		
	
	
		
			
				|  |  | @@ -1500,7 +1539,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc, | 
		
	
		
			
			|  |  |  | src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); | 
		
	
		
			
			|  |  |  | src++; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | /** | 
		
	
	
		
			
				|  |  | @@ -1512,7 +1551,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc, | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | src+= stride*4; | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "lea (%0, %1), %%"REG_a"                \n\t" | 
		
	
	
		
			
				|  |  | @@ -1561,7 +1600,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) | 
		
	
		
			
			|  |  |  | : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | int x; | 
		
	
		
			
			|  |  |  | src+= stride*4; | 
		
	
		
			
			|  |  |  | for(x=0; x<8; x++){ | 
		
	
	
		
			
				|  |  | @@ -1579,7 +1618,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | src++; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | /** | 
		
	
	
		
			
				|  |  | @@ -1591,7 +1630,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | src+= stride*4; | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "lea (%0, %1), %%"REG_a"                \n\t" | 
		
	
	
		
			
				|  |  | @@ -1651,7 +1690,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4)) | 
		
	
		
			
			|  |  |  | : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | int x; | 
		
	
		
			
			|  |  |  | src+= stride*4; | 
		
	
		
			
			|  |  |  | for(x=0; x<8; x++){ | 
		
	
	
		
			
				|  |  | @@ -1680,7 +1719,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4)) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | src++; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | /** | 
		
	
	
		
			
				|  |  | @@ -1692,7 +1731,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4)) | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | src+= 4*stride; | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "lea (%0, %1), %%"REG_a"                \n\t" | 
		
	
	
		
			
				|  |  | @@ -1739,7 +1778,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin | 
		
	
		
			
			|  |  |  | : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | int a, b, c, x; | 
		
	
		
			
			|  |  |  | src+= 4*stride; | 
		
	
		
			
			|  |  |  | 
 | 
		
	
	
		
			
				|  |  | @@ -1782,7 +1821,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin | 
		
	
		
			
			|  |  |  | src += 4; | 
		
	
		
			
			|  |  |  | tmp += 4; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | /** | 
		
	
	
		
			
				|  |  | @@ -1793,9 +1832,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | src+= 4*stride; | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "lea (%0, %1), %%"REG_a"                \n\t" | 
		
	
		
			
			|  |  |  | "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t" | 
		
	
	
		
			
				|  |  | @@ -1885,8 +1924,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) | 
		
	
		
			
			|  |  |  | : : "r" (src), "r" ((x86_reg)stride) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #else //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | int x, y; | 
		
	
		
			
			|  |  |  | src+= 4*stride; | 
		
	
		
			
			|  |  |  | // FIXME - there should be a way to do a few columns in parallel like w/mmx | 
		
	
	
		
			
				|  |  | @@ -1905,10 +1944,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | src++; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | /** | 
		
	
		
			
			|  |  |  | * Transpose and shift the given 8x8 Block into dst1 and dst2. | 
		
	
		
			
			|  |  |  | */ | 
		
	
	
		
			
				|  |  | @@ -2073,10 +2112,10 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | //static long test=0; | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if !HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #if !TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | 
		
	
		
			
			|  |  |  | uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) | 
		
	
		
			
			|  |  |  | { | 
		
	
	
		
			
				|  |  | @@ -2087,7 +2126,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #define FAST_L2_DIFF | 
		
	
		
			
			|  |  |  | //#define L1_DIFF //you should change the thresholds too if you try that one | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "lea (%2, %2, 2), %%"REG_a"             \n\t" // 3*stride | 
		
	
		
			
			|  |  |  | "lea (%2, %2, 4), %%"REG_d"             \n\t" // 5*stride | 
		
	
	
		
			
				|  |  | @@ -2375,7 +2414,7 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc)) | 
		
	
		
			
			|  |  |  | :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d, "%"REG_c, "memory" | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | int y; | 
		
	
		
			
			|  |  |  | int d=0; | 
		
	
	
		
			
				|  |  | @@ -2458,11 +2497,11 @@ Switch between | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | /** | 
		
	
		
			
			|  |  |  | * accurate deblock filter | 
		
	
		
			
			|  |  |  | */ | 
		
	
	
		
			
				|  |  | @@ -2865,7 +2904,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st | 
		
	
		
			
			|  |  |  | "movq (%4), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3 | 
		
	
		
			
			|  |  |  | "movq 8(%4), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "movq %%mm7, %%mm6                      \n\t" // 0 | 
		
	
		
			
			|  |  |  | "psubw %%mm0, %%mm6                     \n\t" | 
		
	
		
			
			|  |  |  | "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7| | 
		
	
	
		
			
				|  |  | @@ -2897,7 +2936,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st | 
		
	
		
			
			|  |  |  | "psubw %%mm6, %%mm3                     \n\t" // |2H0 - 5H1 + 5H2 - 2H3| | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "pminsw %%mm2, %%mm0                    \n\t" | 
		
	
		
			
			|  |  |  | "pminsw %%mm3, %%mm1                    \n\t" | 
		
	
		
			
			|  |  |  | #else | 
		
	
	
		
			
				|  |  | @@ -2961,7 +3000,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st | 
		
	
		
			
			|  |  |  | "pand %%mm2, %%mm4                      \n\t" | 
		
	
		
			
			|  |  |  | "pand %%mm3, %%mm5                      \n\t" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | "pminsw %%mm0, %%mm4                    \n\t" | 
		
	
		
			
			|  |  |  | "pminsw %%mm1, %%mm5                    \n\t" | 
		
	
		
			
			|  |  |  | #else | 
		
	
	
		
			
				|  |  | @@ -2998,7 +3037,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } */ | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | 
		
	
		
			
			|  |  |  | const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c); | 
		
	
	
		
			
				|  |  | @@ -3013,18 +3052,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, | 
		
	
		
			
			|  |  |  | int levelFix, int64_t *packedOffsetAndScale) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if !HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if !TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | int i; | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | if(levelFix){ | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset | 
		
	
		
			
			|  |  |  | "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale | 
		
	
		
			
			|  |  |  | "lea (%2,%4), %%"REG_a"         \n\t" | 
		
	
		
			
			|  |  |  | "lea (%3,%5), %%"REG_d"         \n\t" | 
		
	
		
			
			|  |  |  | "pxor %%mm4, %%mm4              \n\t" | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \ | 
		
	
		
			
			|  |  |  | "movq " #src1 ", %%mm0          \n\t"\ | 
		
	
		
			
			|  |  |  | "movq " #src1 ", %%mm5          \n\t"\ | 
		
	
	
		
			
				|  |  | @@ -3047,7 +3086,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t | 
		
	
		
			
			|  |  |  | "movq %%mm0, " #dst1 "          \n\t"\ | 
		
	
		
			
			|  |  |  | "movq %%mm1, " #dst2 "          \n\t"\ | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #else //HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                        \ | 
		
	
		
			
			|  |  |  | "movq " #src1 ", %%mm0          \n\t"\ | 
		
	
		
			
			|  |  |  | "movq " #src1 ", %%mm5          \n\t"\ | 
		
	
	
		
			
				|  |  | @@ -3074,7 +3113,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t | 
		
	
		
			
			|  |  |  | "movq %%mm0, " #dst1 "          \n\t"\ | 
		
	
		
			
			|  |  |  | "movq %%mm1, " #dst2 "          \n\t"\ | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #define SCALED_CPY(src1, src2, dst1, dst2)\ | 
		
	
		
			
			|  |  |  | REAL_SCALED_CPY(src1, src2, dst1, dst2) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
	
		
			
				|  |  | @@ -3094,13 +3133,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) | 
		
	
		
			
			|  |  |  | "r" ((x86_reg)dstStride) | 
		
	
		
			
			|  |  |  | : "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | for(i=0; i<8; i++) | 
		
	
		
			
			|  |  |  | memcpy( &(dst[dstStride*i]), | 
		
	
		
			
			|  |  |  | &(src[srcStride*i]), BLOCK_SIZE); | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | }else{ | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "lea (%0,%2), %%"REG_a"                 \n\t" | 
		
	
		
			
			|  |  |  | "lea (%1,%3), %%"REG_d"                 \n\t" | 
		
	
	
		
			
				|  |  | @@ -3127,11 +3166,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) | 
		
	
		
			
			|  |  |  | "r" ((x86_reg)dstStride) | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | #else //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #else //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | for(i=0; i<8; i++) | 
		
	
		
			
			|  |  |  | memcpy( &(dst[dstStride*i]), | 
		
	
		
			
			|  |  |  | &(src[srcStride*i]), BLOCK_SIZE); | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
	
		
			
				|  |  | @@ -3140,7 +3179,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) | 
		
	
		
			
			|  |  |  | */ | 
		
	
		
			
			|  |  |  | static inline void RENAME(duplicate)(uint8_t src[], int stride) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "movq (%0), %%mm0               \n\t" | 
		
	
		
			
			|  |  |  | "add %1, %0                     \n\t" | 
		
	
	
		
			
				|  |  | @@ -3168,8 +3207,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | DECLARE_ALIGNED(8, PPContext, c)= *c2; //copy to stack for faster access | 
		
	
		
			
			|  |  |  | int x,y; | 
		
	
		
			
			|  |  |  | #ifdef COMPILE_TIME_MODE | 
		
	
		
			
			|  |  |  | const int mode= COMPILE_TIME_MODE; | 
		
	
		
			
			|  |  |  | #ifdef TEMPLATE_PP_TIME_MODE | 
		
	
		
			
			|  |  |  | const int mode= TEMPLATE_PP_TIME_MODE; | 
		
	
		
			
			|  |  |  | #else | 
		
	
		
			
			|  |  |  | const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode; | 
		
	
		
			
			|  |  |  | #endif | 
		
	
	
		
			
				|  |  | @@ -3177,7 +3216,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | int QPCorrecture= 256*256; | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | int copyAhead; | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | int i; | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
	
		
			
				|  |  | @@ -3190,7 +3229,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride; | 
		
	
		
			
			|  |  |  | //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | for(i=0; i<57; i++){ | 
		
	
		
			
			|  |  |  | int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; | 
		
	
		
			
			|  |  |  | int threshold= offset*2 + 1; | 
		
	
	
		
			
				|  |  | @@ -3248,7 +3287,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black); | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | c.packedYScale= (uint16_t)(scale*256.0 + 0.5); | 
		
	
		
			
			|  |  |  | c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; | 
		
	
		
			
			|  |  |  | #else | 
		
	
	
		
			
				|  |  | @@ -3281,7 +3320,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing | 
		
	
		
			
			|  |  |  | for(x=0; x<width; x+=BLOCK_SIZE){ | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | /* | 
		
	
		
			
			|  |  |  | prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); | 
		
	
		
			
			|  |  |  | prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); | 
		
	
	
		
			
				|  |  | @@ -3308,7 +3347,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #elif HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #elif TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... | 
		
	
		
			
			|  |  |  | /*          prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); | 
		
	
		
			
			|  |  |  | prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); | 
		
	
	
		
			
				|  |  | @@ -3354,7 +3393,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | //1% speedup if these are here instead of the inner loop | 
		
	
		
			
			|  |  |  | const uint8_t *srcBlock= &(src[y*srcStride]); | 
		
	
		
			
			|  |  |  | uint8_t *dstBlock= &(dst[y*dstStride]); | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | uint8_t *tempBlock1= c.tempBlocks; | 
		
	
		
			
			|  |  |  | uint8_t *tempBlock2= c.tempBlocks + 8; | 
		
	
		
			
			|  |  |  | #endif | 
		
	
	
		
			
				|  |  | @@ -3390,7 +3429,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing | 
		
	
		
			
			|  |  |  | for(x=0; x<width; x+=BLOCK_SIZE){ | 
		
	
		
			
			|  |  |  | const int stride= dstStride; | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | uint8_t *tmpXchg; | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | if(isColor){ | 
		
	
	
		
			
				|  |  | @@ -3404,7 +3443,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | yHistogram[ srcBlock[srcStride*12 + 4] ]++; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | c.QP= QP; | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | __asm__ volatile( | 
		
	
		
			
			|  |  |  | "movd %1, %%mm7         \n\t" | 
		
	
		
			
			|  |  |  | "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP | 
		
	
	
		
			
				|  |  | @@ -3417,7 +3456,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMXEXT_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | /* | 
		
	
		
			
			|  |  |  | prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); | 
		
	
		
			
			|  |  |  | prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); | 
		
	
	
		
			
				|  |  | @@ -3444,7 +3483,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | : "%"REG_a, "%"REG_d | 
		
	
		
			
			|  |  |  | ); | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #elif HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #elif TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... | 
		
	
		
			
			|  |  |  | /*          prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); | 
		
	
		
			
			|  |  |  | prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); | 
		
	
	
		
			
				|  |  | @@ -3488,12 +3527,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | /* check if we have a previous block to deblock it with dstBlock */ | 
		
	
		
			
			|  |  |  | if(x - 8 >= 0){ | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | if(mode & H_X1_FILTER) | 
		
	
		
			
			|  |  |  | RENAME(vertX1Filter)(tempBlock1, 16, &c); | 
		
	
		
			
			|  |  |  | else if(mode & H_DEBLOCK){ | 
		
	
	
		
			
				|  |  | @@ -3514,7 +3553,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | if(mode & H_X1_FILTER) | 
		
	
		
			
			|  |  |  | horizX1Filter(dstBlock-4, stride, QP); | 
		
	
		
			
			|  |  |  | else if(mode & H_DEBLOCK){ | 
		
	
		
			
			|  |  |  | #if HAVE_ALTIVEC | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; | 
		
	
		
			
			|  |  |  | int t; | 
		
	
		
			
			|  |  |  | transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); | 
		
	
	
		
			
				|  |  | @@ -3539,7 +3578,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | }else if(mode & H_A_DEBLOCK){ | 
		
	
		
			
			|  |  |  | RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #endif //HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #endif //TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | if(mode & DERING){ | 
		
	
		
			
			|  |  |  | //FIXME filter first line | 
		
	
		
			
			|  |  |  | if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); | 
		
	
	
		
			
				|  |  | @@ -3557,7 +3596,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | dstBlock+=8; | 
		
	
		
			
			|  |  |  | srcBlock+=8; | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #if HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #if TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | tmpXchg= tempBlock1; | 
		
	
		
			
			|  |  |  | tempBlock1= tempBlock2; | 
		
	
		
			
			|  |  |  | tempBlock2 = tmpXchg; | 
		
	
	
		
			
				|  |  | @@ -3597,9 +3636,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; | 
		
	
		
			
			|  |  |  | }*/ | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | #if   HAVE_AMD3DNOW_INLINE | 
		
	
		
			
			|  |  |  | #if   TEMPLATE_PP_3DNOW | 
		
	
		
			
			|  |  |  | __asm__ volatile("femms"); | 
		
	
		
			
			|  |  |  | #elif HAVE_MMX_INLINE | 
		
	
		
			
			|  |  |  | #elif TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | __asm__ volatile("emms"); | 
		
	
		
			
			|  |  |  | #endif | 
		
	
		
			
			|  |  |  | 
 | 
		
	
	
		
			
				|  |  | @@ -3629,3 +3668,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | 
		
	
		
			
			|  |  |  | *c2= c; //copy local context back | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | #undef RENAME | 
		
	
		
			
			|  |  |  | #undef TEMPLATE_PP_C | 
		
	
		
			
			|  |  |  | #undef TEMPLATE_PP_ALTIVEC | 
		
	
		
			
			|  |  |  | #undef TEMPLATE_PP_MMX | 
		
	
		
			
			|  |  |  | #undef TEMPLATE_PP_MMXEXT | 
		
	
		
			
			|  |  |  | #undef TEMPLATE_PP_3DNOW |