| 
				
				
					
				
				
				 | 
			
			 | 
			@@ -287,7 +287,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    uint16_t *d = (uint16_t *)dst; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    end = s + src_size; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    mm_end = end - 15; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile( | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq           %3, %%mm5   \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq           %4, %%mm6   \n\t" | 
		
		
	
	
		
			
				| 
				
					
				
				
					
				
				
				 | 
			
			 | 
			@@ -322,47 +321,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        : "+r" (d), "+r"(s) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    ); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#else | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory"); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile( | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq    %0, %%mm7    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq    %1, %%mm6    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        ::"m"(red_16mask),"m"(green_16mask)); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    while (s < mm_end) { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        __asm__ volatile( | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            PREFETCH"    32%1           \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movd          %1, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movd         4%1, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "punpckldq    8%1, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "punpckldq   12%1, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movq       %%mm0, %%mm1    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movq       %%mm0, %%mm2    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movq       %%mm3, %%mm4    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movq       %%mm3, %%mm5    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $3, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $3, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand          %2, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand          %2, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $5, %%mm1    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $5, %%mm4    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand       %%mm6, %%mm1    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand       %%mm6, %%mm4    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $8, %%mm2    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $8, %%mm5    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand       %%mm7, %%mm2    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand       %%mm7, %%mm5    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm1, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm4, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm2, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm5, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psllq        $16, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm3, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            MOVNTQ"     %%mm0, %0       \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        d += 4; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        s += 16; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    } | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#endif | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile(SFENCE:::"memory"); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile(EMMS:::"memory"); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    while (s < end) { | 
		
		
	
	
		
			
				| 
				
					
				
				
					
				
				
				 | 
			
			 | 
			@@ -434,7 +392,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    uint16_t *d = (uint16_t *)dst; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    end = s + src_size; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    mm_end = end - 15; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile( | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq           %3, %%mm5   \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq           %4, %%mm6   \n\t" | 
		
		
	
	
		
			
				| 
				
					
				
				
					
				
				
				 | 
			
			 | 
			@@ -469,47 +426,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        : "+r" (d), "+r"(s) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    ); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#else | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory"); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile( | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq          %0, %%mm7    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        "movq          %1, %%mm6    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        ::"m"(red_15mask),"m"(green_15mask)); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    while (s < mm_end) { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        __asm__ volatile( | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            PREFETCH"    32%1           \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movd          %1, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movd         4%1, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "punpckldq    8%1, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "punpckldq   12%1, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movq       %%mm0, %%mm1    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movq       %%mm0, %%mm2    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movq       %%mm3, %%mm4    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "movq       %%mm3, %%mm5    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $3, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $3, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand          %2, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand          %2, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $6, %%mm1    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $6, %%mm4    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand       %%mm6, %%mm1    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand       %%mm6, %%mm4    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $9, %%mm2    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psrlq         $9, %%mm5    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand       %%mm7, %%mm2    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "pand       %%mm7, %%mm5    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm1, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm4, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm2, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm5, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "psllq        $16, %%mm3    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            "por        %%mm3, %%mm0    \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            MOVNTQ"     %%mm0, %0       \n\t" | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        d += 4; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			        s += 16; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    } | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			#endif | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile(SFENCE:::"memory"); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    __asm__ volatile(EMMS:::"memory"); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			    while (s < end) { | 
		
		
	
	
		
			
				| 
				
					
				
				
				
				 | 
			
			 | 
			
  |