|  | @@ -54,75 +54,7 @@ SECTION .text | 
														
													
														
															
																|  |  | ; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple |  |  | ; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple | 
														
													
														
															
																|  |  | ; of 2. $offset is either 0 or 3. $dither holds 8 values. |  |  | ; of 2. $offset is either 0 or 3. $dither holds 8 values. | 
														
													
														
															
																|  |  | ;----------------------------------------------------------------------------- |  |  | ;----------------------------------------------------------------------------- | 
														
													
														
															
																|  |  | 
 |  |  |  | 
														
													
														
															
																|  |  | %macro yuv2planeX_fn 3 |  |  |  | 
														
													
														
															
																|  |  | 
 |  |  |  | 
														
													
														
															
																|  |  | %if ARCH_X86_32 |  |  |  | 
														
													
														
															
																|  |  | %define cntr_reg fltsizeq |  |  |  | 
														
													
														
															
																|  |  | %define movsx mov |  |  |  | 
														
													
														
															
																|  |  | %else |  |  |  | 
														
													
														
															
																|  |  | %define cntr_reg r7 |  |  |  | 
														
													
														
															
																|  |  | %define movsx movsxd |  |  |  | 
														
													
														
															
																|  |  | %endif |  |  |  | 
														
													
														
															
																|  |  | 
 |  |  |  | 
														
													
														
															
																|  |  | cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset |  |  |  | 
														
													
														
															
																|  |  | %if %1 == 8 || %1 == 9 || %1 == 10 |  |  |  | 
														
													
														
															
																|  |  | pxor            m6,  m6 |  |  |  | 
														
													
														
															
																|  |  | %endif ; %1 == 8/9/10 |  |  |  | 
														
													
														
															
																|  |  | 
 |  |  |  | 
														
													
														
															
																|  |  | %if %1 == 8 |  |  |  | 
														
													
														
															
																|  |  | %if ARCH_X86_32 |  |  |  | 
														
													
														
															
																|  |  | %assign pad 0x2c - (stack_offset & 15) |  |  |  | 
														
													
														
															
																|  |  | SUB             rsp, pad |  |  |  | 
														
													
														
															
																|  |  | %define m_dith m7 |  |  |  | 
														
													
														
															
																|  |  | %else ; x86-64 |  |  |  | 
														
													
														
															
																|  |  | %define m_dith m9 |  |  |  | 
														
													
														
															
																|  |  | %endif ; x86-32 |  |  |  | 
														
													
														
															
																|  |  | 
 |  |  |  | 
														
													
														
															
																|  |  | ; create registers holding dither |  |  |  | 
														
													
														
															
																|  |  | movq        m_dith, [ditherq]        ; dither |  |  |  | 
														
													
														
															
																|  |  | test        offsetd, offsetd |  |  |  | 
														
													
														
															
																|  |  | jz              .no_rot |  |  |  | 
														
													
														
															
																|  |  | %if mmsize == 16 |  |  |  | 
														
													
														
															
																|  |  | punpcklqdq  m_dith,  m_dith |  |  |  | 
														
													
														
															
																|  |  | %endif ; mmsize == 16 |  |  |  | 
														
													
														
															
																|  |  | PALIGNR     m_dith,  m_dith,  3,  m0 |  |  |  | 
														
													
														
															
																|  |  | .no_rot: |  |  |  | 
														
													
														
															
																|  |  | %if mmsize == 16 |  |  |  | 
														
													
														
															
																|  |  | punpcklbw   m_dith,  m6 |  |  |  | 
														
													
														
															
																|  |  | %if ARCH_X86_64 |  |  |  | 
														
													
														
															
																|  |  | punpcklwd       m8,  m_dith,  m6 |  |  |  | 
														
													
														
															
																|  |  | pslld           m8,  12 |  |  |  | 
														
													
														
															
																|  |  | %else ; x86-32 |  |  |  | 
														
													
														
															
																|  |  | punpcklwd       m5,  m_dith,  m6 |  |  |  | 
														
													
														
															
																|  |  | pslld           m5,  12 |  |  |  | 
														
													
														
															
																|  |  | %endif ; x86-32/64 |  |  |  | 
														
													
														
															
																|  |  | punpckhwd   m_dith,  m6 |  |  |  | 
														
													
														
															
																|  |  | pslld       m_dith,  12 |  |  |  | 
														
													
														
															
																|  |  | %if ARCH_X86_32 |  |  |  | 
														
													
														
															
																|  |  | mova      [rsp+ 0],  m5 |  |  |  | 
														
													
														
															
																|  |  | mova      [rsp+16],  m_dith |  |  |  | 
														
													
														
															
																|  |  | %endif |  |  |  | 
														
													
														
															
																|  |  | %else ; mmsize == 8 |  |  |  | 
														
													
														
															
																|  |  | punpcklbw       m5,  m_dith,  m6 |  |  |  | 
														
													
														
															
																|  |  | punpckhbw   m_dith,  m6 |  |  |  | 
														
													
														
															
																|  |  | punpcklwd       m4,  m5,  m6 |  |  |  | 
														
													
														
															
																|  |  | punpckhwd       m5,  m6 |  |  |  | 
														
													
														
															
																|  |  | punpcklwd       m3,  m_dith,  m6 |  |  |  | 
														
													
														
															
																|  |  | punpckhwd   m_dith,  m6 |  |  |  | 
														
													
														
															
																|  |  | pslld           m4,  12 |  |  |  | 
														
													
														
															
																|  |  | pslld           m5,  12 |  |  |  | 
														
													
														
															
																|  |  | pslld           m3,  12 |  |  |  | 
														
													
														
															
																|  |  | pslld       m_dith,  12 |  |  |  | 
														
													
														
															
																|  |  | mova      [rsp+ 0],  m4 |  |  |  | 
														
													
														
															
																|  |  | mova      [rsp+ 8],  m5 |  |  |  | 
														
													
														
															
																|  |  | mova      [rsp+16],  m3 |  |  |  | 
														
													
														
															
																|  |  | mova      [rsp+24],  m_dith |  |  |  | 
														
													
														
															
																|  |  | %endif ; mmsize == 8/16 |  |  |  | 
														
													
														
															
																|  |  | %endif ; %1 == 8 |  |  |  | 
														
													
														
															
																|  |  | 
 |  |  |  | 
														
													
														
															
																|  |  | xor             r5,  r5 |  |  |  | 
														
													
														
															
																|  |  | 
 |  |  |  | 
														
													
														
															
																|  |  |  |  |  | %macro yuv2planeX_mainloop 1 | 
														
													
														
															
																|  |  | .pixelloop: |  |  | .pixelloop: | 
														
													
														
															
																|  |  | %assign %%i 0 |  |  | %assign %%i 0 | 
														
													
														
															
																|  |  | ; the rep here is for the 8bit output mmx case, where dither covers |  |  | ; the rep here is for the 8bit output mmx case, where dither covers | 
														
													
												
													
														
															
																|  | @@ -233,6 +165,77 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset | 
														
													
														
															
																|  |  | %assign %%i %%i+2 |  |  | %assign %%i %%i+2 | 
														
													
														
															
																|  |  | %endrep |  |  | %endrep | 
														
													
														
															
																|  |  | jg .pixelloop |  |  | jg .pixelloop | 
														
													
														
															
																|  |  |  |  |  | %endmacro | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																|  |  |  |  |  | %macro yuv2planeX_fn 3 | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																|  |  |  |  |  | %if ARCH_X86_32 | 
														
													
														
															
																|  |  |  |  |  | %define cntr_reg fltsizeq | 
														
													
														
															
																|  |  |  |  |  | %define movsx mov | 
														
													
														
															
																|  |  |  |  |  | %else | 
														
													
														
															
																|  |  |  |  |  | %define cntr_reg r7 | 
														
													
														
															
																|  |  |  |  |  | %define movsx movsxd | 
														
													
														
															
																|  |  |  |  |  | %endif | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																|  |  |  |  |  | cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset | 
														
													
														
															
																|  |  |  |  |  | %if %1 == 8 || %1 == 9 || %1 == 10 | 
														
													
														
															
																|  |  |  |  |  | pxor            m6,  m6 | 
														
													
														
															
																|  |  |  |  |  | %endif ; %1 == 8/9/10 | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																|  |  |  |  |  | %if %1 == 8 | 
														
													
														
															
																|  |  |  |  |  | %if ARCH_X86_32 | 
														
													
														
															
																|  |  |  |  |  | %assign pad 0x2c - (stack_offset & 15) | 
														
													
														
															
																|  |  |  |  |  | SUB             rsp, pad | 
														
													
														
															
																|  |  |  |  |  | %define m_dith m7 | 
														
													
														
															
																|  |  |  |  |  | %else ; x86-64 | 
														
													
														
															
																|  |  |  |  |  | %define m_dith m9 | 
														
													
														
															
																|  |  |  |  |  | %endif ; x86-32 | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																|  |  |  |  |  | ; create registers holding dither | 
														
													
														
															
																|  |  |  |  |  | movq        m_dith, [ditherq]        ; dither | 
														
													
														
															
																|  |  |  |  |  | test        offsetd, offsetd | 
														
													
														
															
																|  |  |  |  |  | jz              .no_rot | 
														
													
														
															
																|  |  |  |  |  | %if mmsize == 16 | 
														
													
														
															
																|  |  |  |  |  | punpcklqdq  m_dith,  m_dith | 
														
													
														
															
																|  |  |  |  |  | %endif ; mmsize == 16 | 
														
													
														
															
																|  |  |  |  |  | PALIGNR     m_dith,  m_dith,  3,  m0 | 
														
													
														
															
																|  |  |  |  |  | .no_rot: | 
														
													
														
															
																|  |  |  |  |  | %if mmsize == 16 | 
														
													
														
															
																|  |  |  |  |  | punpcklbw   m_dith,  m6 | 
														
													
														
															
																|  |  |  |  |  | %if ARCH_X86_64 | 
														
													
														
															
																|  |  |  |  |  | punpcklwd       m8,  m_dith,  m6 | 
														
													
														
															
																|  |  |  |  |  | pslld           m8,  12 | 
														
													
														
															
																|  |  |  |  |  | %else ; x86-32 | 
														
													
														
															
																|  |  |  |  |  | punpcklwd       m5,  m_dith,  m6 | 
														
													
														
															
																|  |  |  |  |  | pslld           m5,  12 | 
														
													
														
															
																|  |  |  |  |  | %endif ; x86-32/64 | 
														
													
														
															
																|  |  |  |  |  | punpckhwd   m_dith,  m6 | 
														
													
														
															
																|  |  |  |  |  | pslld       m_dith,  12 | 
														
													
														
															
																|  |  |  |  |  | %if ARCH_X86_32 | 
														
													
														
															
																|  |  |  |  |  | mova      [rsp+ 0],  m5 | 
														
													
														
															
																|  |  |  |  |  | mova      [rsp+16],  m_dith | 
														
													
														
															
																|  |  |  |  |  | %endif | 
														
													
														
															
																|  |  |  |  |  | %else ; mmsize == 8 | 
														
													
														
															
																|  |  |  |  |  | punpcklbw       m5,  m_dith,  m6 | 
														
													
														
															
																|  |  |  |  |  | punpckhbw   m_dith,  m6 | 
														
													
														
															
																|  |  |  |  |  | punpcklwd       m4,  m5,  m6 | 
														
													
														
															
																|  |  |  |  |  | punpckhwd       m5,  m6 | 
														
													
														
															
																|  |  |  |  |  | punpcklwd       m3,  m_dith,  m6 | 
														
													
														
															
																|  |  |  |  |  | punpckhwd   m_dith,  m6 | 
														
													
														
															
																|  |  |  |  |  | pslld           m4,  12 | 
														
													
														
															
																|  |  |  |  |  | pslld           m5,  12 | 
														
													
														
															
																|  |  |  |  |  | pslld           m3,  12 | 
														
													
														
															
																|  |  |  |  |  | pslld       m_dith,  12 | 
														
													
														
															
																|  |  |  |  |  | mova      [rsp+ 0],  m4 | 
														
													
														
															
																|  |  |  |  |  | mova      [rsp+ 8],  m5 | 
														
													
														
															
																|  |  |  |  |  | mova      [rsp+16],  m3 | 
														
													
														
															
																|  |  |  |  |  | mova      [rsp+24],  m_dith | 
														
													
														
															
																|  |  |  |  |  | %endif ; mmsize == 8/16 | 
														
													
														
															
																|  |  |  |  |  | %endif ; %1 == 8 | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																|  |  |  |  |  | xor             r5,  r5 | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																|  |  |  |  |  | yuv2planeX_mainloop %1 | 
														
													
														
															
																|  |  | 
 |  |  | 
 | 
														
													
														
															
																|  |  | %if %1 == 8 |  |  | %if %1 == 8 | 
														
													
														
															
																|  |  | %if ARCH_X86_32 |  |  | %if ARCH_X86_32 | 
														
													
												
													
														
															
																|  | 
 |