Originally committed as revision 10023 to svn://svn.ffmpeg.org/ffmpeg/trunk
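The change below is mechanical: the old unaligned-access helpers LD16/LD32/LD64 and ST16/ST32/ST64 become AV_RN16/AV_RN32/AV_RN64 (read, native endianness) and AV_WN16/AV_WN32/AV_WN64 (write, native endianness), and the endian-aware AV_RB*/AV_RL*/AV_WB*/AV_WL* macros are re-expressed in terms of them. As a rough illustration of what the renamed helpers do, here is a simplified sketch only: it uses memcpy instead of the packed-struct and pointer-cast variants defined further down in the diff, and the sketch_* names are made up for this example.

    /* Illustrative sketch, not the FFmpeg implementation. */
    #include <stdint.h>
    #include <string.h>

    /* AV_RN32: read a 32-bit native-endian value from a possibly unaligned address. */
    static inline uint32_t sketch_AV_RN32(const void *p)
    {
        uint32_t v;
        memcpy(&v, p, sizeof(v));   /* memcpy keeps the sketch strictly portable */
        return v;
    }

    /* AV_WN32: write a 32-bit native-endian value to a possibly unaligned address. */
    static inline void sketch_AV_WN32(void *p, uint32_t v)
    {
        memcpy(p, &v, sizeof(v));
    }

    /* A copy_block4-style use, mirroring the dsputil hunks below. */
    static inline void sketch_copy_block4(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h)
    {
        int i;
        for (i = 0; i < h; i++) {
            sketch_AV_WN32(dst, sketch_AV_RN32(src));
            dst += dstStride;
            src += srcStride;
        }
    }

On targets with fast unaligned access the real macros reduce to a single load or store (see the HAVE_FAST_UNALIGNED hunks at the end of the diff); the sketch above only shows the intended semantics.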
@@ -212,7 +212,7 @@ void ff_cavs_load_intra_pred_chroma(AVSContext *h) {
 static void intra_pred_vert(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
     int y;
-    uint64_t a = LD64(&top[1]);
+    uint64_t a = AV_RN64(&top[1]);
     for(y=0;y<8;y++) {
         *((uint64_t *)(d+y*stride)) = a;
     }
@@ -608,7 +608,7 @@ static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_si
 {\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint64_t*)block), LD64(pixels));\
+        OP(*((uint64_t*)block), AV_RN64(pixels));\
         pixels+=line_size;\
         block +=line_size;\
     }\
@@ -618,8 +618,8 @@ static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels,
 {\
     int i;\
     for(i=0; i<h; i++){\
-        const uint64_t a= LD64(pixels );\
-        const uint64_t b= LD64(pixels+1);\
+        const uint64_t a= AV_RN64(pixels );\
+        const uint64_t b= AV_RN64(pixels+1);\
         OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
         pixels+=line_size;\
         block +=line_size;\
@@ -630,8 +630,8 @@ static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int li
 {\
     int i;\
     for(i=0; i<h; i++){\
-        const uint64_t a= LD64(pixels );\
-        const uint64_t b= LD64(pixels+1);\
+        const uint64_t a= AV_RN64(pixels );\
+        const uint64_t b= AV_RN64(pixels+1);\
         OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
         pixels+=line_size;\
         block +=line_size;\
@@ -642,8 +642,8 @@ static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels,
 {\
     int i;\
     for(i=0; i<h; i++){\
-        const uint64_t a= LD64(pixels );\
-        const uint64_t b= LD64(pixels+line_size);\
+        const uint64_t a= AV_RN64(pixels );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
         OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
         pixels+=line_size;\
         block +=line_size;\
@@ -654,8 +654,8 @@ static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int li
 {\
     int i;\
     for(i=0; i<h; i++){\
-        const uint64_t a= LD64(pixels );\
-        const uint64_t b= LD64(pixels+line_size);\
+        const uint64_t a= AV_RN64(pixels );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
         OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
         pixels+=line_size;\
         block +=line_size;\
@@ -665,8 +665,8 @@ static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int li
 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
     int i;\
-    const uint64_t a= LD64(pixels );\
-    const uint64_t b= LD64(pixels+1);\
+    const uint64_t a= AV_RN64(pixels );\
+    const uint64_t b= AV_RN64(pixels+1);\
     uint64_t l0= (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0202020202020202ULL;\
@@ -676,8 +676,8 @@ static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int l
 \
     pixels+=line_size;\
     for(i=0; i<h; i+=2){\
-        uint64_t a= LD64(pixels );\
-        uint64_t b= LD64(pixels+1);\
+        uint64_t a= AV_RN64(pixels );\
+        uint64_t b= AV_RN64(pixels+1);\
         l1= (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
         h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
@@ -685,8 +685,8 @@ static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int l
         OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
         pixels+=line_size;\
         block +=line_size;\
-        a= LD64(pixels );\
-        b= LD64(pixels+1);\
+        a= AV_RN64(pixels );\
+        b= AV_RN64(pixels+1);\
         l0= (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0202020202020202ULL;\
@@ -701,8 +701,8 @@ static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int l
 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
     int i;\
-    const uint64_t a= LD64(pixels );\
-    const uint64_t b= LD64(pixels+1);\
+    const uint64_t a= AV_RN64(pixels );\
+    const uint64_t b= AV_RN64(pixels+1);\
     uint64_t l0= (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0101010101010101ULL;\
@@ -712,8 +712,8 @@ static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels
 \
     pixels+=line_size;\
     for(i=0; i<h; i+=2){\
-        uint64_t a= LD64(pixels );\
-        uint64_t b= LD64(pixels+1);\
+        uint64_t a= AV_RN64(pixels );\
+        uint64_t b= AV_RN64(pixels+1);\
         l1= (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
         h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
@@ -721,8 +721,8 @@ static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels
         OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
         pixels+=line_size;\
         block +=line_size;\
-        a= LD64(pixels );\
-        b= LD64(pixels+1);\
+        a= AV_RN64(pixels );\
+        b= AV_RN64(pixels+1);\
         l0= (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0101010101010101ULL;\
@@ -749,7 +749,7 @@ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c,
 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint16_t*)(block )), LD16(pixels ));\
+        OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
         pixels+=line_size;\
         block +=line_size;\
     }\
@@ -757,7 +757,7 @@ static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line
 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint32_t*)(block )), LD32(pixels ));\
+        OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
         pixels+=line_size;\
         block +=line_size;\
     }\
@@ -765,8 +765,8 @@ static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line
 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint32_t*)(block )), LD32(pixels ));\
-        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
+        OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
+        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
         pixels+=line_size;\
         block +=line_size;\
     }\
@@ -780,11 +780,11 @@ static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a,b;\
-        a= LD32(&src1[i*src_stride1 ]);\
-        b= LD32(&src2[i*src_stride2 ]);\
+        a= AV_RN32(&src1[i*src_stride1 ]);\
+        b= AV_RN32(&src2[i*src_stride2 ]);\
         OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
-        a= LD32(&src1[i*src_stride1+4]);\
-        b= LD32(&src2[i*src_stride2+4]);\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
         OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
     }\
 }\
@@ -794,11 +794,11 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, cons
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a,b;\
-        a= LD32(&src1[i*src_stride1 ]);\
-        b= LD32(&src2[i*src_stride2 ]);\
+        a= AV_RN32(&src1[i*src_stride1 ]);\
+        b= AV_RN32(&src2[i*src_stride2 ]);\
         OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
-        a= LD32(&src1[i*src_stride1+4]);\
-        b= LD32(&src2[i*src_stride2+4]);\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
         OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
     }\
 }\
@@ -808,8 +808,8 @@ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, cons
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a,b;\
-        a= LD32(&src1[i*src_stride1 ]);\
-        b= LD32(&src2[i*src_stride2 ]);\
+        a= AV_RN32(&src1[i*src_stride1 ]);\
+        b= AV_RN32(&src2[i*src_stride2 ]);\
         OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
     }\
 }\
@@ -819,8 +819,8 @@ static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, cons
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a,b;\
-        a= LD16(&src1[i*src_stride1 ]);\
-        b= LD16(&src2[i*src_stride2 ]);\
+        a= AV_RN16(&src1[i*src_stride1 ]);\
+        b= AV_RN16(&src2[i*src_stride2 ]);\
         OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
     }\
 }\
@@ -858,10 +858,10 @@ static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a, b, c, d, l0, l1, h0, h1;\
-        a= LD32(&src1[i*src_stride1]);\
-        b= LD32(&src2[i*src_stride2]);\
-        c= LD32(&src3[i*src_stride3]);\
-        d= LD32(&src4[i*src_stride4]);\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
@@ -872,10 +872,10 @@ static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint
         h1= ((c&0xFCFCFCFCUL)>>2)\
           + ((d&0xFCFCFCFCUL)>>2);\
         OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-        a= LD32(&src1[i*src_stride1+4]);\
-        b= LD32(&src2[i*src_stride2+4]);\
-        c= LD32(&src3[i*src_stride3+4]);\
-        d= LD32(&src4[i*src_stride4+4]);\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
          + 0x02020202UL;\
@@ -910,10 +910,10 @@ static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a, b, c, d, l0, l1, h0, h1;\
-        a= LD32(&src1[i*src_stride1]);\
-        b= LD32(&src2[i*src_stride2]);\
-        c= LD32(&src3[i*src_stride3]);\
-        d= LD32(&src4[i*src_stride4]);\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
          + 0x01010101UL;\
@@ -924,10 +924,10 @@ static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src
         h1= ((c&0xFCFCFCFCUL)>>2)\
           + ((d&0xFCFCFCFCUL)>>2);\
         OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-        a= LD32(&src1[i*src_stride1+4]);\
-        b= LD32(&src2[i*src_stride2+4]);\
-        c= LD32(&src3[i*src_stride3+4]);\
-        d= LD32(&src4[i*src_stride4+4]);\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
          + 0x01010101UL;\
@@ -987,8 +987,8 @@ static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixel
 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
     int i;\
-    const uint32_t a= LD32(pixels );\
-    const uint32_t b= LD32(pixels+1);\
+    const uint32_t a= AV_RN32(pixels );\
+    const uint32_t b= AV_RN32(pixels+1);\
     uint32_t l0= (a&0x03030303UL)\
                + (b&0x03030303UL)\
                + 0x02020202UL;\
@@ -998,8 +998,8 @@ static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixel
 \
     pixels+=line_size;\
     for(i=0; i<h; i+=2){\
-        uint32_t a= LD32(pixels );\
-        uint32_t b= LD32(pixels+1);\
+        uint32_t a= AV_RN32(pixels );\
+        uint32_t b= AV_RN32(pixels+1);\
         l1= (a&0x03030303UL)\
           + (b&0x03030303UL);\
         h1= ((a&0xFCFCFCFCUL)>>2)\
@@ -1007,8 +1007,8 @@ static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixel
         OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
         pixels+=line_size;\
         block +=line_size;\
-        a= LD32(pixels );\
-        b= LD32(pixels+1);\
+        a= AV_RN32(pixels );\
+        b= AV_RN32(pixels+1);\
         l0= (a&0x03030303UL)\
           + (b&0x03030303UL)\
          + 0x02020202UL;\
@@ -1025,8 +1025,8 @@ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixel
     int j;\
     for(j=0; j<2; j++){\
         int i;\
-        const uint32_t a= LD32(pixels );\
-        const uint32_t b= LD32(pixels+1);\
+        const uint32_t a= AV_RN32(pixels );\
+        const uint32_t b= AV_RN32(pixels+1);\
         uint32_t l0= (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
@@ -1036,8 +1036,8 @@ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixel
 \
         pixels+=line_size;\
         for(i=0; i<h; i+=2){\
-            uint32_t a= LD32(pixels );\
-            uint32_t b= LD32(pixels+1);\
+            uint32_t a= AV_RN32(pixels );\
+            uint32_t b= AV_RN32(pixels+1);\
             l1= (a&0x03030303UL)\
               + (b&0x03030303UL);\
             h1= ((a&0xFCFCFCFCUL)>>2)\
@@ -1045,8 +1045,8 @@ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixel
             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
             pixels+=line_size;\
             block +=line_size;\
-            a= LD32(pixels );\
-            b= LD32(pixels+1);\
+            a= AV_RN32(pixels );\
+            b= AV_RN32(pixels+1);\
             l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
              + 0x02020202UL;\
@@ -1066,8 +1066,8 @@ static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t
     int j;\
     for(j=0; j<2; j++){\
         int i;\
-        const uint32_t a= LD32(pixels );\
-        const uint32_t b= LD32(pixels+1);\
+        const uint32_t a= AV_RN32(pixels );\
+        const uint32_t b= AV_RN32(pixels+1);\
         uint32_t l0= (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
@@ -1077,8 +1077,8 @@ static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t
 \
         pixels+=line_size;\
         for(i=0; i<h; i+=2){\
-            uint32_t a= LD32(pixels );\
-            uint32_t b= LD32(pixels+1);\
+            uint32_t a= AV_RN32(pixels );\
+            uint32_t b= AV_RN32(pixels+1);\
             l1= (a&0x03030303UL)\
               + (b&0x03030303UL);\
             h1= ((a&0xFCFCFCFCUL)>>2)\
@@ -1086,8 +1086,8 @@ static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t
             OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
             pixels+=line_size;\
             block +=line_size;\
-            a= LD32(pixels );\
-            b= LD32(pixels+1);\
+            a= AV_RN32(pixels );\
+            b= AV_RN32(pixels+1);\
             l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
              + 0x01010101UL;\
@@ -675,7 +675,7 @@ static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int sr
     int i;
     for(i=0; i<h; i++)
     {
-        ST16(dst , LD16(src ));
+        AV_WN16(dst , AV_RN16(src ));
         dst+=dstStride;
         src+=srcStride;
     }
@@ -686,7 +686,7 @@ static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int sr
     int i;
     for(i=0; i<h; i++)
    {
-        ST32(dst , LD32(src ));
+        AV_WN32(dst , AV_RN32(src ));
         dst+=dstStride;
         src+=srcStride;
     }
@@ -697,8 +697,8 @@ static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int sr
     int i;
     for(i=0; i<h; i++)
     {
-        ST32(dst , LD32(src ));
-        ST32(dst+4 , LD32(src+4 ));
+        AV_WN32(dst , AV_RN32(src ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
         dst+=dstStride;
         src+=srcStride;
     }
@@ -709,8 +709,8 @@ static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int sr
     int i;
     for(i=0; i<h; i++)
     {
-        ST32(dst , LD32(src ));
-        ST32(dst+4 , LD32(src+4 ));
+        AV_WN32(dst , AV_RN32(src ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
         dst[8]= src[8];
         dst+=dstStride;
         src+=srcStride;
@@ -722,10 +722,10 @@ static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int s
     int i;
     for(i=0; i<h; i++)
     {
-        ST32(dst , LD32(src ));
-        ST32(dst+4 , LD32(src+4 ));
-        ST32(dst+8 , LD32(src+8 ));
-        ST32(dst+12, LD32(src+12));
+        AV_WN32(dst , AV_RN32(src ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_WN32(dst+8 , AV_RN32(src+8 ));
+        AV_WN32(dst+12, AV_RN32(src+12));
         dst+=dstStride;
         src+=srcStride;
     }
@@ -736,10 +736,10 @@ static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int s
     int i;
    for(i=0; i<h; i++)
     {
-        ST32(dst , LD32(src ));
-        ST32(dst+4 , LD32(src+4 ));
-        ST32(dst+8 , LD32(src+8 ));
-        ST32(dst+12, LD32(src+12));
+        AV_WN32(dst , AV_RN32(src ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_WN32(dst+8 , AV_RN32(src+8 ));
+        AV_WN32(dst+12, AV_RN32(src+12));
         dst[16]= src[16];
         dst+=dstStride;
         src+=srcStride;
@@ -81,7 +81,7 @@ static int ptx_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
 #ifdef WORDS_BIGENDIAN
         unsigned int x;
         for (x=0; x<w*bytes_per_pixel; x+=bytes_per_pixel)
-            ST16(ptr+x, AV_RL16(buf+x));
+            AV_WN16(ptr+x, AV_RL16(buf+x));
 #else
         memcpy(ptr, buf, w*bytes_per_pixel);
 #endif
@@ -21,14 +21,12 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
-#define LD(adr) *(uint32_t*)(adr)
 #define PIXOP2(OPNAME, OP) \
 /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do {\
-        OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
-        OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+        OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
+        OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -38,8 +36,8 @@
 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do {\
-        OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
-        OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+        OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
+        OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -49,7 +47,7 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, cons
 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do {\
-        OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+        OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -59,10 +57,10 @@ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, cons
 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do {\
-        OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
-        OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
-        OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
-        OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
+        OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
+        OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
+        OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
+        OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -72,10 +70,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *sr
 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do {\
-        OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
-        OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
-        OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
-        OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
+        OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
+        OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
+        OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
+        OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -95,7 +93,7 @@ static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *sr
 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do {\
-        OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+        OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -105,10 +103,10 @@ static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *s
 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do {\
-        OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
-        OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
-        OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
-        OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
+        OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
+        OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
+        OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
+        OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -118,10 +116,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const ui
 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
    do {\
-        OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
-        OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
-        OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
-        OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
+        OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
+        OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
+        OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
+        OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -131,8 +129,8 @@ static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *
 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do { /* onlye src2 aligned */\
-        OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
-        OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+        OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
+        OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -142,8 +140,8 @@ static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uin
 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
     do {\
-        OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
-        OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+        OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
+        OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
         src1+=src_stride1; \
         src2+=src_stride2; \
         dst+=dst_stride; \
@@ -247,10 +245,10 @@ static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint
 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
     do { \
         uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
-        UNPACK(a0,a1,LD32(src1),LP(src2)); \
+        UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
         UNPACK(a2,a3,LP(src3),LP(src4)); \
         OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
-        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+        UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
         UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
         OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
         src1+=src_stride1;\
@@ -264,10 +262,10 @@ static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *s
 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
     do { \
         uint32_t a0,a1,a2,a3; \
-        UNPACK(a0,a1,LD32(src1),LP(src2)); \
+        UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
         UNPACK(a2,a3,LP(src3),LP(src4)); \
         OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
-        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+        UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
         UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
         OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
         src1+=src_stride1;\
@@ -327,16 +325,16 @@ static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uin
 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
     do { /* src1 is unaligned */\
         uint32_t a0,a1,a2,a3; \
-        UNPACK(a0,a1,LD32(src1),LP(src2)); \
+        UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
         UNPACK(a2,a3,LP(src3),LP(src4)); \
         OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
-        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+        UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
         UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
         OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
-        UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
+        UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
         UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
         OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
-        UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
+        UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
         UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
         OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
         src1+=src_stride1;\
@@ -350,16 +348,16 @@ static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *
 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
     do { \
         uint32_t a0,a1,a2,a3; \
-        UNPACK(a0,a1,LD32(src1),LP(src2)); \
+        UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
         OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
-        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+        UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
         UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
         OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
-        UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
+        UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
         UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
         OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
-        UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
+        UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
         UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
         OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
         src1+=src_stride1;\
@@ -28,23 +28,23 @@ struct unaligned_64 { uint64_t l; } __attribute__((packed));
 struct unaligned_32 { uint32_t l; } __attribute__((packed));
 struct unaligned_16 { uint16_t l; } __attribute__((packed));
 
-#define LD16(a) (((const struct unaligned_16 *) (a))->l)
-#define LD32(a) (((const struct unaligned_32 *) (a))->l)
-#define LD64(a) (((const struct unaligned_64 *) (a))->l)
+#define AV_RN16(a) (((const struct unaligned_16 *) (a))->l)
+#define AV_RN32(a) (((const struct unaligned_32 *) (a))->l)
+#define AV_RN64(a) (((const struct unaligned_64 *) (a))->l)
 
-#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
-#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
-#define ST64(a, b) (((struct unaligned_64 *) (a))->l) = (b)
+#define AV_WN16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
+#define AV_WN32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
+#define AV_WN64(a, b) (((struct unaligned_64 *) (a))->l) = (b)
 
 #else /* __GNUC__ */
 
-#define LD16(a) (*((uint16_t*)(a)))
-#define LD32(a) (*((uint32_t*)(a)))
-#define LD64(a) (*((uint64_t*)(a)))
+#define AV_RN16(a) (*((uint16_t*)(a)))
+#define AV_RN32(a) (*((uint32_t*)(a)))
+#define AV_RN64(a) (*((uint64_t*)(a)))
 
-#define ST16(a, b) *((uint16_t*)(a)) = (b)
-#define ST32(a, b) *((uint32_t*)(a)) = (b)
-#define ST64(a, b) *((uint64_t*)(a)) = (b)
+#define AV_WN16(a, b) *((uint16_t*)(a)) = (b)
+#define AV_WN32(a, b) *((uint32_t*)(a)) = (b)
+#define AV_WN64(a, b) *((uint64_t*)(a)) = (b)
 
 #endif /* !__GNUC__ */
@@ -57,17 +57,17 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
 #ifdef HAVE_FAST_UNALIGNED
 # ifdef WORDS_BIGENDIAN
-# define AV_RB16(x) LD16(x)
-# define AV_WB16(p, d) ST16(p, d)
+# define AV_RB16(x) AV_RN16(x)
+# define AV_WB16(p, d) AV_WN16(p, d)
 
-# define AV_RL16(x) bswap_16(LD16(x))
-# define AV_WL16(p, d) ST16(p, bswap_16(d))
+# define AV_RL16(x) bswap_16(AV_RN16(x))
+# define AV_WL16(p, d) AV_WN16(p, bswap_16(d))
 # else /* WORDS_BIGENDIAN */
-# define AV_RB16(x) bswap_16(LD16(x))
-# define AV_WB16(p, d) ST16(p, bswap_16(d))
+# define AV_RB16(x) bswap_16(AV_RN16(x))
+# define AV_WB16(p, d) AV_WN16(p, bswap_16(d))
 
-# define AV_RL16(x) LD16(x)
-# define AV_WL16(p, d) ST16(p, d)
+# define AV_RL16(x) AV_RN16(x)
+# define AV_WL16(p, d) AV_WN16(p, d)
 # endif
 #else /* HAVE_FAST_UNALIGNED */
 #define AV_RB16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1])
@@ -100,17 +100,17 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
 #ifdef HAVE_FAST_UNALIGNED
 # ifdef WORDS_BIGENDIAN
-# define AV_RB32(x) LD32(x)
-# define AV_WB32(p, d) ST32(p, d)
+# define AV_RB32(x) AV_RN32(x)
+# define AV_WB32(p, d) AV_WN32(p, d)
 
-# define AV_RL32(x) bswap_32(LD32(x))
-# define AV_WL32(p, d) ST32(p, bswap_32(d))
+# define AV_RL32(x) bswap_32(AV_RN32(x))
+# define AV_WL32(p, d) AV_WN32(p, bswap_32(d))
 # else /* WORDS_BIGENDIAN */
-# define AV_RB32(x) bswap_32(LD32(x))
-# define AV_WB32(p, d) ST32(p, bswap_32(d))
+# define AV_RB32(x) bswap_32(AV_RN32(x))
+# define AV_WB32(p, d) AV_WN32(p, bswap_32(d))
 
-# define AV_RL32(x) LD32(x)
-# define AV_WL32(p, d) ST32(p, d)
+# define AV_RL32(x) AV_RN32(x)
+# define AV_WL32(p, d) AV_WN32(p, d)
 # endif
 #else /* HAVE_FAST_UNALIGNED */
 #define AV_RB32(x) ((((uint8_t*)(x))[0] << 24) | \
@@ -136,17 +136,17 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
 #ifdef HAVE_FAST_UNALIGNED
 # ifdef WORDS_BIGENDIAN
-# define AV_RB64(x) LD64(x)
-# define AV_WB64(p, d) ST64(p, d)
+# define AV_RB64(x) AV_RN64(x)
+# define AV_WB64(p, d) AV_WN64(p, d)
 
-# define AV_RL64(x) bswap_64(LD64(x))
-# define AV_WL64(p, d) ST64(p, bswap_64(d))
+# define AV_RL64(x) bswap_64(AV_RN64(x))
+# define AV_WL64(p, d) AV_WN64(p, bswap_64(d))
 # else /* WORDS_BIGENDIAN */
-# define AV_RB64(x) bswap_64(LD64(x))
-# define AV_WB64(p, d) ST64(p, bswap_64(d))
+# define AV_RB64(x) bswap_64(AV_RN64(x))
+# define AV_WB64(p, d) AV_WN64(p, bswap_64(d))
 
-# define AV_RL64(x) LD64(x)
-# define AV_WL64(p, d) ST64(p, d)
+# define AV_RL64(x) AV_RN64(x)
+# define AV_WL64(p, d) AV_WN64(p, d)
 # endif
 #else /* HAVE_FAST_UNALIGNED */
 #define AV_RB64(x) (((uint64_t)((uint8_t*)(x))[0] << 56) | \