Originally committed as revision 5228 to svn://svn.ffmpeg.org/ffmpeg/trunk
tags/v0.5
| @@ -212,3 +212,4 @@ BeOS Francois Revol | |||||
| i386 Michael Niedermayer | i386 Michael Niedermayer | ||||
| Mac OS X / PowerPC Romain Dolbeau | Mac OS X / PowerPC Romain Dolbeau | ||||
| Amiga / PowerPC Colin Ward | Amiga / PowerPC Colin Ward | ||||
| Linux / PowerPC Luca Barbato | |||||
| @@ -388,7 +388,7 @@ endif | |||||
| ifeq ($(TARGET_ALTIVEC),yes) | ifeq ($(TARGET_ALTIVEC),yes) | ||||
| OBJS += ppc/dsputil_altivec.o ppc/mpegvideo_altivec.o ppc/idct_altivec.o \ | OBJS += ppc/dsputil_altivec.o ppc/mpegvideo_altivec.o ppc/idct_altivec.o \ | ||||
| ppc/fft_altivec.o ppc/gmc_altivec.o ppc/fdct_altivec.o \ | ppc/fft_altivec.o ppc/gmc_altivec.o ppc/fdct_altivec.o \ | ||||
| ppc/dsputil_h264_altivec.o | |||||
| ppc/dsputil_h264_altivec.o ppc/dsputil_snow_altivec.o | |||||
| endif | endif | ||||
| ifeq ($(TARGET_ARCH_SH4),yes) | ifeq ($(TARGET_ARCH_SH4),yes) | ||||
| @@ -21,35 +21,6 @@ | |||||
| #include "../snow.h" | #include "../snow.h" | ||||
| #include "mmx.h" | #include "mmx.h" | ||||
/*
 * Initialise the interleave cursor *i for a line of `width` samples.
 * *i is set to width-2; for an odd width the last slot low[width-1] is
 * filled from low[(width-1)>>1] and the cursor steps back once more, so
 * *i is even on return in both cases. `high` is not used here.
 */
| static void always_inline snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){ | |||||
| (*i) = (width) - 2; | |||||
| if (width & 1){ | |||||
| low[(*i)+1] = low[((*i)+1)>>1]; | |||||
| (*i)--; | |||||
| } | |||||
| } | |||||
/*
 * Scalar lifting step for indices i..w-1:
 *   dst[i] = src[i] - ((mul * (ref[i] + ref[i+1]) + add) >> shift)
 * When width and lift_high differ in their lowest bit, element w is also
 * produced, using 2*ref[w] in place of the neighbour sum so that ref[w+1]
 * is never read.
 * NOTE(review): the "lead_out" name suggests this finishes what a
 * vectorised loop left over -- confirm against the callers.
 */
| static void always_inline snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ | |||||
| for(; i<w; i++){ | |||||
| dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift); | |||||
| } | |||||
| if((width^lift_high)&1){ | |||||
| dst[w] = src[w] - ((mul * 2 * ref[w] + add) >> shift); | |||||
| } | |||||
| } | |||||
/*
 * Scalar "liftS" step for indices i..w-1:
 *   dst[i] = src[i] - (((-(ref[i] + ref[i+1]) + W_BO) - 4*src[i]) >> W_BS)
 * For an odd width, element w is also produced with -2*ref[w] standing in
 * for the neighbour sum, so ref[w+1] is never read.
 * W_BO and W_BS are constants defined outside this excerpt.
 */
| static void always_inline snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ | |||||
| for(; i<w; i++){ | |||||
| dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS); | |||||
| } | |||||
| if(width&1){ | |||||
| dst[w] = src[w] - (((-2 * ref[w] + W_BO) - 4 * src[w]) >> W_BS); | |||||
| } | |||||
| } | |||||
| void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){ | void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){ | ||||
| const int w2= (width+1)>>1; | const int w2= (width+1)>>1; | ||||
| // SSE2 code runs faster with pointers aligned on a 32-byte boundary. | // SSE2 code runs faster with pointers aligned on a 32-byte boundary. | ||||
| @@ -30,6 +30,17 @@ extern void fdct_altivec(int16_t *block); | |||||
| extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | ||||
| extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | ||||
/*
 * Prototypes for the Snow routines that dsputil_init_ppc() installs into
 * c->horizontal_compose97i, c->vertical_compose97i and c->inner_add_yblock.
 * The horizontal and inner_add_yblock parameter lists match the generic
 * ff_snow_horizontal_compose97i / ff_snow_inner_add_yblock declarations.
 * NOTE(review): presumably implemented in ppc/dsputil_snow_altivec.c (the
 * object added to the AltiVec OBJS list) -- the definitions are not
 * visible here.
 */
| extern void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width); | |||||
| extern void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, | |||||
| DWTELEM *b2, DWTELEM *b3, | |||||
| DWTELEM *b4, DWTELEM *b5, | |||||
| int width); | |||||
| extern void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride, | |||||
| uint8_t * * block, int b_w, int b_h, | |||||
| int src_x, int src_y, int src_stride, | |||||
| slice_buffer * sb, int add, | |||||
| uint8_t * dst8); | |||||
| int mm_flags = 0; | int mm_flags = 0; | ||||
| int mm_support(void) | int mm_support(void) | ||||
| @@ -297,6 +308,11 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) | |||||
| c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | ||||
| #endif | #endif | ||||
| c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec; | |||||
| c->vertical_compose97i = ff_snow_vertical_compose97i_altivec; | |||||
| c->inner_add_yblock = ff_snow_inner_add_yblock_altivec; | |||||
| #ifdef CONFIG_ENCODERS | #ifdef CONFIG_ENCODERS | ||||
| if (avctx->dct_algo == FF_DCT_AUTO || | if (avctx->dct_algo == FF_DCT_AUTO || | ||||
| avctx->dct_algo == FF_DCT_ALTIVEC) | avctx->dct_algo == FF_DCT_ALTIVEC) | ||||
| @@ -120,4 +120,43 @@ extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, D | |||||
| extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width); | extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width); | ||||
| extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | ||||
| /* C bits used by mmx/sse2/altivec */ | |||||
/*
 * Initialise the interleave cursor *i for a line of `width` samples.
 * *i is set to width-2; for an odd width the last slot low[width-1] is
 * filled from low[(width-1)>>1] and the cursor steps back once more, so
 * *i is even on return in both cases. `high` is not used here.
 */
| static always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){ | |||||
| (*i) = (width) - 2; | |||||
| if (width & 1){ | |||||
| low[(*i)+1] = low[((*i)+1)>>1]; | |||||
| (*i)--; | |||||
| } | |||||
| } | |||||
/*
 * Interleave the remaining pairs in place, stepping *i down by 2 until it
 * goes negative: the odd slot low[*i+1] takes high[*i>>1] and the even
 * slot low[*i] takes low[*i>>1].
 * Walking from the end toward index 0 keeps the in-place expansion of
 * `low` safe, because the slots written in one iteration lie above every
 * low[] index that later iterations still read.
 * NOTE(review): this assumes `high` does not overlap the part of `low`
 * being written -- confirm against the callers.
 */
| static always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){ | |||||
| for (; (*i)>=0; (*i)-=2){ | |||||
| low[(*i)+1] = high[(*i)>>1]; | |||||
| low[*i] = low[(*i)>>1]; | |||||
| } | |||||
| } | |||||
/*
 * Scalar lifting step for indices i..w-1:
 *   dst[i] = src[i] - ((mul * (ref[i] + ref[i+1]) + add) >> shift)
 * When width and lift_high differ in their lowest bit, element w is also
 * produced, using 2*ref[w] in place of the neighbour sum so that ref[w+1]
 * is never read.
 * NOTE(review): the "lead_out" name suggests this finishes what a
 * vectorised loop left over -- confirm against the callers.
 */
| static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ | |||||
| for(; i<w; i++){ | |||||
| dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift); | |||||
| } | |||||
| if((width^lift_high)&1){ | |||||
| dst[w] = src[w] - ((mul * 2 * ref[w] + add) >> shift); | |||||
| } | |||||
| } | |||||
/*
 * Scalar "liftS" step for indices i..w-1:
 *   dst[i] = src[i] - (((-(ref[i] + ref[i+1]) + W_BO) - 4*src[i]) >> W_BS)
 * For an odd width, element w is also produced with -2*ref[w] standing in
 * for the neighbour sum, so ref[w+1] is never read.
 * W_BO and W_BS are constants defined outside this excerpt.
 */
| static always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ | |||||
| for(; i<w; i++){ | |||||
| dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS); | |||||
| } | |||||
| if(width&1){ | |||||
| dst[w] = src[w] - (((-2 * ref[w] + W_BO) - 4 * src[w]) >> W_BS); | |||||
| } | |||||
| } | |||||
| #endif | #endif | ||||