Originally committed as revision 7764 to svn://svn.ffmpeg.org/ffmpeg/trunk [tags/v0.5]
@@ -388,7 +388,7 @@ typedef struct DSPContext { | |||||
/* snow wavelet */ | /* snow wavelet */ | ||||
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | ||||
void (*horizontal_compose97i)(DWTELEM *b, int width); | void (*horizontal_compose97i)(DWTELEM *b, int width); | ||||
void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | |||||
void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | |||||
void (*prefetch)(void *mem, int stride, int h); | void (*prefetch)(void *mem, int stride, int h); | ||||
@@ -3042,9 +3042,9 @@ extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width); | |||||
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); | extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); | ||||
extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | ||||
extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | ||||
extern void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
extern void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | ||||
extern void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
extern void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | ||||
#endif | #endif | ||||
@@ -708,7 +708,7 @@ void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTE | |||||
"dec %2 \n\t"\ | "dec %2 \n\t"\ | ||||
snow_inner_add_yblock_sse2_end_common2 | snow_inner_add_yblock_sse2_end_common2 | ||||
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, | |||||
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, | |||||
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | ||||
snow_inner_add_yblock_sse2_header | snow_inner_add_yblock_sse2_header | ||||
snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0") | snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0") | ||||
@@ -756,7 +756,7 @@ snow_inner_add_yblock_sse2_accum_8("0", "136") | |||||
snow_inner_add_yblock_sse2_end_8 | snow_inner_add_yblock_sse2_end_8 | ||||
} | } | ||||
static void inner_add_yblock_bw_16_obmc_32_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, | |||||
static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, | |||||
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | ||||
snow_inner_add_yblock_sse2_header | snow_inner_add_yblock_sse2_header | ||||
snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0") | snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0") | ||||
@@ -868,7 +868,7 @@ snow_inner_add_yblock_sse2_end_16 | |||||
"rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ | "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ | ||||
"%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); | "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); | ||||
static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, | |||||
static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, | |||||
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | ||||
snow_inner_add_yblock_mmx_header | snow_inner_add_yblock_mmx_header | ||||
snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") | snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") | ||||
@@ -879,7 +879,7 @@ snow_inner_add_yblock_mmx_mix("0", "0") | |||||
snow_inner_add_yblock_mmx_end("16") | snow_inner_add_yblock_mmx_end("16") | ||||
} | } | ||||
static void inner_add_yblock_bw_16_obmc_32_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, | |||||
static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, | |||||
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | ||||
snow_inner_add_yblock_mmx_header | snow_inner_add_yblock_mmx_header | ||||
snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") | snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") | ||||
@@ -896,7 +896,7 @@ snow_inner_add_yblock_mmx_mix("32", "8") | |||||
snow_inner_add_yblock_mmx_end("32") | snow_inner_add_yblock_mmx_end("32") | ||||
} | } | ||||
void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | ||||
if (b_w == 16) | if (b_w == 16) | ||||
@@ -910,7 +910,7 @@ void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t | |||||
ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | ||||
} | } | ||||
void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | ||||
if (b_w == 16) | if (b_w == 16) | ||||
inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | ||||
@@ -2521,16 +2521,16 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, i | |||||
} | } | ||||
} | } | ||||
void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |||||
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | ||||
int y, x; | int y, x; | ||||
DWTELEM * dst; | DWTELEM * dst; | ||||
for(y=0; y<b_h; y++){ | for(y=0; y<b_h; y++){ | ||||
//FIXME ugly missue of obmc_stride | //FIXME ugly missue of obmc_stride | ||||
uint8_t *obmc1= obmc + y*obmc_stride; | |||||
uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |||||
uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |||||
uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |||||
const uint8_t *obmc1= obmc + y*obmc_stride; | |||||
const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |||||
const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |||||
const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |||||
dst = slice_buffer_get_line(sb, src_y + y); | dst = slice_buffer_get_line(sb, src_y + y); | ||||
for(x=0; x<b_w; x++){ | for(x=0; x<b_w; x++){ | ||||
int v= obmc1[x] * block[3][x + y*src_stride] | int v= obmc1[x] * block[3][x + y*src_stride] | ||||
@@ -2687,10 +2687,10 @@ assert(src_stride > 2*MB_SIZE + 5); | |||||
}else | }else | ||||
for(y=0; y<b_h; y++){ | for(y=0; y<b_h; y++){ | ||||
//FIXME ugly missue of obmc_stride | //FIXME ugly missue of obmc_stride | ||||
uint8_t *obmc1= obmc + y*obmc_stride; | |||||
uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |||||
uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |||||
uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |||||
const uint8_t *obmc1= obmc + y*obmc_stride; | |||||
const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |||||
const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |||||
const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |||||
for(x=0; x<b_w; x++){ | for(x=0; x<b_w; x++){ | ||||
int v= obmc1[x] * block[3][x + y*src_stride] | int v= obmc1[x] * block[3][x + y*src_stride] | ||||
+obmc2[x] * block[2][x + y*src_stride] | +obmc2[x] * block[2][x + y*src_stride] | ||||
@@ -125,7 +125,7 @@ struct slice_buffer_s { | |||||
extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | ||||
extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width); | extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width); | ||||
extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | |||||
extern void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | |||||
#ifdef CONFIG_SNOW_ENCODER | #ifdef CONFIG_SNOW_ENCODER | ||||
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h); | int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h); | ||||