* qatar/master: SBR DSP: fix SSE code to not use SSE2 instructions. cpu: initialize mask to -1, so that by default, optimizations are used. error_resilience: initialize s->block_index[]. svq3: protect against negative quantizers. Don't use ff_cropTbl[] for IDCT. swscale: make filterPos 32bit. FATE: add CPUFLAGS variable, mapping to -cpuflags avconv option. avconv: add -cpuflags option for setting supported cpuflags. cpu: add av_set_cpu_flags_mask(). libx264: Allow overriding the sliced threads option. avconv: fix counting encoded video size. Conflicts: doc/APIchanges, doc/fate.texi, doc/ffmpeg.texi, ffmpeg.c, libavcodec/h264idct_template.c, libavcodec/svq3.c, libavutil/avutil.h, libavutil/cpu.c, libavutil/cpu.h, libswscale/swscale.c, tests/Makefile, tests/fate-run.sh, tests/regression-funcs.sh. Merged-by: Michael Niedermayer <michaelni@gmx.at> tags/n0.11
| @@ -340,11 +340,8 @@ void parse_options(void *optctx, int argc, char **argv, const OptionDef *options | |||||
| } | } | ||||
| } | } | ||||
| /* | |||||
| * Return index of option opt in argv or 0 if not found. | |||||
| */ | |||||
| static int locate_option(int argc, char **argv, const OptionDef *options, | |||||
| const char *optname) | |||||
| int locate_option(int argc, char **argv, const OptionDef *options, | |||||
| const char *optname) | |||||
| { | { | ||||
| const OptionDef *po; | const OptionDef *po; | ||||
| int i; | int i; | ||||
| @@ -537,13 +534,54 @@ int opt_max_alloc(const char *opt, const char *arg) | |||||
| int opt_cpuflags(const char *opt, const char *arg) | int opt_cpuflags(const char *opt, const char *arg) | ||||
| { | { | ||||
| char *tail; | |||||
| long flags = strtol(arg, &tail, 10); | |||||
| #define CPUFLAG_MMX2 (AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMX2) | |||||
| #define CPUFLAG_3DNOW (AV_CPU_FLAG_3DNOW | AV_CPU_FLAG_MMX) | |||||
| #define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW) | |||||
| #define CPUFLAG_SSE (AV_CPU_FLAG_SSE | CPUFLAG_MMX2) | |||||
| #define CPUFLAG_SSE2 (AV_CPU_FLAG_SSE2 | CPUFLAG_SSE) | |||||
| #define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2) | |||||
| #define CPUFLAG_SSE3 (AV_CPU_FLAG_SSE3 | CPUFLAG_SSE2) | |||||
| #define CPUFLAG_SSE3SLOW (AV_CPU_FLAG_SSE3SLOW | CPUFLAG_SSE3) | |||||
| #define CPUFLAG_SSSE3 (AV_CPU_FLAG_SSSE3 | CPUFLAG_SSE3) | |||||
| #define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3) | |||||
| #define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4) | |||||
| #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) | |||||
| #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) | |||||
| #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) | |||||
| static const AVOption cpuflags_opts[] = { | |||||
| { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { 0 }, INT64_MIN, INT64_MAX, .unit = "flags" }, | |||||
| { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC }, .unit = "flags" }, | |||||
| { "mmx" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX }, .unit = "flags" }, | |||||
| { "mmx2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2 }, .unit = "flags" }, | |||||
| { "sse" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE }, .unit = "flags" }, | |||||
| { "sse2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2 }, .unit = "flags" }, | |||||
| { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW }, .unit = "flags" }, | |||||
| { "sse3" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3 }, .unit = "flags" }, | |||||
| { "sse3slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3SLOW }, .unit = "flags" }, | |||||
| { "ssse3" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSSE3 }, .unit = "flags" }, | |||||
| { "atom" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ATOM }, .unit = "flags" }, | |||||
| { "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE4 }, .unit = "flags" }, | |||||
| { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE42 }, .unit = "flags" }, | |||||
| { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_AVX }, .unit = "flags" }, | |||||
| { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_XOP }, .unit = "flags" }, | |||||
| { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_FMA4 }, .unit = "flags" }, | |||||
| { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOW }, .unit = "flags" }, | |||||
| { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOWEXT }, .unit = "flags" }, | |||||
| { NULL }, | |||||
| }; | |||||
| static const AVClass class = { | |||||
| .class_name = "cpuflags", | |||||
| .item_name = av_default_item_name, | |||||
| .option = cpuflags_opts, | |||||
| .version = LIBAVUTIL_VERSION_INT, | |||||
| }; | |||||
| int flags = av_get_cpu_flags(); | |||||
| int ret; | |||||
| const AVClass *pclass = &class; | |||||
| if ((ret = av_opt_eval_flags(&pclass, &cpuflags_opts[0], arg, &flags)) < 0) | |||||
| return ret; | |||||
| if (*tail) { | |||||
| av_log(NULL, AV_LOG_FATAL, "Invalid cpuflags \"%s\".\n", arg); | |||||
| exit_program(1); | |||||
| } | |||||
| av_force_cpu_flags(flags); | av_force_cpu_flags(flags); | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -206,6 +206,12 @@ int parse_option(void *optctx, const char *opt, const char *arg, | |||||
| */ | */ | ||||
| void parse_loglevel(int argc, char **argv, const OptionDef *options); | void parse_loglevel(int argc, char **argv, const OptionDef *options); | ||||
| /** | |||||
| * Return the index of option optname in argv, or 0 if not found. | |||||
| */ | |||||
| int locate_option(int argc, char **argv, const OptionDef *options, | |||||
| const char *optname); | |||||
| /** | /** | ||||
| * Check if the given stream matches a stream specifier. | * Check if the given stream matches a stream specifier. | ||||
| * | * | ||||
| @@ -16,4 +16,4 @@ | |||||
| { "debug", HAS_ARG, {(void*)opt_codec_debug}, "set debug flags", "flags" }, | { "debug", HAS_ARG, {(void*)opt_codec_debug}, "set debug flags", "flags" }, | ||||
| { "report", 0, {(void*)opt_report}, "generate a report" }, | { "report", 0, {(void*)opt_report}, "generate a report" }, | ||||
| { "max_alloc", HAS_ARG, {(void*)opt_max_alloc}, "set maximum size of a single allocated block", "bytes" }, | { "max_alloc", HAS_ARG, {(void*)opt_max_alloc}, "set maximum size of a single allocated block", "bytes" }, | ||||
| { "cpuflags", HAS_ARG, {(void*)opt_cpuflags}, "force specific cpu flags", "flags" }, | |||||
| { "cpuflags", HAS_ARG | OPT_EXPERT, {(void*)opt_cpuflags}, "force specific cpu flags", "flags" }, | |||||
| @@ -134,6 +134,10 @@ It also implies @code{-loglevel verbose}. | |||||
| Note: setting the environment variable @code{FFREPORT} to any value has the | Note: setting the environment variable @code{FFREPORT} to any value has the | ||||
| same effect. | same effect. | ||||
| @item -cpuflags flags (@emph{global}) | |||||
| Allows setting and clearing CPU flags. This option is intended | |||||
| for testing. Do not use it unless you know what you're doing. | |||||
| @end table | @end table | ||||
| @section AVOptions | @section AVOptions | ||||
| @@ -166,9 +166,11 @@ the synchronisation of the samples directory. | |||||
| @item THREADS | @item THREADS | ||||
| Specify how many threads to use while running regression tests, it is | Specify how many threads to use while running regression tests, it is | ||||
| quite useful to detect thread-related regressions. | quite useful to detect thread-related regressions. | ||||
| @item CPUFLAGS | |||||
| Specify CPU flags; the value is passed on through the @code{-cpuflags} option. | |||||
| @end table | @end table | ||||
| Example: | Example: | ||||
| @example | @example | ||||
| make V=1 SAMPLES=/var/fate/samples THREADS=2 fate | |||||
| @end example | |||||
| make V=1 SAMPLES=/var/fate/samples THREADS=2 CPUFLAGS=mmx fate | |||||
| @end example | |||||
| @@ -4963,6 +4963,13 @@ static int opt_deinterlace(const char *opt, const char *arg) | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| static void parse_cpuflags(int argc, char **argv, const OptionDef *options) | |||||
| { | |||||
| int idx = locate_option(argc, argv, options, "cpuflags"); | |||||
| if (idx && argv[idx + 1]) | |||||
| opt_cpuflags("cpuflags", argv[idx + 1]); | |||||
| } | |||||
| #define OFFSET(x) offsetof(OptionsContext, x) | #define OFFSET(x) offsetof(OptionsContext, x) | ||||
| static const OptionDef options[] = { | static const OptionDef options[] = { | ||||
| /* main options */ | /* main options */ | ||||
| @@ -5136,6 +5143,8 @@ int main(int argc, char **argv) | |||||
| term_init(); | term_init(); | ||||
| parse_cpuflags(argc, argv, options); | |||||
| /* parse options */ | /* parse options */ | ||||
| parse_options(&o, argc, argv, options, opt_output_file); | parse_options(&o, argc, argv, options, opt_output_file); | ||||
| @@ -367,18 +367,17 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | |||||
| int line_size) | int line_size) | ||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| /* read the pixels */ | /* read the pixels */ | ||||
| for(i=0;i<8;i++) { | for(i=0;i<8;i++) { | ||||
| pixels[0] = cm[block[0]]; | |||||
| pixels[1] = cm[block[1]]; | |||||
| pixels[2] = cm[block[2]]; | |||||
| pixels[3] = cm[block[3]]; | |||||
| pixels[4] = cm[block[4]]; | |||||
| pixels[5] = cm[block[5]]; | |||||
| pixels[6] = cm[block[6]]; | |||||
| pixels[7] = cm[block[7]]; | |||||
| pixels[0] = av_clip_uint8(block[0]); | |||||
| pixels[1] = av_clip_uint8(block[1]); | |||||
| pixels[2] = av_clip_uint8(block[2]); | |||||
| pixels[3] = av_clip_uint8(block[3]); | |||||
| pixels[4] = av_clip_uint8(block[4]); | |||||
| pixels[5] = av_clip_uint8(block[5]); | |||||
| pixels[6] = av_clip_uint8(block[6]); | |||||
| pixels[7] = av_clip_uint8(block[7]); | |||||
| pixels += line_size; | pixels += line_size; | ||||
| block += 8; | block += 8; | ||||
| @@ -389,14 +388,13 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels | |||||
| int line_size) | int line_size) | ||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| /* read the pixels */ | /* read the pixels */ | ||||
| for(i=0;i<4;i++) { | for(i=0;i<4;i++) { | ||||
| pixels[0] = cm[block[0]]; | |||||
| pixels[1] = cm[block[1]]; | |||||
| pixels[2] = cm[block[2]]; | |||||
| pixels[3] = cm[block[3]]; | |||||
| pixels[0] = av_clip_uint8(block[0]); | |||||
| pixels[1] = av_clip_uint8(block[1]); | |||||
| pixels[2] = av_clip_uint8(block[2]); | |||||
| pixels[3] = av_clip_uint8(block[3]); | |||||
| pixels += line_size; | pixels += line_size; | ||||
| block += 8; | block += 8; | ||||
| @@ -407,12 +405,11 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels | |||||
| int line_size) | int line_size) | ||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| /* read the pixels */ | /* read the pixels */ | ||||
| for(i=0;i<2;i++) { | for(i=0;i<2;i++) { | ||||
| pixels[0] = cm[block[0]]; | |||||
| pixels[1] = cm[block[1]]; | |||||
| pixels[0] = av_clip_uint8(block[0]); | |||||
| pixels[1] = av_clip_uint8(block[1]); | |||||
| pixels += line_size; | pixels += line_size; | ||||
| block += 8; | block += 8; | ||||
| @@ -444,18 +441,17 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | |||||
| int line_size) | int line_size) | ||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| /* read the pixels */ | /* read the pixels */ | ||||
| for(i=0;i<8;i++) { | for(i=0;i<8;i++) { | ||||
| pixels[0] = cm[pixels[0] + block[0]]; | |||||
| pixels[1] = cm[pixels[1] + block[1]]; | |||||
| pixels[2] = cm[pixels[2] + block[2]]; | |||||
| pixels[3] = cm[pixels[3] + block[3]]; | |||||
| pixels[4] = cm[pixels[4] + block[4]]; | |||||
| pixels[5] = cm[pixels[5] + block[5]]; | |||||
| pixels[6] = cm[pixels[6] + block[6]]; | |||||
| pixels[7] = cm[pixels[7] + block[7]]; | |||||
| pixels[0] = av_clip_uint8(pixels[0] + block[0]); | |||||
| pixels[1] = av_clip_uint8(pixels[1] + block[1]); | |||||
| pixels[2] = av_clip_uint8(pixels[2] + block[2]); | |||||
| pixels[3] = av_clip_uint8(pixels[3] + block[3]); | |||||
| pixels[4] = av_clip_uint8(pixels[4] + block[4]); | |||||
| pixels[5] = av_clip_uint8(pixels[5] + block[5]); | |||||
| pixels[6] = av_clip_uint8(pixels[6] + block[6]); | |||||
| pixels[7] = av_clip_uint8(pixels[7] + block[7]); | |||||
| pixels += line_size; | pixels += line_size; | ||||
| block += 8; | block += 8; | ||||
| } | } | ||||
| @@ -465,14 +461,13 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels | |||||
| int line_size) | int line_size) | ||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| /* read the pixels */ | /* read the pixels */ | ||||
| for(i=0;i<4;i++) { | for(i=0;i<4;i++) { | ||||
| pixels[0] = cm[pixels[0] + block[0]]; | |||||
| pixels[1] = cm[pixels[1] + block[1]]; | |||||
| pixels[2] = cm[pixels[2] + block[2]]; | |||||
| pixels[3] = cm[pixels[3] + block[3]]; | |||||
| pixels[0] = av_clip_uint8(pixels[0] + block[0]); | |||||
| pixels[1] = av_clip_uint8(pixels[1] + block[1]); | |||||
| pixels[2] = av_clip_uint8(pixels[2] + block[2]); | |||||
| pixels[3] = av_clip_uint8(pixels[3] + block[3]); | |||||
| pixels += line_size; | pixels += line_size; | ||||
| block += 8; | block += 8; | ||||
| } | } | ||||
| @@ -482,12 +477,11 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels | |||||
| int line_size) | int line_size) | ||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| /* read the pixels */ | /* read the pixels */ | ||||
| for(i=0;i<2;i++) { | for(i=0;i<2;i++) { | ||||
| pixels[0] = cm[pixels[0] + block[0]]; | |||||
| pixels[1] = cm[pixels[1] + block[1]]; | |||||
| pixels[0] = av_clip_uint8(pixels[0] + block[0]); | |||||
| pixels[1] = av_clip_uint8(pixels[1] + block[1]); | |||||
| pixels += line_size; | pixels += line_size; | ||||
| block += 8; | block += 8; | ||||
| } | } | ||||
| @@ -2779,15 +2773,11 @@ static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) | |||||
| static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) | static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) | ||||
| { | { | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| dest[0] = cm[(block[0] + 4)>>3]; | |||||
| dest[0] = av_clip_uint8((block[0] + 4)>>3); | |||||
| } | } | ||||
| static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | ||||
| { | { | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | |||||
| dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3)); | |||||
| } | } | ||||
| static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } | static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } | ||||
| @@ -440,9 +440,14 @@ static void guess_mv(MpegEncContext *s) | |||||
| if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) || | if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) || | ||||
| num_avail <= mb_width / 2) { | num_avail <= mb_width / 2) { | ||||
| for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | ||||
| s->mb_x = 0; | |||||
| s->mb_y = mb_y; | |||||
| ff_init_block_index(s); | |||||
| for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | ||||
| const int mb_xy = mb_x + mb_y * s->mb_stride; | const int mb_xy = mb_x + mb_y * s->mb_stride; | ||||
| ff_update_block_index(s); | |||||
| if (IS_INTRA(s->current_picture.f.mb_type[mb_xy])) | if (IS_INTRA(s->current_picture.f.mb_type[mb_xy])) | ||||
| continue; | continue; | ||||
| if (!(s->error_status_table[mb_xy] & ER_MV_ERROR)) | if (!(s->error_status_table[mb_xy] & ER_MV_ERROR)) | ||||
| @@ -477,6 +482,9 @@ static void guess_mv(MpegEncContext *s) | |||||
| changed = 0; | changed = 0; | ||||
| for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | ||||
| s->mb_x = 0; | |||||
| s->mb_y = mb_y; | |||||
| ff_init_block_index(s); | |||||
| for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | ||||
| const int mb_xy = mb_x + mb_y * s->mb_stride; | const int mb_xy = mb_x + mb_y * s->mb_stride; | ||||
| int mv_predictor[8][2] = { { 0 } }; | int mv_predictor[8][2] = { { 0 } }; | ||||
| @@ -488,6 +496,8 @@ static void guess_mv(MpegEncContext *s) | |||||
| const int mot_index = (mb_x + mb_y * mot_stride) * mot_step; | const int mot_index = (mb_x + mb_y * mot_stride) * mot_step; | ||||
| int prev_x, prev_y, prev_ref; | int prev_x, prev_y, prev_ref; | ||||
| ff_update_block_index(s); | |||||
| if ((mb_x ^ mb_y ^ pass) & 1) | if ((mb_x ^ mb_y ^ pass) & 1) | ||||
| continue; | continue; | ||||
| @@ -1098,11 +1108,16 @@ void ff_er_frame_end(MpegEncContext *s) | |||||
| /* handle inter blocks with damaged AC */ | /* handle inter blocks with damaged AC */ | ||||
| for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | ||||
| s->mb_x = 0; | |||||
| s->mb_y = mb_y; | |||||
| ff_init_block_index(s); | |||||
| for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | ||||
| const int mb_xy = mb_x + mb_y * s->mb_stride; | const int mb_xy = mb_x + mb_y * s->mb_stride; | ||||
| const int mb_type = s->current_picture.f.mb_type[mb_xy]; | const int mb_type = s->current_picture.f.mb_type[mb_xy]; | ||||
| int dir = !s->last_picture.f.data[0]; | int dir = !s->last_picture.f.data[0]; | ||||
| ff_update_block_index(s); | |||||
| error = s->error_status_table[mb_xy]; | error = s->error_status_table[mb_xy]; | ||||
| if (IS_INTRA(mb_type)) | if (IS_INTRA(mb_type)) | ||||
| @@ -1140,11 +1155,16 @@ void ff_er_frame_end(MpegEncContext *s) | |||||
| /* guess MVs */ | /* guess MVs */ | ||||
| if (s->pict_type == AV_PICTURE_TYPE_B) { | if (s->pict_type == AV_PICTURE_TYPE_B) { | ||||
| for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | ||||
| s->mb_x = 0; | |||||
| s->mb_y = mb_y; | |||||
| ff_init_block_index(s); | |||||
| for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | ||||
| int xy = mb_x * 2 + mb_y * 2 * s->b8_stride; | int xy = mb_x * 2 + mb_y * 2 * s->b8_stride; | ||||
| const int mb_xy = mb_x + mb_y * s->mb_stride; | const int mb_xy = mb_x + mb_y * s->mb_stride; | ||||
| const int mb_type = s->current_picture.f.mb_type[mb_xy]; | const int mb_type = s->current_picture.f.mb_type[mb_xy]; | ||||
| ff_update_block_index(s); | |||||
| error = s->error_status_table[mb_xy]; | error = s->error_status_table[mb_xy]; | ||||
| if (IS_INTRA(mb_type)) | if (IS_INTRA(mb_type)) | ||||
| @@ -49,7 +49,6 @@ static const uint8_t scan8[16*3]={ | |||||
| void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride) | void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride) | ||||
| { | { | ||||
| int i; | int i; | ||||
| INIT_CLIP | |||||
| pixel *dst = (pixel*)_dst; | pixel *dst = (pixel*)_dst; | ||||
| dctcoef *block = (dctcoef*)_block; | dctcoef *block = (dctcoef*)_block; | ||||
| stride >>= sizeof(pixel)-1; | stride >>= sizeof(pixel)-1; | ||||
| @@ -74,16 +73,15 @@ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride) | |||||
| const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i]; | const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i]; | ||||
| const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1); | const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1); | ||||
| dst[i + 0*stride]= CLIP(dst[i + 0*stride] + ((z0 + z3) >> 6)); | |||||
| dst[i + 1*stride]= CLIP(dst[i + 1*stride] + ((z1 + z2) >> 6)); | |||||
| dst[i + 2*stride]= CLIP(dst[i + 2*stride] + ((z1 - z2) >> 6)); | |||||
| dst[i + 3*stride]= CLIP(dst[i + 3*stride] + ((z0 - z3) >> 6)); | |||||
| dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((z0 + z3) >> 6)); | |||||
| dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((z1 + z2) >> 6)); | |||||
| dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((z1 - z2) >> 6)); | |||||
| dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((z0 - z3) >> 6)); | |||||
| } | } | ||||
| } | } | ||||
| void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){ | void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){ | ||||
| int i; | int i; | ||||
| INIT_CLIP | |||||
| pixel *dst = (pixel*)_dst; | pixel *dst = (pixel*)_dst; | ||||
| dctcoef *block = (dctcoef*)_block; | dctcoef *block = (dctcoef*)_block; | ||||
| stride >>= sizeof(pixel)-1; | stride >>= sizeof(pixel)-1; | ||||
| @@ -143,14 +141,14 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){ | |||||
| const int b5 = (a3>>2) - a5; | const int b5 = (a3>>2) - a5; | ||||
| const int b7 = a7 - (a1>>2); | const int b7 = a7 - (a1>>2); | ||||
| dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) ); | |||||
| dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) ); | |||||
| dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) ); | |||||
| dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) ); | |||||
| dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) ); | |||||
| dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) ); | |||||
| dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) ); | |||||
| dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) ); | |||||
| dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((b0 + b7) >> 6) ); | |||||
| dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((b2 + b5) >> 6) ); | |||||
| dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((b4 + b3) >> 6) ); | |||||
| dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((b6 + b1) >> 6) ); | |||||
| dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((b6 - b1) >> 6) ); | |||||
| dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((b4 - b3) >> 6) ); | |||||
| dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((b2 - b5) >> 6) ); | |||||
| dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((b0 - b7) >> 6) ); | |||||
| } | } | ||||
| } | } | ||||
| @@ -158,13 +156,12 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){ | |||||
| void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){ | void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){ | ||||
| int i, j; | int i, j; | ||||
| int dc = (((dctcoef*)block)[0] + 32) >> 6; | int dc = (((dctcoef*)block)[0] + 32) >> 6; | ||||
| INIT_CLIP | |||||
| pixel *dst = (pixel*)p_dst; | pixel *dst = (pixel*)p_dst; | ||||
| stride >>= sizeof(pixel)-1; | stride >>= sizeof(pixel)-1; | ||||
| for( j = 0; j < 4; j++ ) | for( j = 0; j < 4; j++ ) | ||||
| { | { | ||||
| for( i = 0; i < 4; i++ ) | for( i = 0; i < 4; i++ ) | ||||
| dst[i] = CLIP( dst[i] + dc ); | |||||
| dst[i] = av_clip_pixel( dst[i] + dc ); | |||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| } | } | ||||
| @@ -172,13 +169,12 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){ | |||||
| void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){ | void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){ | ||||
| int i, j; | int i, j; | ||||
| int dc = (((dctcoef*)block)[0] + 32) >> 6; | int dc = (((dctcoef*)block)[0] + 32) >> 6; | ||||
| INIT_CLIP | |||||
| pixel *dst = (pixel*)p_dst; | pixel *dst = (pixel*)p_dst; | ||||
| stride >>= sizeof(pixel)-1; | stride >>= sizeof(pixel)-1; | ||||
| for( j = 0; j < 8; j++ ) | for( j = 0; j < 8; j++ ) | ||||
| { | { | ||||
| for( i = 0; i < 8; i++ ) | for( i = 0; i < 8; i++ ) | ||||
| dst[i] = CLIP( dst[i] + dc ); | |||||
| dst[i] = av_clip_pixel( dst[i] + dc ); | |||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| } | } | ||||
| @@ -454,6 +454,8 @@ static av_cold int X264_init(AVCodecContext *avctx) | |||||
| x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR; | x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR; | ||||
| x4->params.i_threads = avctx->thread_count; | x4->params.i_threads = avctx->thread_count; | ||||
| if (avctx->thread_type) | |||||
| x4->params.b_sliced_threads = avctx->thread_type == FF_THREAD_SLICE; | |||||
| x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT; | x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT; | ||||
| @@ -631,6 +633,7 @@ static const AVCodecDefault x264_defaults[] = { | |||||
| { "coder", "-1" }, | { "coder", "-1" }, | ||||
| { "cmp", "-1" }, | { "cmp", "-1" }, | ||||
| { "threads", AV_STRINGIFY(X264_THREADS_AUTO) }, | { "threads", AV_STRINGIFY(X264_THREADS_AUTO) }, | ||||
| { "thread_type", "0" }, | |||||
| { NULL }, | { NULL }, | ||||
| }; | }; | ||||
| @@ -102,15 +102,13 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){ | |||||
| static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc) | static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc) | ||||
| { | { | ||||
| const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| int i, j; | int i, j; | ||||
| cm += (13*13*dc + 0x200) >> 10; | |||||
| dc = (13*13*dc + 0x200) >> 10; | |||||
| for (i = 0; i < 4; i++) | for (i = 0; i < 4; i++) | ||||
| { | { | ||||
| for (j = 0; j < 4; j++) | for (j = 0; j < 4; j++) | ||||
| dst[j] = cm[ dst[j] ]; | |||||
| dst[j] = av_clip_uint8( dst[j] + dc ); | |||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| @@ -132,7 +132,6 @@ void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block) | |||||
| static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) | static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) | ||||
| { | { | ||||
| int c0, c1, c2, c3, a0, a1, a2, a3; | int c0, c1, c2, c3, a0, a1, a2, a3; | ||||
| const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| a0 = col[8*0]; | a0 = col[8*0]; | ||||
| a1 = col[8*1]; | a1 = col[8*1]; | ||||
| @@ -142,13 +141,13 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col | |||||
| c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); | c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); | ||||
| c1 = a1 * C1 + a3 * C2; | c1 = a1 * C1 + a3 * C2; | ||||
| c3 = a1 * C2 - a3 * C1; | c3 = a1 * C2 - a3 * C1; | ||||
| dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; | |||||
| dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; | |||||
| dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; | |||||
| dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; | |||||
| dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT)); | |||||
| } | } | ||||
| #define RN_SHIFT 15 | #define RN_SHIFT 15 | ||||
| @@ -160,7 +159,6 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col | |||||
| static inline void idct4row(DCTELEM *row) | static inline void idct4row(DCTELEM *row) | ||||
| { | { | ||||
| int c0, c1, c2, c3, a0, a1, a2, a3; | int c0, c1, c2, c3, a0, a1, a2, a3; | ||||
| //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| a0 = row[0]; | a0 = row[0]; | ||||
| a1 = row[1]; | a1 = row[1]; | ||||
| @@ -224,50 +224,48 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, | |||||
| DCTELEM *col) | DCTELEM *col) | ||||
| { | { | ||||
| int a0, a1, a2, a3, b0, b1, b2, b3; | int a0, a1, a2, a3, b0, b1, b2, b3; | ||||
| INIT_CLIP; | |||||
| IDCT_COLS; | IDCT_COLS; | ||||
| dest[0] = CLIP((a0 + b0) >> COL_SHIFT); | |||||
| dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP((a1 + b1) >> COL_SHIFT); | |||||
| dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP((a2 + b2) >> COL_SHIFT); | |||||
| dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP((a3 + b3) >> COL_SHIFT); | |||||
| dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP((a3 - b3) >> COL_SHIFT); | |||||
| dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP((a2 - b2) >> COL_SHIFT); | |||||
| dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP((a1 - b1) >> COL_SHIFT); | |||||
| dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP((a0 - b0) >> COL_SHIFT); | |||||
| dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT); | |||||
| } | } | ||||
| static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, | static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, | ||||
| DCTELEM *col) | DCTELEM *col) | ||||
| { | { | ||||
| int a0, a1, a2, a3, b0, b1, b2, b3; | int a0, a1, a2, a3, b0, b1, b2, b3; | ||||
| INIT_CLIP; | |||||
| IDCT_COLS; | IDCT_COLS; | ||||
| dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); | |||||
| dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT)); | |||||
| dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT)); | |||||
| dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT)); | |||||
| dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT)); | |||||
| dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT)); | |||||
| dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT)); | |||||
| dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT)); | |||||
| dest += line_size; | dest += line_size; | ||||
| dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT)); | |||||
| dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); | |||||
| } | } | ||||
| static inline void FUNC(idctSparseCol)(DCTELEM *col) | static inline void FUNC(idctSparseCol)(DCTELEM *col) | ||||
| @@ -139,8 +139,6 @@ static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right) | |||||
| * @see 8.6 | * @see 8.6 | ||||
| */ | */ | ||||
| static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){ | static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){ | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3; | int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3; | ||||
| int a0_sign = a0 >> 31; /* Store sign */ | int a0_sign = a0 >> 31; /* Store sign */ | ||||
| a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */ | a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */ | ||||
| @@ -163,8 +161,8 @@ static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){ | |||||
| else{ | else{ | ||||
| d = FFMIN(d, clip); | d = FFMIN(d, clip); | ||||
| d = (d ^ d_sign) - d_sign; /* Restore sign */ | d = (d ^ d_sign) - d_sign; /* Restore sign */ | ||||
| src[-1*stride] = cm[src[-1*stride] - d]; | |||||
| src[ 0*stride] = cm[src[ 0*stride] + d]; | |||||
| src[-1*stride] = av_clip_uint8(src[-1*stride] - d); | |||||
| src[ 0*stride] = av_clip_uint8(src[ 0*stride] + d); | |||||
| } | } | ||||
| return 1; | return 1; | ||||
| } | } | ||||
| @@ -234,19 +232,17 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | { | ||||
| int i; | int i; | ||||
| int dc = block[0]; | int dc = block[0]; | ||||
| const uint8_t *cm; | |||||
| dc = (3 * dc + 1) >> 1; | dc = (3 * dc + 1) >> 1; | ||||
| dc = (3 * dc + 16) >> 5; | dc = (3 * dc + 16) >> 5; | ||||
| cm = ff_cropTbl + MAX_NEG_CROP + dc; | |||||
| for(i = 0; i < 8; i++){ | for(i = 0; i < 8; i++){ | ||||
| dest[0] = cm[dest[0]]; | |||||
| dest[1] = cm[dest[1]]; | |||||
| dest[2] = cm[dest[2]]; | |||||
| dest[3] = cm[dest[3]]; | |||||
| dest[4] = cm[dest[4]]; | |||||
| dest[5] = cm[dest[5]]; | |||||
| dest[6] = cm[dest[6]]; | |||||
| dest[7] = cm[dest[7]]; | |||||
| dest[0] = av_clip_uint8(dest[0] + dc); | |||||
| dest[1] = av_clip_uint8(dest[1] + dc); | |||||
| dest[2] = av_clip_uint8(dest[2] + dc); | |||||
| dest[3] = av_clip_uint8(dest[3] + dc); | |||||
| dest[4] = av_clip_uint8(dest[4] + dc); | |||||
| dest[5] = av_clip_uint8(dest[5] + dc); | |||||
| dest[6] = av_clip_uint8(dest[6] + dc); | |||||
| dest[7] = av_clip_uint8(dest[7] + dc); | |||||
| dest += linesize; | dest += linesize; | ||||
| } | } | ||||
| } | } | ||||
| @@ -326,19 +322,17 @@ static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | { | ||||
| int i; | int i; | ||||
| int dc = block[0]; | int dc = block[0]; | ||||
| const uint8_t *cm; | |||||
| dc = ( 3 * dc + 1) >> 1; | dc = ( 3 * dc + 1) >> 1; | ||||
| dc = (17 * dc + 64) >> 7; | dc = (17 * dc + 64) >> 7; | ||||
| cm = ff_cropTbl + MAX_NEG_CROP + dc; | |||||
| for(i = 0; i < 4; i++){ | for(i = 0; i < 4; i++){ | ||||
| dest[0] = cm[dest[0]]; | |||||
| dest[1] = cm[dest[1]]; | |||||
| dest[2] = cm[dest[2]]; | |||||
| dest[3] = cm[dest[3]]; | |||||
| dest[4] = cm[dest[4]]; | |||||
| dest[5] = cm[dest[5]]; | |||||
| dest[6] = cm[dest[6]]; | |||||
| dest[7] = cm[dest[7]]; | |||||
| dest[0] = av_clip_uint8(dest[0] + dc); | |||||
| dest[1] = av_clip_uint8(dest[1] + dc); | |||||
| dest[2] = av_clip_uint8(dest[2] + dc); | |||||
| dest[3] = av_clip_uint8(dest[3] + dc); | |||||
| dest[4] = av_clip_uint8(dest[4] + dc); | |||||
| dest[5] = av_clip_uint8(dest[5] + dc); | |||||
| dest[6] = av_clip_uint8(dest[6] + dc); | |||||
| dest[7] = av_clip_uint8(dest[7] + dc); | |||||
| dest += linesize; | dest += linesize; | ||||
| } | } | ||||
| } | } | ||||
| @@ -403,15 +397,13 @@ static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | { | ||||
| int i; | int i; | ||||
| int dc = block[0]; | int dc = block[0]; | ||||
| const uint8_t *cm; | |||||
| dc = (17 * dc + 4) >> 3; | dc = (17 * dc + 4) >> 3; | ||||
| dc = (12 * dc + 64) >> 7; | dc = (12 * dc + 64) >> 7; | ||||
| cm = ff_cropTbl + MAX_NEG_CROP + dc; | |||||
| for(i = 0; i < 8; i++){ | for(i = 0; i < 8; i++){ | ||||
| dest[0] = cm[dest[0]]; | |||||
| dest[1] = cm[dest[1]]; | |||||
| dest[2] = cm[dest[2]]; | |||||
| dest[3] = cm[dest[3]]; | |||||
| dest[0] = av_clip_uint8(dest[0] + dc); | |||||
| dest[1] = av_clip_uint8(dest[1] + dc); | |||||
| dest[2] = av_clip_uint8(dest[2] + dc); | |||||
| dest[3] = av_clip_uint8(dest[3] + dc); | |||||
| dest += linesize; | dest += linesize; | ||||
| } | } | ||||
| } | } | ||||
| @@ -476,15 +468,13 @@ static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | { | ||||
| int i; | int i; | ||||
| int dc = block[0]; | int dc = block[0]; | ||||
| const uint8_t *cm; | |||||
| dc = (17 * dc + 4) >> 3; | dc = (17 * dc + 4) >> 3; | ||||
| dc = (17 * dc + 64) >> 7; | dc = (17 * dc + 64) >> 7; | ||||
| cm = ff_cropTbl + MAX_NEG_CROP + dc; | |||||
| for(i = 0; i < 4; i++){ | for(i = 0; i < 4; i++){ | ||||
| dest[0] = cm[dest[0]]; | |||||
| dest[1] = cm[dest[1]]; | |||||
| dest[2] = cm[dest[2]]; | |||||
| dest[3] = cm[dest[3]]; | |||||
| dest[0] = av_clip_uint8(dest[0] + dc); | |||||
| dest[1] = av_clip_uint8(dest[1] + dc); | |||||
| dest[2] = av_clip_uint8(dest[2] + dc); | |||||
| dest[3] = av_clip_uint8(dest[3] + dc); | |||||
| dest += linesize; | dest += linesize; | ||||
| } | } | ||||
| } | } | ||||
| @@ -41,7 +41,6 @@ | |||||
| static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) | static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) | ||||
| { | { | ||||
| int16_t *ip = input; | int16_t *ip = input; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; | int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; | ||||
| int Ed, Gd, Add, Bdd, Fd, Hd; | int Ed, Gd, Add, Bdd, Fd, Hd; | ||||
| @@ -147,29 +146,29 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int | |||||
| ip[5*8] = (Fd + Bdd ) >> 4; | ip[5*8] = (Fd + Bdd ) >> 4; | ||||
| ip[6*8] = (Fd - Bdd ) >> 4; | ip[6*8] = (Fd - Bdd ) >> 4; | ||||
| }else if(type==1){ | }else if(type==1){ | ||||
| dst[0*stride] = cm[(Gd + Cd ) >> 4]; | |||||
| dst[7*stride] = cm[(Gd - Cd ) >> 4]; | |||||
| dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4); | |||||
| dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4); | |||||
| dst[1*stride] = cm[(Add + Hd ) >> 4]; | |||||
| dst[2*stride] = cm[(Add - Hd ) >> 4]; | |||||
| dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4); | |||||
| dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4); | |||||
| dst[3*stride] = cm[(Ed + Dd ) >> 4]; | |||||
| dst[4*stride] = cm[(Ed - Dd ) >> 4]; | |||||
| dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4); | |||||
| dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4); | |||||
| dst[5*stride] = cm[(Fd + Bdd ) >> 4]; | |||||
| dst[6*stride] = cm[(Fd - Bdd ) >> 4]; | |||||
| dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4); | |||||
| dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4); | |||||
| }else{ | }else{ | ||||
| dst[0*stride] = cm[dst[0*stride] + ((Gd + Cd ) >> 4)]; | |||||
| dst[7*stride] = cm[dst[7*stride] + ((Gd - Cd ) >> 4)]; | |||||
| dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4)); | |||||
| dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4)); | |||||
| dst[1*stride] = cm[dst[1*stride] + ((Add + Hd ) >> 4)]; | |||||
| dst[2*stride] = cm[dst[2*stride] + ((Add - Hd ) >> 4)]; | |||||
| dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4)); | |||||
| dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4)); | |||||
| dst[3*stride] = cm[dst[3*stride] + ((Ed + Dd ) >> 4)]; | |||||
| dst[4*stride] = cm[dst[4*stride] + ((Ed - Dd ) >> 4)]; | |||||
| dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4)); | |||||
| dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4)); | |||||
| dst[5*stride] = cm[dst[5*stride] + ((Fd + Bdd ) >> 4)]; | |||||
| dst[6*stride] = cm[dst[6*stride] + ((Fd - Bdd ) >> 4)]; | |||||
| dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4)); | |||||
| dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4)); | |||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -190,18 +189,18 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int | |||||
| dst[4*stride]= | dst[4*stride]= | ||||
| dst[5*stride]= | dst[5*stride]= | ||||
| dst[6*stride]= | dst[6*stride]= | ||||
| dst[7*stride]= cm[128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)]; | |||||
| dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)); | |||||
| }else{ | }else{ | ||||
| if(ip[0*8]){ | if(ip[0*8]){ | ||||
| int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); | int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); | ||||
| dst[0*stride] = cm[dst[0*stride] + v]; | |||||
| dst[1*stride] = cm[dst[1*stride] + v]; | |||||
| dst[2*stride] = cm[dst[2*stride] + v]; | |||||
| dst[3*stride] = cm[dst[3*stride] + v]; | |||||
| dst[4*stride] = cm[dst[4*stride] + v]; | |||||
| dst[5*stride] = cm[dst[5*stride] + v]; | |||||
| dst[6*stride] = cm[dst[6*stride] + v]; | |||||
| dst[7*stride] = cm[dst[7*stride] + v]; | |||||
| dst[0*stride] = av_clip_uint8(dst[0*stride] + v); | |||||
| dst[1*stride] = av_clip_uint8(dst[1*stride] + v); | |||||
| dst[2*stride] = av_clip_uint8(dst[2*stride] + v); | |||||
| dst[3*stride] = av_clip_uint8(dst[3*stride] + v); | |||||
| dst[4*stride] = av_clip_uint8(dst[4*stride] + v); | |||||
| dst[5*stride] = av_clip_uint8(dst[5*stride] + v); | |||||
| dst[6*stride] = av_clip_uint8(dst[6*stride] + v); | |||||
| dst[7*stride] = av_clip_uint8(dst[7*stride] + v); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -225,17 +224,16 @@ void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/* | |||||
| void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){ | void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){ | ||||
| int i, dc = (block[0] + 15) >> 5; | int i, dc = (block[0] + 15) >> 5; | ||||
| const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; | |||||
| for(i = 0; i < 8; i++){ | for(i = 0; i < 8; i++){ | ||||
| dest[0] = cm[dest[0]]; | |||||
| dest[1] = cm[dest[1]]; | |||||
| dest[2] = cm[dest[2]]; | |||||
| dest[3] = cm[dest[3]]; | |||||
| dest[4] = cm[dest[4]]; | |||||
| dest[5] = cm[dest[5]]; | |||||
| dest[6] = cm[dest[6]]; | |||||
| dest[7] = cm[dest[7]]; | |||||
| dest[0] = av_clip_uint8(dest[0] + dc); | |||||
| dest[1] = av_clip_uint8(dest[1] + dc); | |||||
| dest[2] = av_clip_uint8(dest[2] + dc); | |||||
| dest[3] = av_clip_uint8(dest[3] + dc); | |||||
| dest[4] = av_clip_uint8(dest[4] + dc); | |||||
| dest[5] = av_clip_uint8(dest[5] + dc); | |||||
| dest[6] = av_clip_uint8(dest[6] + dc); | |||||
| dest[7] = av_clip_uint8(dest[7] + dc); | |||||
| dest += line_size; | dest += line_size; | ||||
| } | } | ||||
| } | } | ||||
| @@ -80,7 +80,6 @@ static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16]) | |||||
| static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) | static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) | ||||
| { | { | ||||
| int i, t0, t1, t2, t3; | int i, t0, t1, t2, t3; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
| DCTELEM tmp[16]; | DCTELEM tmp[16]; | ||||
| for (i = 0; i < 4; i++) { | for (i = 0; i < 4; i++) { | ||||
| @@ -105,10 +104,10 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) | |||||
| t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); | t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); | ||||
| t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); | t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); | ||||
| dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)]; | |||||
| dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)]; | |||||
| dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)]; | |||||
| dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)]; | |||||
| dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); | |||||
| dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3)); | |||||
| dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3)); | |||||
| dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3)); | |||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| } | } | ||||
| @@ -116,14 +115,13 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) | |||||
| static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) | static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) | ||||
| { | { | ||||
| int i, dc = (block[0] + 4) >> 3; | int i, dc = (block[0] + 4) >> 3; | ||||
| uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; | |||||
| block[0] = 0; | block[0] = 0; | ||||
| for (i = 0; i < 4; i++) { | for (i = 0; i < 4; i++) { | ||||
| dst[0] = cm[dst[0]]; | |||||
| dst[1] = cm[dst[1]]; | |||||
| dst[2] = cm[dst[2]]; | |||||
| dst[3] = cm[dst[3]]; | |||||
| dst[0] = av_clip_uint8(dst[0] + dc); | |||||
| dst[1] = av_clip_uint8(dst[1] + dc); | |||||
| dst[2] = av_clip_uint8(dst[2] + dc); | |||||
| dst[3] = av_clip_uint8(dst[3] + dc); | |||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| } | } | ||||
| @@ -104,7 +104,7 @@ cglobal sbr_hf_g_filt, 5, 6, 5 | |||||
| movq m2, [r1] | movq m2, [r1] | ||||
| punpckldq m0, m0 | punpckldq m0, m0 | ||||
| mulps m2, m0 | mulps m2, m0 | ||||
| movq [r0], m2 | |||||
| movlps [r0], m2 | |||||
| add r0, 8 | add r0, 8 | ||||
| add r2, 4 | add r2, 4 | ||||
| add r1, STEP | add r1, STEP | ||||
| @@ -153,7 +153,7 @@ | |||||
| */ | */ | ||||
| #define LIBAVUTIL_VERSION_MAJOR 51 | #define LIBAVUTIL_VERSION_MAJOR 51 | ||||
| #define LIBAVUTIL_VERSION_MINOR 41 | |||||
| #define LIBAVUTIL_VERSION_MINOR 42 | |||||
| #define LIBAVUTIL_VERSION_MICRO 100 | #define LIBAVUTIL_VERSION_MICRO 100 | ||||
| #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | ||||
| @@ -23,7 +23,7 @@ static int flags, checked; | |||||
| void av_force_cpu_flags(int arg){ | void av_force_cpu_flags(int arg){ | ||||
| flags = arg; | flags = arg; | ||||
| checked = 1; | |||||
| checked = arg != -1; | |||||
| } | } | ||||
| int av_get_cpu_flags(void) | int av_get_cpu_flags(void) | ||||
| @@ -39,6 +39,13 @@ int av_get_cpu_flags(void) | |||||
| return flags; | return flags; | ||||
| } | } | ||||
| void av_set_cpu_flags_mask(int mask) | |||||
| { | |||||
| checked = 0; | |||||
| flags = av_get_cpu_flags() & mask; | |||||
| checked = 1; | |||||
| } | |||||
| #ifdef TEST | #ifdef TEST | ||||
| #undef printf | #undef printf | ||||
| @@ -21,6 +21,8 @@ | |||||
| #ifndef AVUTIL_CPU_H | #ifndef AVUTIL_CPU_H | ||||
| #define AVUTIL_CPU_H | #define AVUTIL_CPU_H | ||||
| #include "attributes.h" | |||||
| #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ | #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ | ||||
| /* lower 16 bits - CPU features */ | /* lower 16 bits - CPU features */ | ||||
| @@ -49,12 +51,19 @@ | |||||
| */ | */ | ||||
| int av_get_cpu_flags(void); | int av_get_cpu_flags(void); | ||||
| /** | /** | ||||
| * Disables cpu detection and forces the specified flags. | * Disables cpu detection and forces the specified flags. | ||||
| */ | */ | ||||
| void av_force_cpu_flags(int flags); | void av_force_cpu_flags(int flags); | ||||
| /** | |||||
| * Set a mask on flags returned by av_get_cpu_flags(). | |||||
| * This function is mainly useful for testing. | |||||
| * Please use av_force_cpu_flags() and av_get_cpu_flags() instead which are more flexible | |||||
| * | |||||
| * @warning this function is not thread safe. | |||||
| */ | |||||
| attribute_deprecated void av_set_cpu_flags_mask(int mask); | |||||
| /* The following CPU-specific functions shall not be called directly. */ | /* The following CPU-specific functions shall not be called directly. */ | ||||
| int ff_get_cpu_flags_arm(void); | int ff_get_cpu_flags_arm(void); | ||||
| @@ -144,7 +144,7 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize, | |||||
| static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, | static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, | ||||
| const uint8_t *src, const int16_t *filter, | const uint8_t *src, const int16_t *filter, | ||||
| const int16_t *filterPos, int filterSize) | |||||
| const int32_t *filterPos, int filterSize) | |||||
| { | { | ||||
| register int i; | register int i; | ||||
| DECLARE_ALIGNED(16, int, tempo)[4]; | DECLARE_ALIGNED(16, int, tempo)[4]; | ||||
| @@ -63,7 +63,7 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride, | |||||
| static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, | static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, | ||||
| const int16_t *filter, | const int16_t *filter, | ||||
| const int16_t *filterPos, int filterSize) | |||||
| const int32_t *filterPos, int filterSize) | |||||
| { | { | ||||
| int i; | int i; | ||||
| int32_t *dst = (int32_t *) _dst; | int32_t *dst = (int32_t *) _dst; | ||||
| @@ -89,7 +89,7 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t | |||||
| static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, | static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, | ||||
| const int16_t *filter, | const int16_t *filter, | ||||
| const int16_t *filterPos, int filterSize) | |||||
| const int32_t *filterPos, int filterSize) | |||||
| { | { | ||||
| int i; | int i; | ||||
| const uint16_t *src = (const uint16_t *) _src; | const uint16_t *src = (const uint16_t *) _src; | ||||
| @@ -113,7 +113,7 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t | |||||
| // bilinear / bicubic scaling | // bilinear / bicubic scaling | ||||
| static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, | static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, | ||||
| const int16_t *filter, const int16_t *filterPos, | |||||
| const int16_t *filter, const int32_t *filterPos, | |||||
| int filterSize) | int filterSize) | ||||
| { | { | ||||
| int i; | int i; | ||||
| @@ -131,7 +131,7 @@ static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t * | |||||
| } | } | ||||
| static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, | static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, | ||||
| const int16_t *filter, const int16_t *filterPos, | |||||
| const int16_t *filter, const int32_t *filterPos, | |||||
| int filterSize) | int filterSize) | ||||
| { | { | ||||
| int i; | int i; | ||||
| @@ -234,7 +234,7 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, | |||||
| static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, | static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, | ||||
| const uint8_t *src_in[4], int srcW, int xInc, | const uint8_t *src_in[4], int srcW, int xInc, | ||||
| const int16_t *hLumFilter, | const int16_t *hLumFilter, | ||||
| const int16_t *hLumFilterPos, int hLumFilterSize, | |||||
| const int32_t *hLumFilterPos, int hLumFilterSize, | |||||
| uint8_t *formatConvBuffer, | uint8_t *formatConvBuffer, | ||||
| uint32_t *pal, int isAlpha) | uint32_t *pal, int isAlpha) | ||||
| { | { | ||||
| @@ -282,7 +282,7 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, | |||||
| static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, | static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, | ||||
| const uint8_t *src_in[4], | const uint8_t *src_in[4], | ||||
| int srcW, int xInc, const int16_t *hChrFilter, | int srcW, int xInc, const int16_t *hChrFilter, | ||||
| const int16_t *hChrFilterPos, int hChrFilterSize, | |||||
| const int32_t *hChrFilterPos, int hChrFilterSize, | |||||
| uint8_t *formatConvBuffer, uint32_t *pal) | uint8_t *formatConvBuffer, uint32_t *pal) | ||||
| { | { | ||||
| const uint8_t *src1 = src_in[1], *src2 = src_in[2]; | const uint8_t *src1 = src_in[1], *src2 = src_in[2]; | ||||
| @@ -326,10 +326,10 @@ static int swScale(SwsContext *c, const uint8_t* src[], | |||||
| const int chrXInc= c->chrXInc; | const int chrXInc= c->chrXInc; | ||||
| const enum PixelFormat dstFormat= c->dstFormat; | const enum PixelFormat dstFormat= c->dstFormat; | ||||
| const int flags= c->flags; | const int flags= c->flags; | ||||
| int16_t *vLumFilterPos= c->vLumFilterPos; | |||||
| int16_t *vChrFilterPos= c->vChrFilterPos; | |||||
| int16_t *hLumFilterPos= c->hLumFilterPos; | |||||
| int16_t *hChrFilterPos= c->hChrFilterPos; | |||||
| int32_t *vLumFilterPos= c->vLumFilterPos; | |||||
| int32_t *vChrFilterPos= c->vChrFilterPos; | |||||
| int32_t *hLumFilterPos= c->hLumFilterPos; | |||||
| int32_t *hChrFilterPos= c->hChrFilterPos; | |||||
| int16_t *hLumFilter= c->hLumFilter; | int16_t *hLumFilter= c->hLumFilter; | ||||
| int16_t *hChrFilter= c->hChrFilter; | int16_t *hChrFilter= c->hChrFilter; | ||||
| int32_t *lumMmxFilter= c->lumMmxFilter; | int32_t *lumMmxFilter= c->lumMmxFilter; | ||||
| @@ -299,10 +299,10 @@ typedef struct SwsContext { | |||||
| int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes. | int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes. | ||||
| int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes. | int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes. | ||||
| int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes. | int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes. | ||||
| int16_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. | |||||
| int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. | |||||
| int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. | |||||
| int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. | |||||
| int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. | |||||
| int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. | |||||
| int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. | |||||
| int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. | |||||
| int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. | int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. | ||||
| int hChrFilterSize; ///< Horizontal filter size for chroma pixels. | int hChrFilterSize; ///< Horizontal filter size for chroma pixels. | ||||
| int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. | int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. | ||||
| @@ -515,10 +515,10 @@ typedef struct SwsContext { | |||||
| /** @{ */ | /** @{ */ | ||||
| void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, | void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, | ||||
| const uint8_t *src, const int16_t *filter, | const uint8_t *src, const int16_t *filter, | ||||
| const int16_t *filterPos, int filterSize); | |||||
| const int32_t *filterPos, int filterSize); | |||||
| void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, | void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, | ||||
| const uint8_t *src, const int16_t *filter, | const uint8_t *src, const int16_t *filter, | ||||
| const int16_t *filterPos, int filterSize); | |||||
| const int32_t *filterPos, int filterSize); | |||||
| /** @} */ | /** @} */ | ||||
| /// Color range conversion function for luma plane if needed. | /// Color range conversion function for luma plane if needed. | ||||
| @@ -191,7 +191,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist | |||||
| dist-1.0); | dist-1.0); | ||||
| } | } | ||||
| static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, | |||||
| static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc, | |||||
| int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, | int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, | ||||
| SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) | SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) | ||||
| { | { | ||||
| @@ -207,7 +207,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi | |||||
| emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions) | emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions) | ||||
| // NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end | // NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end | ||||
| FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(int16_t), fail); | |||||
| FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail); | |||||
| if (FFABS(xInc - 0x10000) <10) { // unscaled | if (FFABS(xInc - 0x10000) <10) { // unscaled | ||||
| int i; | int i; | ||||
| @@ -38,7 +38,7 @@ SECTION .text | |||||
| ; (SwsContext *c, int{16,32}_t *dst, | ; (SwsContext *c, int{16,32}_t *dst, | ||||
| ; int dstW, const uint{8,16}_t *src, | ; int dstW, const uint{8,16}_t *src, | ||||
| ; const int16_t *filter, | ; const int16_t *filter, | ||||
| ; const int16_t *filterPos, int filterSize); | |||||
| ; const int32_t *filterPos, int filterSize); | |||||
| ; | ; | ||||
| ; Scale one horizontal line. Input is either 8-bits width or 16-bits width | ; Scale one horizontal line. Input is either 8-bits width or 16-bits width | ||||
| ; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to | ; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to | ||||
| @@ -53,6 +53,9 @@ SECTION .text | |||||
| cglobal hscale%1to%2_%4_%5, %6, 7, %7 | cglobal hscale%1to%2_%4_%5, %6, 7, %7 | ||||
| %if ARCH_X86_64 | %if ARCH_X86_64 | ||||
| movsxd r2, r2d | movsxd r2, r2d | ||||
| %define mov32 movsxd | |||||
| %else ; x86-32 | |||||
| %define mov32 mov | |||||
| %endif ; x86-64 | %endif ; x86-64 | ||||
| %if %2 == 19 | %if %2 == 19 | ||||
| %if mmsize == 8 ; mmx | %if mmsize == 8 ; mmx | ||||
| @@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||||
| %else ; %2 == 19 | %else ; %2 == 19 | ||||
| lea r1, [r1+r2*(4>>r2shr)] | lea r1, [r1+r2*(4>>r2shr)] | ||||
| %endif ; %2 == 15/19 | %endif ; %2 == 15/19 | ||||
| lea r5, [r5+r2*(2>>r2shr)] | |||||
| lea r5, [r5+r2*(4>>r2shr)] | |||||
| neg r2 | neg r2 | ||||
| .loop: | .loop: | ||||
| %if %3 == 4 ; filterSize == 4 scaling | %if %3 == 4 ; filterSize == 4 scaling | ||||
| ; load 2x4 or 4x4 source pixels into m0/m1 | ; load 2x4 or 4x4 source pixels into m0/m1 | ||||
| movsx r0, word [r5+r2*2+0] ; filterPos[0] | |||||
| movsx r6, word [r5+r2*2+2] ; filterPos[1] | |||||
| mov32 r0, dword [r5+r2*4+0] ; filterPos[0] | |||||
| mov32 r6, dword [r5+r2*4+4] ; filterPos[1] | |||||
| movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}] | movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}] | ||||
| %if mmsize == 8 | %if mmsize == 8 | ||||
| movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] | movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] | ||||
| @@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||||
| %else ; %1 == 8 | %else ; %1 == 8 | ||||
| movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] | movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] | ||||
| %endif | %endif | ||||
| movsx r0, word [r5+r2*2+4] ; filterPos[2] | |||||
| movsx r6, word [r5+r2*2+6] ; filterPos[3] | |||||
| mov32 r0, dword [r5+r2*4+8] ; filterPos[2] | |||||
| mov32 r6, dword [r5+r2*4+12] ; filterPos[3] | |||||
| movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}] | movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}] | ||||
| %if %1 > 8 | %if %1 > 8 | ||||
| movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}] | movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}] | ||||
| @@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||||
| %endif ; mmx/sse2/ssse3/sse4 | %endif ; mmx/sse2/ssse3/sse4 | ||||
| %else ; %3 == 8, i.e. filterSize == 8 scaling | %else ; %3 == 8, i.e. filterSize == 8 scaling | ||||
| ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5 | ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5 | ||||
| movsx r0, word [r5+r2*1+0] ; filterPos[0] | |||||
| movsx r6, word [r5+r2*1+2] ; filterPos[1] | |||||
| mov32 r0, dword [r5+r2*2+0] ; filterPos[0] | |||||
| mov32 r6, dword [r5+r2*2+4] ; filterPos[1] | |||||
| movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}] | movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}] | ||||
| %if mmsize == 8 | %if mmsize == 8 | ||||
| movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}] | movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}] | ||||
| @@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||||
| movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}] | movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}] | ||||
| %else ; mmsize == 16 | %else ; mmsize == 16 | ||||
| movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}] | movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}] | ||||
| movsx r0, word [r5+r2*1+4] ; filterPos[2] | |||||
| movsx r6, word [r5+r2*1+6] ; filterPos[3] | |||||
| mov32 r0, dword [r5+r2*2+8] ; filterPos[2] | |||||
| mov32 r6, dword [r5+r2*2+12] ; filterPos[3] | |||||
| movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}] | movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}] | ||||
| movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}] | movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}] | ||||
| %endif ; mmsize == 8/16 | %endif ; mmsize == 8/16 | ||||
| @@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||||
| %define r1x r1 | %define r1x r1 | ||||
| %define filter2 r6m | %define filter2 r6m | ||||
| %endif ; x86-32/64 | %endif ; x86-32/64 | ||||
| lea r5, [r5+r2*2] | |||||
| lea r5, [r5+r2*4] | |||||
| %if %2 == 15 | %if %2 == 15 | ||||
| lea r1, [r1+r2*2] | lea r1, [r1+r2*2] | ||||
| %else ; %2 == 19 | %else ; %2 == 19 | ||||
| @@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||||
| neg r2 | neg r2 | ||||
| .loop: | .loop: | ||||
| movsx r0, word [r5+r2*2+0] ; filterPos[0] | |||||
| movsx r1x, word [r5+r2*2+2] ; filterPos[1] | |||||
| mov32 r0, dword [r5+r2*4+0] ; filterPos[0] | |||||
| mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] | |||||
| ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)? | ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)? | ||||
| pxor m4, m4 | pxor m4, m4 | ||||
| pxor m5, m5 | pxor m5, m5 | ||||
| @@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||||
| jl .innerloop | jl .innerloop | ||||
| %ifidn %4, X4 | %ifidn %4, X4 | ||||
| movsx r1x, word [r5+r2*2+2] ; filterPos[1] | |||||
| mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] | |||||
| movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0] | movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0] | ||||
| sub r1x, r6 ; and first 4 srcpx of dstpx[1] | sub r1x, r6 ; and first 4 srcpx of dstpx[1] | ||||
| %if %1 > 8 | %if %1 > 8 | ||||
| @@ -94,8 +94,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI | |||||
| int16_t **alpPixBuf= c->alpPixBuf; | int16_t **alpPixBuf= c->alpPixBuf; | ||||
| const int vLumBufSize= c->vLumBufSize; | const int vLumBufSize= c->vLumBufSize; | ||||
| const int vChrBufSize= c->vChrBufSize; | const int vChrBufSize= c->vChrBufSize; | ||||
| int16_t *vLumFilterPos= c->vLumFilterPos; | |||||
| int16_t *vChrFilterPos= c->vChrFilterPos; | |||||
| int32_t *vLumFilterPos= c->vLumFilterPos; | |||||
| int32_t *vChrFilterPos= c->vChrFilterPos; | |||||
| int16_t *vLumFilter= c->vLumFilter; | int16_t *vLumFilter= c->vLumFilter; | ||||
| int16_t *vChrFilter= c->vChrFilter; | int16_t *vChrFilter= c->vChrFilter; | ||||
| int32_t *lumMmxFilter= c->lumMmxFilter; | int32_t *lumMmxFilter= c->lumMmxFilter; | ||||
| @@ -266,7 +266,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( | |||||
| SwsContext *c, int16_t *data, \ | SwsContext *c, int16_t *data, \ | ||||
| int dstW, const uint8_t *src, \ | int dstW, const uint8_t *src, \ | ||||
| const int16_t *filter, \ | const int16_t *filter, \ | ||||
| const int16_t *filterPos, int filterSize) | |||||
| const int32_t *filterPos, int filterSize) | |||||
| #define SCALE_FUNCS(filter_n, opt) \ | #define SCALE_FUNCS(filter_n, opt) \ | ||||
| SCALE_FUNC(filter_n, 8, 15, opt); \ | SCALE_FUNC(filter_n, 8, 15, opt); \ | ||||
| @@ -1450,7 +1450,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, | |||||
| int dstWidth, const uint8_t *src, | int dstWidth, const uint8_t *src, | ||||
| int srcW, int xInc) | int srcW, int xInc) | ||||
| { | { | ||||
| int16_t *filterPos = c->hLumFilterPos; | |||||
| int32_t *filterPos = c->hLumFilterPos; | |||||
| int16_t *filter = c->hLumFilter; | int16_t *filter = c->hLumFilter; | ||||
| void *mmx2FilterCode= c->lumMmx2FilterCode; | void *mmx2FilterCode= c->lumMmx2FilterCode; | ||||
| int i; | int i; | ||||
| @@ -1546,7 +1546,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, | |||||
| int dstWidth, const uint8_t *src1, | int dstWidth, const uint8_t *src1, | ||||
| const uint8_t *src2, int srcW, int xInc) | const uint8_t *src2, int srcW, int xInc) | ||||
| { | { | ||||
| int16_t *filterPos = c->hChrFilterPos; | |||||
| int32_t *filterPos = c->hChrFilterPos; | |||||
| int16_t *filter = c->hChrFilter; | int16_t *filter = c->hChrFilter; | ||||
| void *mmx2FilterCode= c->chrMmx2FilterCode; | void *mmx2FilterCode= c->chrMmx2FilterCode; | ||||
| int i; | int i; | ||||
| @@ -142,7 +142,7 @@ fate:: $(FATE) | |||||
| $(FATE): $(TOOL)$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF)) | $(FATE): $(TOOL)$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF)) | ||||
| @echo "TEST $(@:fate-%=%)" | @echo "TEST $(@:fate-%=%)" | ||||
| $(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(TOOL)' | |||||
| $(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)' | |||||
| fate-list: | fate-list: | ||||
| @printf '%s\n' $(sort $(FATE)) | @printf '%s\n' $(sort $(FATE)) | ||||
| @@ -17,7 +17,7 @@ ref=${7:-"${base}/ref/fate/${test}"} | |||||
| fuzz=$8 | fuzz=$8 | ||||
| threads=${9:-1} | threads=${9:-1} | ||||
| thread_type=${10:-frame+slice} | thread_type=${10:-frame+slice} | ||||
| tool=${11} | |||||
| cpuflags=${11:-all} | |||||
| outdir="tests/data/fate" | outdir="tests/data/fate" | ||||
| outfile="${outdir}/${test}" | outfile="${outdir}/${test}" | ||||
| @@ -51,7 +51,7 @@ run(){ | |||||
| } | } | ||||
| avconv(){ | avconv(){ | ||||
| run $tool -nostats -threads $threads -thread_type $thread_type "$@" | |||||
| run ffmpeg -nostats -threads $threads -thread_type $thread_type -cpuflags $cpuflags "$@" | |||||
| } | } | ||||
| framecrc(){ | framecrc(){ | ||||
| @@ -77,7 +77,7 @@ pcm(){ | |||||
| regtest(){ | regtest(){ | ||||
| t="${test#$2-}" | t="${test#$2-}" | ||||
| ref=${base}/ref/$2/$t | ref=${base}/ref/$2/$t | ||||
| ${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type" "$tool" "$samples" | |||||
| ${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type" "$cpuflags" "$samples" | |||||
| } | } | ||||
| codectest(){ | codectest(){ | ||||
| @@ -10,7 +10,7 @@ raw_src_dir=$3 | |||||
| target_exec=$4 | target_exec=$4 | ||||
| target_path=$5 | target_path=$5 | ||||
| threads=${6:-1} | threads=${6:-1} | ||||
| tool=$8 | |||||
| cpuflags=${8:-all} | |||||
| samples=$9 | samples=$9 | ||||
| datadir="./tests/data" | datadir="./tests/data" | ||||
| @@ -20,7 +20,7 @@ this="$test.$test_ref" | |||||
| outfile="$datadir/$test_ref/" | outfile="$datadir/$test_ref/" | ||||
| # various files | # various files | ||||
| avconv="$target_exec ${target_path}/${tool}" | |||||
| avconv="$target_exec ${target_path}/ffmpeg" | |||||
| tiny_psnr="tests/tiny_psnr" | tiny_psnr="tests/tiny_psnr" | ||||
| raw_src="${target_path}/$raw_src_dir/%02d.pgm" | raw_src="${target_path}/$raw_src_dir/%02d.pgm" | ||||
| raw_dst="$datadir/$this.out.yuv" | raw_dst="$datadir/$this.out.yuv" | ||||
| @@ -45,7 +45,7 @@ echov(){ | |||||
| . $(dirname $0)/md5.sh | . $(dirname $0)/md5.sh | ||||
| AVCONV_OPTS="-nostats -y" | |||||
| AVCONV_OPTS="-nostats -y -cpuflags $cpuflags" | |||||
| COMMON_OPTS="-flags +bitexact -idct simple -sws_flags +accurate_rnd+bitexact" | COMMON_OPTS="-flags +bitexact -idct simple -sws_flags +accurate_rnd+bitexact" | ||||
| DEC_OPTS="$COMMON_OPTS -threads $threads" | DEC_OPTS="$COMMON_OPTS -threads $threads" | ||||
| ENC_OPTS="$COMMON_OPTS -threads 1 -dct fastint" | ENC_OPTS="$COMMON_OPTS -threads 1 -dct fastint" | ||||