* qatar/master: (32 commits) 10-bit H.264 x86 chroma v loopfilter asm Port SMPTE S302M audio decoder from FFmbc 0.3. [Copyright headers corrected] Fix crash of interlaced MPEG2 decoding h264pred: fix one more aliasing violation. doc/APIchanges: fill in missing hashes and dates. flacenc: use proper initializers for AVOption default values. lavc: deprecate named constants for deprecated antialias_algo. aac: workaround for compilation on cygwin swscale: extend YUV422p support to 10bits depth tiff: add support for inverted FillOrder for uncompressed data Remove unused softfloat implementation. h264pred: fix aliasing violations. rotozoom: Eliminate French variable name. rotozoom: Check return value of fread(). rotozoom: Return an error value instead of calling exit(). rotozoom: Make init_demo() return int and check for errors on invocation. rotozoom: Drop silly UINT8 typedef. rotozoom: Drop some unnecessary parentheses. rotozoom: K&R coding style cosmetics rtsp: Only do keepalive using GET_PARAMETER if the server supports it ... Conflicts: Changelog cmdutils.c doc/APIchanges doc/general.texi ffmpeg.c ffplay.c libavcodec/h264pred_template.c libavcodec/resample.c libavutil/pixfmt.h libavutil/softfloat.c libavutil/softfloat.h tests/rotozoom.c Merged-by: Michael Niedermayer <michaelni@gmx.at>tags/n0.8
| @@ -12,6 +12,8 @@ version <next>: | |||||
| - Lots of deprecated API cruft removed | - Lots of deprecated API cruft removed | ||||
| - fft and imdct optimizations for AVX (Sandy Bridge) processors | - fft and imdct optimizations for AVX (Sandy Bridge) processors | ||||
| - showinfo filter added | - showinfo filter added | ||||
| - DPX image encoder | |||||
| - SMPTE 302M AES3 audio decoder | |||||
| version 0.7_beta1: | version 0.7_beta1: | ||||
| @@ -13,7 +13,7 @@ libavutil: 2011-04-18 | |||||
| API changes, most recent first: | API changes, most recent first: | ||||
| 2011-05-10 - xxxxxxx - lavc 53.3.0 - avcodec.h | |||||
| 2011-05-10 - 188dea1 - lavc 53.3.0 - avcodec.h | |||||
| Deprecate AVLPCType and the following fields in | Deprecate AVLPCType and the following fields in | ||||
| AVCodecContext: lpc_coeff_precision, prediction_order_method, | AVCodecContext: lpc_coeff_precision, prediction_order_method, | ||||
| min_partition_order, max_partition_order, lpc_type, lpc_passes. | min_partition_order, max_partition_order, lpc_type, lpc_passes. | ||||
| @@ -43,15 +43,15 @@ API changes, most recent first: | |||||
| Add av_dynarray_add function for adding | Add av_dynarray_add function for adding | ||||
| an element to a dynamic array. | an element to a dynamic array. | ||||
| 2011-04-XX - bebe72f - lavu 51.1.0 - avutil.h | |||||
| 2011-04-26 - bebe72f - lavu 51.1.0 - avutil.h | |||||
| Add AVPictureType enum and av_get_picture_type_char(), deprecate | Add AVPictureType enum and av_get_picture_type_char(), deprecate | ||||
| FF_*_TYPE defines and av_get_pict_type_char() defined in | FF_*_TYPE defines and av_get_pict_type_char() defined in | ||||
| libavcodec/avcodec.h. | libavcodec/avcodec.h. | ||||
| 2011-04-xx - 10d3940 - lavfi 2.3.0 - avfilter.h | |||||
| 2011-04-26 - 10d3940 - lavfi 2.3.0 - avfilter.h | |||||
| Add pict_type and key_frame fields to AVFilterBufferRefVideo. | Add pict_type and key_frame fields to AVFilterBufferRefVideo. | ||||
| 2011-04-xx - 7a11c82 - lavfi 2.2.0 - vsrc_buffer | |||||
| 2011-04-26 - 7a11c82 - lavfi 2.2.0 - vsrc_buffer | |||||
| Add sample_aspect_ratio fields to vsrc_buffer arguments | Add sample_aspect_ratio fields to vsrc_buffer arguments | ||||
| 2011-04-21 - 94f7451 - lavc 53.1.0 - avcodec.h | 2011-04-21 - 94f7451 - lavc 53.1.0 - avcodec.h | ||||
| @@ -19,7 +19,6 @@ integer.c 128bit integer math | |||||
| lls.c | lls.c | ||||
| mathematics.c greatest common divisor, integer sqrt, integer log2, ... | mathematics.c greatest common divisor, integer sqrt, integer log2, ... | ||||
| mem.c memory allocation routines with guaranteed alignment | mem.c memory allocation routines with guaranteed alignment | ||||
| softfloat.c | |||||
| Headers: | Headers: | ||||
| bswap.h big/little/native-endian conversion code | bswap.h big/little/native-endian conversion code | ||||
| @@ -677,6 +677,7 @@ following image formats are supported: | |||||
| @item Sierra VMD audio @tab @tab X | @item Sierra VMD audio @tab @tab X | ||||
| @tab Used in Sierra VMD files. | @tab Used in Sierra VMD files. | ||||
| @item Smacker audio @tab @tab X | @item Smacker audio @tab @tab X | ||||
| @item SMPTE 302M AES3 audio @tab @tab X | |||||
| @item Sonic @tab X @tab X | @item Sonic @tab X @tab X | ||||
| @tab experimental codec | @tab experimental codec | ||||
| @item Sonic lossless @tab X @tab X | @item Sonic lossless @tab X @tab X | ||||
| @@ -663,11 +663,11 @@ static void choose_pixel_fmt(AVStream *st, AVCodec *codec) | |||||
| } | } | ||||
| if (*p == -1) { | if (*p == -1) { | ||||
| if(st->codec->pix_fmt != PIX_FMT_NONE) | if(st->codec->pix_fmt != PIX_FMT_NONE) | ||||
| av_log(NULL, AV_LOG_WARNING, | |||||
| "Incompatible pixel format '%s' for codec '%s', auto-selecting format '%s'\n", | |||||
| av_pix_fmt_descriptors[st->codec->pix_fmt].name, | |||||
| codec->name, | |||||
| av_pix_fmt_descriptors[codec->pix_fmts[0]].name); | |||||
| av_log(NULL, AV_LOG_WARNING, | |||||
| "Incompatible pixel format '%s' for codec '%s', auto-selecting format '%s'\n", | |||||
| av_pix_fmt_descriptors[st->codec->pix_fmt].name, | |||||
| codec->name, | |||||
| av_pix_fmt_descriptors[codec->pix_fmts[0]].name); | |||||
| st->codec->pix_fmt = codec->pix_fmts[0]; | st->codec->pix_fmt = codec->pix_fmts[0]; | ||||
| } | } | ||||
| } | } | ||||
| @@ -329,6 +329,7 @@ OBJS-$(CONFIG_RV30_DECODER) += rv30.o rv34.o rv30dsp.o \ | |||||
| mpegvideo.o error_resilience.o | mpegvideo.o error_resilience.o | ||||
| OBJS-$(CONFIG_RV40_DECODER) += rv40.o rv34.o rv40dsp.o \ | OBJS-$(CONFIG_RV40_DECODER) += rv40.o rv34.o rv40dsp.o \ | ||||
| mpegvideo.o error_resilience.o | mpegvideo.o error_resilience.o | ||||
| OBJS-$(CONFIG_S302M_DECODER) += s302m.o | |||||
| OBJS-$(CONFIG_SGI_DECODER) += sgidec.o | OBJS-$(CONFIG_SGI_DECODER) += sgidec.o | ||||
| OBJS-$(CONFIG_SGI_ENCODER) += sgienc.o rle.o | OBJS-$(CONFIG_SGI_ENCODER) += sgienc.o rle.o | ||||
| OBJS-$(CONFIG_SHORTEN_DECODER) += shorten.o | OBJS-$(CONFIG_SHORTEN_DECODER) += shorten.o | ||||
| @@ -30,6 +30,8 @@ | |||||
| * add sane pulse detection | * add sane pulse detection | ||||
| ***********************************/ | ***********************************/ | ||||
| #include "libavutil/libm.h" // brought forward to work around cygwin header breakage | |||||
| #include <float.h> | #include <float.h> | ||||
| #include <math.h> | #include <math.h> | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| @@ -37,7 +39,6 @@ | |||||
| #include "aac.h" | #include "aac.h" | ||||
| #include "aacenc.h" | #include "aacenc.h" | ||||
| #include "aactab.h" | #include "aactab.h" | ||||
| #include "libavutil/libm.h" | |||||
| /** bits needed to code codebook run value for long windows */ | /** bits needed to code codebook run value for long windows */ | ||||
| static const uint8_t run_value_bits_long[64] = { | static const uint8_t run_value_bits_long[64] = { | ||||
| @@ -184,6 +184,7 @@ void avcodec_register_all(void) | |||||
| REGISTER_ENCDEC (RV20, rv20); | REGISTER_ENCDEC (RV20, rv20); | ||||
| REGISTER_DECODER (RV30, rv30); | REGISTER_DECODER (RV30, rv30); | ||||
| REGISTER_DECODER (RV40, rv40); | REGISTER_DECODER (RV40, rv40); | ||||
| REGISTER_DECODER (S302M, s302m); | |||||
| REGISTER_ENCDEC (SGI, sgi); | REGISTER_ENCDEC (SGI, sgi); | ||||
| REGISTER_DECODER (SMACKER, smacker); | REGISTER_DECODER (SMACKER, smacker); | ||||
| REGISTER_DECODER (SMC, smc); | REGISTER_DECODER (SMC, smc); | ||||
| @@ -232,6 +232,7 @@ enum CodecID { | |||||
| CODEC_ID_PCM_F64LE, | CODEC_ID_PCM_F64LE, | ||||
| CODEC_ID_PCM_BLURAY, | CODEC_ID_PCM_BLURAY, | ||||
| CODEC_ID_PCM_LXF, | CODEC_ID_PCM_LXF, | ||||
| CODEC_ID_S302M, | |||||
| /* various ADPCM codecs */ | /* various ADPCM codecs */ | ||||
| CODEC_ID_ADPCM_IMA_QT= 0x11000, | CODEC_ID_ADPCM_IMA_QT= 0x11000, | ||||
| @@ -1352,22 +1352,22 @@ static av_cold int flac_encode_close(AVCodecContext *avctx) | |||||
| #define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM | #define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM | ||||
| static const AVOption options[] = { | static const AVOption options[] = { | ||||
| { "lpc_coeff_precision", "LPC coefficient precision", offsetof(FlacEncodeContext, options.lpc_coeff_precision), FF_OPT_TYPE_INT, 15, 0, MAX_LPC_PRECISION, FLAGS }, | |||||
| { "lpc_type", "LPC algorithm", offsetof(FlacEncodeContext, options.lpc_type), FF_OPT_TYPE_INT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_NB-1, FLAGS, "lpc_type" }, | |||||
| { "none", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_NONE, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, | |||||
| { "fixed", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_FIXED, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, | |||||
| { "levinson", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_LEVINSON, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, | |||||
| { "cholesky", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_CHOLESKY, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, | |||||
| { "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, FLAGS }, | |||||
| { "min_partition_order", NULL, offsetof(FlacEncodeContext, options.min_partition_order), FF_OPT_TYPE_INT, -1, -1, MAX_PARTITION_ORDER, FLAGS }, | |||||
| { "max_partition_order", NULL, offsetof(FlacEncodeContext, options.max_partition_order), FF_OPT_TYPE_INT, -1, -1, MAX_PARTITION_ORDER, FLAGS }, | |||||
| { "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), FF_OPT_TYPE_INT, -1, -1, ORDER_METHOD_LOG, FLAGS, "predm" }, | |||||
| { "estimation", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_EST, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "2level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_2LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "4level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_4LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "8level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_8LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "search", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_SEARCH, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "log", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_LOG, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "lpc_coeff_precision", "LPC coefficient precision", offsetof(FlacEncodeContext, options.lpc_coeff_precision), FF_OPT_TYPE_INT, {.dbl = 15 }, 0, MAX_LPC_PRECISION, FLAGS }, | |||||
| { "lpc_type", "LPC algorithm", offsetof(FlacEncodeContext, options.lpc_type), FF_OPT_TYPE_INT, {.dbl = FF_LPC_TYPE_DEFAULT }, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_NB-1, FLAGS, "lpc_type" }, | |||||
| { "none", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LPC_TYPE_NONE }, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, | |||||
| { "fixed", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LPC_TYPE_FIXED }, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, | |||||
| { "levinson", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LPC_TYPE_LEVINSON }, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, | |||||
| { "cholesky", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LPC_TYPE_CHOLESKY }, INT_MIN, INT_MAX, FLAGS, "lpc_type" }, | |||||
| { "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, FLAGS }, | |||||
| { "min_partition_order", NULL, offsetof(FlacEncodeContext, options.min_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, MAX_PARTITION_ORDER, FLAGS }, | |||||
| { "max_partition_order", NULL, offsetof(FlacEncodeContext, options.max_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, MAX_PARTITION_ORDER, FLAGS }, | |||||
| { "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, ORDER_METHOD_LOG, FLAGS, "predm" }, | |||||
| { "estimation", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_EST }, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "2level", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_2LEVEL }, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "4level", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_4LEVEL }, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "8level", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_8LEVEL }, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "search", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_SEARCH }, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { "log", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = ORDER_METHOD_LOG }, INT_MIN, INT_MAX, FLAGS, "predm" }, | |||||
| { NULL }, | { NULL }, | ||||
| }; | }; | ||||
| @@ -305,11 +305,11 @@ static const AVOption options[]={ | |||||
| {"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, | {"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, | ||||
| #if FF_API_ANTIALIAS_ALGO | #if FF_API_ANTIALIAS_ALGO | ||||
| {"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|D, "aa"}, | {"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|D, "aa"}, | ||||
| #endif | |||||
| {"auto", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_AUTO }, INT_MIN, INT_MAX, V|D, "aa"}, | {"auto", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_AUTO }, INT_MIN, INT_MAX, V|D, "aa"}, | ||||
| {"fastint", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_FASTINT }, INT_MIN, INT_MAX, V|D, "aa"}, | {"fastint", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_FASTINT }, INT_MIN, INT_MAX, V|D, "aa"}, | ||||
| {"int", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_INT }, INT_MIN, INT_MAX, V|D, "aa"}, | {"int", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_INT }, INT_MIN, INT_MAX, V|D, "aa"}, | ||||
| {"float", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_FLOAT }, INT_MIN, INT_MAX, V|D, "aa"}, | {"float", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_FLOAT }, INT_MIN, INT_MAX, V|D, "aa"}, | ||||
| #endif | |||||
| {"qns", "quantizer noise shaping", OFFSET(quantizer_noise_shaping), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, | {"qns", "quantizer noise shaping", OFFSET(quantizer_noise_shaping), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, | ||||
| {"threads", NULL, OFFSET(thread_count), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E|D}, | {"threads", NULL, OFFSET(thread_count), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E|D}, | ||||
| {"me_threshold", "motion estimaton threshold", OFFSET(me_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX}, | {"me_threshold", "motion estimaton threshold", OFFSET(me_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX}, | ||||
| @@ -29,6 +29,8 @@ | |||||
| #include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
| #include "libavutil/samplefmt.h" | #include "libavutil/samplefmt.h" | ||||
| #define MAX_CHANNELS 8 | |||||
| struct AVResampleContext; | struct AVResampleContext; | ||||
| static const char *context_to_name(void *ptr) | static const char *context_to_name(void *ptr) | ||||
| @@ -37,20 +39,22 @@ static const char *context_to_name(void *ptr) | |||||
| } | } | ||||
| static const AVOption options[] = {{NULL}}; | static const AVOption options[] = {{NULL}}; | ||||
| static const AVClass audioresample_context_class = { "ReSampleContext", context_to_name, options, LIBAVUTIL_VERSION_INT }; | |||||
| static const AVClass audioresample_context_class = { | |||||
| "ReSampleContext", context_to_name, options, LIBAVUTIL_VERSION_INT | |||||
| }; | |||||
| struct ReSampleContext { | struct ReSampleContext { | ||||
| struct AVResampleContext *resample_context; | struct AVResampleContext *resample_context; | ||||
| short *temp[2]; | |||||
| short *temp[MAX_CHANNELS]; | |||||
| int temp_len; | int temp_len; | ||||
| float ratio; | float ratio; | ||||
| /* channel convert */ | /* channel convert */ | ||||
| int input_channels, output_channels, filter_channels; | int input_channels, output_channels, filter_channels; | ||||
| AVAudioConvert *convert_ctx[2]; | AVAudioConvert *convert_ctx[2]; | ||||
| enum AVSampleFormat sample_fmt[2]; ///< input and output sample format | enum AVSampleFormat sample_fmt[2]; ///< input and output sample format | ||||
| unsigned sample_size[2]; ///< size of one sample in sample_fmt | |||||
| short *buffer[2]; ///< buffers used for conversion to S16 | |||||
| unsigned buffer_size[2]; ///< sizes of allocated buffers | |||||
| unsigned sample_size[2]; ///< size of one sample in sample_fmt | |||||
| short *buffer[2]; ///< buffers used for conversion to S16 | |||||
| unsigned buffer_size[2]; ///< sizes of allocated buffers | |||||
| }; | }; | ||||
| /* n1: number of samples */ | /* n1: number of samples */ | ||||
| @@ -104,41 +108,42 @@ static void mono_to_stereo(short *output, short *input, int n1) | |||||
| } | } | ||||
| } | } | ||||
| /* XXX: should use more abstract 'N' channels system */ | |||||
| static void stereo_split(short *output1, short *output2, short *input, int n) | |||||
| static void deinterleave(short **output, short *input, int channels, int samples) | |||||
| { | { | ||||
| int i; | |||||
| int i, j; | |||||
| for(i=0;i<n;i++) { | |||||
| *output1++ = *input++; | |||||
| *output2++ = *input++; | |||||
| for (i = 0; i < samples; i++) { | |||||
| for (j = 0; j < channels; j++) { | |||||
| *output[j]++ = *input++; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| static void stereo_mux(short *output, short *input1, short *input2, int n) | |||||
| static void interleave(short *output, short **input, int channels, int samples) | |||||
| { | { | ||||
| int i; | |||||
| int i, j; | |||||
| for(i=0;i<n;i++) { | |||||
| *output++ = *input1++; | |||||
| *output++ = *input2++; | |||||
| for (i = 0; i < samples; i++) { | |||||
| for (j = 0; j < channels; j++) { | |||||
| *output++ = *input[j]++; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| static void ac3_5p1_mux(short *output, short *input1, short *input2, int n) | static void ac3_5p1_mux(short *output, short *input1, short *input2, int n) | ||||
| { | { | ||||
| int i; | int i; | ||||
| short l,r; | |||||
| for(i=0;i<n;i++) { | |||||
| l=*input1++; | |||||
| r=*input2++; | |||||
| *output++ = l; /* left */ | |||||
| *output++ = (l/2)+(r/2); /* center */ | |||||
| *output++ = r; /* right */ | |||||
| *output++ = 0; /* left surround */ | |||||
| *output++ = 0; /* right surroud */ | |||||
| *output++ = 0; /* low freq */ | |||||
| short l, r; | |||||
| for (i = 0; i < n; i++) { | |||||
| l = *input1++; | |||||
| r = *input2++; | |||||
| *output++ = l; /* left */ | |||||
| *output++ = (l / 2) + (r / 2); /* center */ | |||||
| *output++ = r; /* right */ | |||||
| *output++ = 0; /* left surround */ | |||||
| *output++ = 0; /* right surroud */ | |||||
| *output++ = 0; /* low freq */ | |||||
| } | } | ||||
| } | } | ||||
| @@ -151,18 +156,25 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels, | |||||
| { | { | ||||
| ReSampleContext *s; | ReSampleContext *s; | ||||
| if ( input_channels > 2) | |||||
| { | |||||
| av_log(NULL, AV_LOG_ERROR, "Resampling with input channels greater than 2 unsupported.\n"); | |||||
| if (input_channels > MAX_CHANNELS) { | |||||
| av_log(NULL, AV_LOG_ERROR, | |||||
| "Resampling with input channels greater than %d is unsupported.\n", | |||||
| MAX_CHANNELS); | |||||
| return NULL; | |||||
| } | |||||
| if (output_channels > 2 && | |||||
| !(output_channels == 6 && input_channels == 2) && | |||||
| output_channels != input_channels) { | |||||
| av_log(NULL, AV_LOG_ERROR, | |||||
| "Resampling output channel count must be 1 or 2 for mono input; 1, 2 or 6 for stereo input; or N for N channel input.\n"); | |||||
| return NULL; | return NULL; | ||||
| } | |||||
| } | |||||
| s = av_mallocz(sizeof(ReSampleContext)); | s = av_mallocz(sizeof(ReSampleContext)); | ||||
| if (!s) | |||||
| { | |||||
| if (!s) { | |||||
| av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for resample context.\n"); | av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for resample context.\n"); | ||||
| return NULL; | return NULL; | ||||
| } | |||||
| } | |||||
| s->ratio = (float)output_rate / (float)input_rate; | s->ratio = (float)output_rate / (float)input_rate; | ||||
| @@ -173,10 +185,10 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels, | |||||
| if (s->output_channels < s->filter_channels) | if (s->output_channels < s->filter_channels) | ||||
| s->filter_channels = s->output_channels; | s->filter_channels = s->output_channels; | ||||
| s->sample_fmt [0] = sample_fmt_in; | |||||
| s->sample_fmt [1] = sample_fmt_out; | |||||
| s->sample_size[0] = av_get_bits_per_sample_fmt(s->sample_fmt[0])>>3; | |||||
| s->sample_size[1] = av_get_bits_per_sample_fmt(s->sample_fmt[1])>>3; | |||||
| s->sample_fmt[0] = sample_fmt_in; | |||||
| s->sample_fmt[1] = sample_fmt_out; | |||||
| s->sample_size[0] = av_get_bits_per_sample_fmt(s->sample_fmt[0]) >> 3; | |||||
| s->sample_size[1] = av_get_bits_per_sample_fmt(s->sample_fmt[1]) >> 3; | |||||
| if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) { | if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) { | ||||
| if (!(s->convert_ctx[0] = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, 1, | if (!(s->convert_ctx[0] = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, 1, | ||||
| @@ -201,17 +213,10 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels, | |||||
| } | } | ||||
| } | } | ||||
| /* | |||||
| * AC-3 output is the only case where filter_channels could be greater than 2. | |||||
| * input channels can't be greater than 2, so resample the 2 channels and then | |||||
| * expand to 6 channels after the resampling. | |||||
| */ | |||||
| if(s->filter_channels>2) | |||||
| s->filter_channels = 2; | |||||
| #define TAPS 16 | #define TAPS 16 | ||||
| s->resample_context= av_resample_init(output_rate, input_rate, | |||||
| filter_length, log2_phase_count, linear, cutoff); | |||||
| s->resample_context = av_resample_init(output_rate, input_rate, | |||||
| filter_length, log2_phase_count, | |||||
| linear, cutoff); | |||||
| *(const AVClass**)s->resample_context = &audioresample_context_class; | *(const AVClass**)s->resample_context = &audioresample_context_class; | ||||
| @@ -223,9 +228,9 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels, | |||||
| int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples) | int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples) | ||||
| { | { | ||||
| int i, nb_samples1; | int i, nb_samples1; | ||||
| short *bufin[2]; | |||||
| short *bufout[2]; | |||||
| short *buftmp2[2], *buftmp3[2]; | |||||
| short *bufin[MAX_CHANNELS]; | |||||
| short *bufout[MAX_CHANNELS]; | |||||
| short *buftmp2[MAX_CHANNELS], *buftmp3[MAX_CHANNELS]; | |||||
| short *output_bak = NULL; | short *output_bak = NULL; | ||||
| int lenout; | int lenout; | ||||
| @@ -240,7 +245,7 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl | |||||
| int ostride[1] = { 2 }; | int ostride[1] = { 2 }; | ||||
| const void *ibuf[1] = { input }; | const void *ibuf[1] = { input }; | ||||
| void *obuf[1]; | void *obuf[1]; | ||||
| unsigned input_size = nb_samples*s->input_channels*2; | |||||
| unsigned input_size = nb_samples * s->input_channels * 2; | |||||
| if (!s->buffer_size[0] || s->buffer_size[0] < input_size) { | if (!s->buffer_size[0] || s->buffer_size[0] < input_size) { | ||||
| av_free(s->buffer[0]); | av_free(s->buffer[0]); | ||||
| @@ -255,12 +260,13 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl | |||||
| obuf[0] = s->buffer[0]; | obuf[0] = s->buffer[0]; | ||||
| if (av_audio_convert(s->convert_ctx[0], obuf, ostride, | if (av_audio_convert(s->convert_ctx[0], obuf, ostride, | ||||
| ibuf, istride, nb_samples*s->input_channels) < 0) { | |||||
| av_log(s->resample_context, AV_LOG_ERROR, "Audio sample format conversion failed\n"); | |||||
| ibuf, istride, nb_samples * s->input_channels) < 0) { | |||||
| av_log(s->resample_context, AV_LOG_ERROR, | |||||
| "Audio sample format conversion failed\n"); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| input = s->buffer[0]; | |||||
| input = s->buffer[0]; | |||||
| } | } | ||||
| lenout= 2*s->output_channels*nb_samples * s->ratio + 16; | lenout= 2*s->output_channels*nb_samples * s->ratio + 16; | ||||
| @@ -282,52 +288,50 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl | |||||
| } | } | ||||
| /* XXX: move those malloc to resample init code */ | /* XXX: move those malloc to resample init code */ | ||||
| for(i=0; i<s->filter_channels; i++){ | |||||
| bufin[i]= av_malloc( (nb_samples + s->temp_len) * sizeof(short) ); | |||||
| for (i = 0; i < s->filter_channels; i++) { | |||||
| bufin[i] = av_malloc((nb_samples + s->temp_len) * sizeof(short)); | |||||
| memcpy(bufin[i], s->temp[i], s->temp_len * sizeof(short)); | memcpy(bufin[i], s->temp[i], s->temp_len * sizeof(short)); | ||||
| buftmp2[i] = bufin[i] + s->temp_len; | buftmp2[i] = bufin[i] + s->temp_len; | ||||
| bufout[i] = av_malloc(lenout * sizeof(short)); | |||||
| } | } | ||||
| /* make some zoom to avoid round pb */ | |||||
| bufout[0]= av_malloc( lenout * sizeof(short) ); | |||||
| bufout[1]= av_malloc( lenout * sizeof(short) ); | |||||
| if (s->input_channels == 2 && | |||||
| s->output_channels == 1) { | |||||
| if (s->input_channels == 2 && s->output_channels == 1) { | |||||
| buftmp3[0] = output; | buftmp3[0] = output; | ||||
| stereo_to_mono(buftmp2[0], input, nb_samples); | stereo_to_mono(buftmp2[0], input, nb_samples); | ||||
| } else if (s->output_channels >= 2 && s->input_channels == 1) { | } else if (s->output_channels >= 2 && s->input_channels == 1) { | ||||
| buftmp3[0] = bufout[0]; | buftmp3[0] = bufout[0]; | ||||
| memcpy(buftmp2[0], input, nb_samples*sizeof(short)); | |||||
| } else if (s->output_channels >= 2) { | |||||
| buftmp3[0] = bufout[0]; | |||||
| buftmp3[1] = bufout[1]; | |||||
| stereo_split(buftmp2[0], buftmp2[1], input, nb_samples); | |||||
| memcpy(buftmp2[0], input, nb_samples * sizeof(short)); | |||||
| } else if (s->output_channels >= s->input_channels && s->input_channels >= 2) { | |||||
| for (i = 0; i < s->input_channels; i++) { | |||||
| buftmp3[i] = bufout[i]; | |||||
| } | |||||
| deinterleave(buftmp2, input, s->input_channels, nb_samples); | |||||
| } else { | } else { | ||||
| buftmp3[0] = output; | buftmp3[0] = output; | ||||
| memcpy(buftmp2[0], input, nb_samples*sizeof(short)); | |||||
| memcpy(buftmp2[0], input, nb_samples * sizeof(short)); | |||||
| } | } | ||||
| nb_samples += s->temp_len; | nb_samples += s->temp_len; | ||||
| /* resample each channel */ | /* resample each channel */ | ||||
| nb_samples1 = 0; /* avoid warning */ | nb_samples1 = 0; /* avoid warning */ | ||||
| for(i=0;i<s->filter_channels;i++) { | |||||
| for (i = 0; i < s->filter_channels; i++) { | |||||
| int consumed; | int consumed; | ||||
| int is_last= i+1 == s->filter_channels; | |||||
| int is_last = i + 1 == s->filter_channels; | |||||
| nb_samples1 = av_resample(s->resample_context, buftmp3[i], bufin[i], &consumed, nb_samples, lenout, is_last); | |||||
| s->temp_len= nb_samples - consumed; | |||||
| s->temp[i]= av_realloc(s->temp[i], s->temp_len*sizeof(short)); | |||||
| memcpy(s->temp[i], bufin[i] + consumed, s->temp_len*sizeof(short)); | |||||
| nb_samples1 = av_resample(s->resample_context, buftmp3[i], bufin[i], | |||||
| &consumed, nb_samples, lenout, is_last); | |||||
| s->temp_len = nb_samples - consumed; | |||||
| s->temp[i] = av_realloc(s->temp[i], s->temp_len * sizeof(short)); | |||||
| memcpy(s->temp[i], bufin[i] + consumed, s->temp_len * sizeof(short)); | |||||
| } | } | ||||
| if (s->output_channels == 2 && s->input_channels == 1) { | if (s->output_channels == 2 && s->input_channels == 1) { | ||||
| mono_to_stereo(output, buftmp3[0], nb_samples1); | mono_to_stereo(output, buftmp3[0], nb_samples1); | ||||
| } else if (s->output_channels == 2) { | |||||
| stereo_mux(output, buftmp3[0], buftmp3[1], nb_samples1); | |||||
| } else if (s->output_channels == 6) { | |||||
| } else if (s->output_channels == 6 && s->input_channels == 2) { | |||||
| ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1); | ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1); | ||||
| } else if (s->output_channels == s->input_channels && s->input_channels >= 2) { | |||||
| interleave(output, buftmp3, s->output_channels, nb_samples1); | |||||
| } | } | ||||
| if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) { | if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) { | ||||
| @@ -337,25 +341,27 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl | |||||
| void *obuf[1] = { output_bak }; | void *obuf[1] = { output_bak }; | ||||
| if (av_audio_convert(s->convert_ctx[1], obuf, ostride, | if (av_audio_convert(s->convert_ctx[1], obuf, ostride, | ||||
| ibuf, istride, nb_samples1*s->output_channels) < 0) { | |||||
| av_log(s->resample_context, AV_LOG_ERROR, "Audio sample format convertion failed\n"); | |||||
| ibuf, istride, nb_samples1 * s->output_channels) < 0) { | |||||
| av_log(s->resample_context, AV_LOG_ERROR, | |||||
| "Audio sample format convertion failed\n"); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| } | } | ||||
| for(i=0; i<s->filter_channels; i++) | |||||
| for (i = 0; i < s->filter_channels; i++) { | |||||
| av_free(bufin[i]); | av_free(bufin[i]); | ||||
| av_free(bufout[i]); | |||||
| } | |||||
| av_free(bufout[0]); | |||||
| av_free(bufout[1]); | |||||
| return nb_samples1; | return nb_samples1; | ||||
| } | } | ||||
| void audio_resample_close(ReSampleContext *s) | void audio_resample_close(ReSampleContext *s) | ||||
| { | { | ||||
| int i; | |||||
| av_resample_close(s->resample_context); | av_resample_close(s->resample_context); | ||||
| av_freep(&s->temp[0]); | |||||
| av_freep(&s->temp[1]); | |||||
| for (i = 0; i < s->filter_channels; i++) | |||||
| av_freep(&s->temp[i]); | |||||
| av_freep(&s->buffer[0]); | av_freep(&s->buffer[0]); | ||||
| av_freep(&s->buffer[1]); | av_freep(&s->buffer[1]); | ||||
| av_audio_convert_free(s->convert_ctx[0]); | av_audio_convert_free(s->convert_ctx[0]); | ||||
| @@ -0,0 +1,141 @@ | |||||
| /* | |||||
| * SMPTE 302M decoder | |||||
| * Copyright (c) 2008 Laurent Aimar <fenrir@videolan.org> | |||||
| * Copyright (c) 2009 Baptiste Coudurier <baptiste.coudurier@gmail.com> | |||||
| * | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/intreadwrite.h" | |||||
| #include "avcodec.h" | |||||
| #define AES3_HEADER_LEN 4 | |||||
| static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf, | |||||
| int buf_size) | |||||
| { | |||||
| uint32_t h; | |||||
| int frame_size, channels, id, bits; | |||||
| if (buf_size <= AES3_HEADER_LEN) { | |||||
| av_log(avctx, AV_LOG_ERROR, "frame is too short\n"); | |||||
| return AVERROR_INVALIDDATA; | |||||
| } | |||||
| /* | |||||
| * AES3 header : | |||||
| * size: 16 | |||||
| * number channels 2 | |||||
| * channel_id 8 | |||||
| * bits per samples 2 | |||||
| * alignments 4 | |||||
| */ | |||||
| h = AV_RB32(buf); | |||||
| frame_size = (h >> 16) & 0xffff; | |||||
| channels = ((h >> 14) & 0x0003) * 2 + 2; | |||||
| id = (h >> 6) & 0x00ff; | |||||
| bits = ((h >> 4) & 0x0003) * 4 + 16; | |||||
| if (AES3_HEADER_LEN + frame_size != buf_size || bits > 24) { | |||||
| av_log(avctx, AV_LOG_ERROR, "frame has invalid header\n"); | |||||
| return AVERROR_INVALIDDATA; | |||||
| } | |||||
| /* Set output properties */ | |||||
| avctx->bits_per_coded_sample = bits; | |||||
| if (bits > 16) | |||||
| avctx->sample_fmt = SAMPLE_FMT_S32; | |||||
| else | |||||
| avctx->sample_fmt = SAMPLE_FMT_S16; | |||||
| avctx->channels = channels; | |||||
| avctx->sample_rate = 48000; | |||||
| avctx->bit_rate = 48000 * avctx->channels * (avctx->bits_per_coded_sample + 4) + | |||||
| 32 * (48000 / (buf_size * 8 / | |||||
| (avctx->channels * | |||||
| (avctx->bits_per_coded_sample + 4)))); | |||||
| return frame_size; | |||||
| } | |||||
| static int s302m_decode_frame(AVCodecContext *avctx, void *data, | |||||
| int *data_size, AVPacket *avpkt) | |||||
| { | |||||
| const uint8_t *buf = avpkt->data; | |||||
| int buf_size = avpkt->size; | |||||
| int frame_size = s302m_parse_frame_header(avctx, buf, buf_size); | |||||
| if (frame_size < 0) | |||||
| return frame_size; | |||||
| buf_size -= AES3_HEADER_LEN; | |||||
| buf += AES3_HEADER_LEN; | |||||
| if (*data_size < 4 * buf_size * 8 / (avctx->bits_per_coded_sample + 4)) | |||||
| return -1; | |||||
| if (avctx->bits_per_coded_sample == 24) { | |||||
| uint32_t *o = data; | |||||
| for (; buf_size > 6; buf_size -= 7) { | |||||
| *o++ = (av_reverse[buf[2]] << 24) | | |||||
| (av_reverse[buf[1]] << 16) | | |||||
| (av_reverse[buf[0]] << 8); | |||||
| *o++ = (av_reverse[buf[6] & 0xf0] << 28) | | |||||
| (av_reverse[buf[5]] << 20) | | |||||
| (av_reverse[buf[4]] << 12) | | |||||
| (av_reverse[buf[3] & 0x0f] << 8); | |||||
| buf += 7; | |||||
| } | |||||
| *data_size = (uint8_t*) o - (uint8_t*) data; | |||||
| } else if (avctx->bits_per_coded_sample == 20) { | |||||
| uint32_t *o = data; | |||||
| for (; buf_size > 5; buf_size -= 6) { | |||||
| *o++ = (av_reverse[buf[2] & 0xf0] << 28) | | |||||
| (av_reverse[buf[1]] << 20) | | |||||
| (av_reverse[buf[0]] << 12); | |||||
| *o++ = (av_reverse[buf[5] & 0xf0] << 28) | | |||||
| (av_reverse[buf[4]] << 20) | | |||||
| (av_reverse[buf[3]] << 12); | |||||
| buf += 6; | |||||
| } | |||||
| *data_size = (uint8_t*) o - (uint8_t*) data; | |||||
| } else { | |||||
| uint16_t *o = data; | |||||
| for (; buf_size > 4; buf_size -= 5) { | |||||
| *o++ = (av_reverse[buf[1]] << 8) | | |||||
| av_reverse[buf[0]]; | |||||
| *o++ = (av_reverse[buf[4] & 0xf0] << 12) | | |||||
| (av_reverse[buf[3]] << 4) | | |||||
| av_reverse[buf[2] & 0x0f]; | |||||
| buf += 5; | |||||
| } | |||||
| *data_size = (uint8_t*) o - (uint8_t*) data; | |||||
| } | |||||
| return buf - avpkt->data; | |||||
| } | |||||
| AVCodec ff_s302m_decoder = { | |||||
| .name = "s302m", | |||||
| .type = AVMEDIA_TYPE_AUDIO, | |||||
| .id = CODEC_ID_S302M, | |||||
| .priv_data_size = 0, | |||||
| .decode = s302m_decode_frame, | |||||
| .long_name = NULL_IF_CONFIG_SMALL("SMPTE 302M"), | |||||
| }; | |||||
| @@ -168,7 +168,13 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin | |||||
| } | } | ||||
| switch(s->compr){ | switch(s->compr){ | ||||
| case TIFF_RAW: | case TIFF_RAW: | ||||
| memcpy(dst, src, width); | |||||
| if (!s->fill_order) { | |||||
| memcpy(dst, src, width); | |||||
| } else { | |||||
| int i; | |||||
| for (i = 0; i < width; i++) | |||||
| dst[i] = av_reverse[src[i]]; | |||||
| } | |||||
| src += width; | src += width; | ||||
| break; | break; | ||||
| case TIFF_PACKBITS: | case TIFF_PACKBITS: | ||||
| @@ -9,6 +9,7 @@ YASM-OBJS-$(CONFIG_FFT) += x86/fft_mmx.o \ | |||||
| MMX-OBJS-$(CONFIG_H264DSP) += x86/h264dsp_mmx.o | MMX-OBJS-$(CONFIG_H264DSP) += x86/h264dsp_mmx.o | ||||
| YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ | YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ | ||||
| x86/h264_deblock_10bit.o \ | |||||
| x86/h264_weight.o \ | x86/h264_weight.o \ | ||||
| x86/h264_idct.o \ | x86/h264_idct.o \ | ||||
| @@ -43,6 +43,7 @@ DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] = | |||||
| {0x8000000080000000ULL, 0x8000000080000000ULL}; | {0x8000000080000000ULL, 0x8000000080000000ULL}; | ||||
| DECLARE_ALIGNED(8, const uint64_t, ff_pw_1 ) = 0x0001000100010001ULL; | DECLARE_ALIGNED(8, const uint64_t, ff_pw_1 ) = 0x0001000100010001ULL; | ||||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2 ) = {0x0002000200020002ULL, 0x0002000200020002ULL}; | |||||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3 ) = {0x0003000300030003ULL, 0x0003000300030003ULL}; | DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3 ) = {0x0003000300030003ULL, 0x0003000300030003ULL}; | ||||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL}; | DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL}; | ||||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; | DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; | ||||
| @@ -1,10 +1,11 @@ | |||||
| ;***************************************************************************** | ;***************************************************************************** | ||||
| ;* MMX/SSE2-optimized H.264 deblocking code | |||||
| ;* MMX/SSE2/AVX-optimized H.264 deblocking code | |||||
| ;***************************************************************************** | ;***************************************************************************** | ||||
| ;* Copyright (C) 2005-2008 x264 project | |||||
| ;* Copyright (C) 2005-2011 x264 project | |||||
| ;* | ;* | ||||
| ;* Authors: Loren Merritt <lorenm@u.washington.edu> | ;* Authors: Loren Merritt <lorenm@u.washington.edu> | ||||
| ;* Jason Garrett-Glaser <darkshikari@gmail.com> | ;* Jason Garrett-Glaser <darkshikari@gmail.com> | ||||
| ;* Oskar Arvidsson <oskar@irock.se> | |||||
| ;* | ;* | ||||
| ;* This file is part of FFmpeg. | ;* This file is part of FFmpeg. | ||||
| ;* | ;* | ||||
| @@ -26,96 +27,94 @@ | |||||
| %include "x86inc.asm" | %include "x86inc.asm" | ||||
| %include "x86util.asm" | %include "x86util.asm" | ||||
| SECTION_RODATA | |||||
| SECTION .text | |||||
| cextern pb_0 | cextern pb_0 | ||||
| cextern pb_1 | cextern pb_1 | ||||
| cextern pb_3 | cextern pb_3 | ||||
| cextern pb_A1 | cextern pb_A1 | ||||
| SECTION .text | |||||
| ; expands to [base],...,[base+7*stride] | ; expands to [base],...,[base+7*stride] | ||||
| %define PASS8ROWS(base, base3, stride, stride3) \ | %define PASS8ROWS(base, base3, stride, stride3) \ | ||||
| [base], [base+stride], [base+stride*2], [base3], \ | [base], [base+stride], [base+stride*2], [base3], \ | ||||
| [base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4] | [base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4] | ||||
| ; in: 8 rows of 4 bytes in %1..%8 | |||||
| %define PASS8ROWS(base, base3, stride, stride3, offset) \ | |||||
| PASS8ROWS(base+offset, base3+offset, stride, stride3) | |||||
| ; in: 8 rows of 4 bytes in %4..%11 | |||||
| ; out: 4 rows of 8 bytes in m0..m3 | ; out: 4 rows of 8 bytes in m0..m3 | ||||
| %macro TRANSPOSE4x8_LOAD 8 | |||||
| movd m0, %1 | |||||
| movd m2, %2 | |||||
| movd m1, %3 | |||||
| movd m3, %4 | |||||
| punpcklbw m0, m2 | |||||
| punpcklbw m1, m3 | |||||
| movq m2, m0 | |||||
| punpcklwd m0, m1 | |||||
| punpckhwd m2, m1 | |||||
| movd m4, %5 | |||||
| movd m6, %6 | |||||
| movd m5, %7 | |||||
| movd m7, %8 | |||||
| punpcklbw m4, m6 | |||||
| punpcklbw m5, m7 | |||||
| movq m6, m4 | |||||
| punpcklwd m4, m5 | |||||
| punpckhwd m6, m5 | |||||
| movq m1, m0 | |||||
| movq m3, m2 | |||||
| punpckldq m0, m4 | |||||
| punpckhdq m1, m4 | |||||
| punpckldq m2, m6 | |||||
| punpckhdq m3, m6 | |||||
| %macro TRANSPOSE4x8_LOAD 11 | |||||
| movh m0, %4 | |||||
| movh m2, %5 | |||||
| movh m1, %6 | |||||
| movh m3, %7 | |||||
| punpckl%1 m0, m2 | |||||
| punpckl%1 m1, m3 | |||||
| mova m2, m0 | |||||
| punpckl%2 m0, m1 | |||||
| punpckh%2 m2, m1 | |||||
| movh m4, %8 | |||||
| movh m6, %9 | |||||
| movh m5, %10 | |||||
| movh m7, %11 | |||||
| punpckl%1 m4, m6 | |||||
| punpckl%1 m5, m7 | |||||
| mova m6, m4 | |||||
| punpckl%2 m4, m5 | |||||
| punpckh%2 m6, m5 | |||||
| punpckh%3 m1, m0, m4 | |||||
| punpckh%3 m3, m2, m6 | |||||
| punpckl%3 m0, m4 | |||||
| punpckl%3 m2, m6 | |||||
| %endmacro | %endmacro | ||||
| ; in: 4 rows of 8 bytes in m0..m3 | ; in: 4 rows of 8 bytes in m0..m3 | ||||
| ; out: 8 rows of 4 bytes in %1..%8 | ; out: 8 rows of 4 bytes in %1..%8 | ||||
| %macro TRANSPOSE8x4_STORE 8 | |||||
| movq m4, m0 | |||||
| movq m5, m1 | |||||
| movq m6, m2 | |||||
| punpckhdq m4, m4 | |||||
| punpckhdq m5, m5 | |||||
| punpckhdq m6, m6 | |||||
| %macro TRANSPOSE8x4B_STORE 8 | |||||
| punpckhdq m4, m0, m0 | |||||
| punpckhdq m5, m1, m1 | |||||
| punpckhdq m6, m2, m2 | |||||
| punpcklbw m0, m1 | punpcklbw m0, m1 | ||||
| punpcklbw m2, m3 | punpcklbw m2, m3 | ||||
| movq m1, m0 | |||||
| punpcklwd m0, m2 | |||||
| punpckhwd m1, m2 | |||||
| movd %1, m0 | |||||
| punpckhdq m0, m0 | |||||
| movd %2, m0 | |||||
| movd %3, m1 | |||||
| punpcklwd m1, m0, m2 | |||||
| punpckhwd m0, m2 | |||||
| movh %1, m1 | |||||
| punpckhdq m1, m1 | punpckhdq m1, m1 | ||||
| movd %4, m1 | |||||
| movh %2, m1 | |||||
| movh %3, m0 | |||||
| punpckhdq m0, m0 | |||||
| movh %4, m0 | |||||
| punpckhdq m3, m3 | punpckhdq m3, m3 | ||||
| punpcklbw m4, m5 | punpcklbw m4, m5 | ||||
| punpcklbw m6, m3 | punpcklbw m6, m3 | ||||
| movq m5, m4 | |||||
| punpcklwd m4, m6 | |||||
| punpckhwd m5, m6 | |||||
| movd %5, m4 | |||||
| punpckhdq m4, m4 | |||||
| movd %6, m4 | |||||
| movd %7, m5 | |||||
| punpcklwd m5, m4, m6 | |||||
| punpckhwd m4, m6 | |||||
| movh %5, m5 | |||||
| punpckhdq m5, m5 | punpckhdq m5, m5 | ||||
| movd %8, m5 | |||||
| movh %6, m5 | |||||
| movh %7, m4 | |||||
| punpckhdq m4, m4 | |||||
| movh %8, m4 | |||||
| %endmacro | |||||
| %macro TRANSPOSE4x8B_LOAD 8 | |||||
| TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8 | |||||
| %endmacro | %endmacro | ||||
| %macro SBUTTERFLY3 4 | %macro SBUTTERFLY3 4 | ||||
| movq %4, %2 | |||||
| punpckh%1 %4, %2, %3 | |||||
| punpckl%1 %2, %3 | punpckl%1 %2, %3 | ||||
| punpckh%1 %4, %3 | |||||
| %endmacro | %endmacro | ||||
| ; in: 8 rows of 8 (only the middle 6 pels are used) in %1..%8 | ; in: 8 rows of 8 (only the middle 6 pels are used) in %1..%8 | ||||
| ; out: 6 rows of 8 in [%9+0*16] .. [%9+5*16] | ; out: 6 rows of 8 in [%9+0*16] .. [%9+5*16] | ||||
| %macro TRANSPOSE6x8_MEM 9 | %macro TRANSPOSE6x8_MEM 9 | ||||
| RESET_MM_PERMUTATION | |||||
| movq m0, %1 | movq m0, %1 | ||||
| movq m1, %2 | movq m1, %2 | ||||
| movq m2, %3 | movq m2, %3 | ||||
| @@ -123,30 +122,32 @@ SECTION .text | |||||
| movq m4, %5 | movq m4, %5 | ||||
| movq m5, %6 | movq m5, %6 | ||||
| movq m6, %7 | movq m6, %7 | ||||
| SBUTTERFLY3 bw, m0, m1, m7 | |||||
| SBUTTERFLY3 bw, m2, m3, m1 | |||||
| SBUTTERFLY3 bw, m4, m5, m3 | |||||
| movq [%9+0x10], m1 | |||||
| SBUTTERFLY3 bw, m6, %8, m5 | |||||
| SBUTTERFLY3 wd, m0, m2, m1 | |||||
| SBUTTERFLY3 wd, m4, m6, m2 | |||||
| SBUTTERFLY bw, 0, 1, 7 | |||||
| SBUTTERFLY bw, 2, 3, 7 | |||||
| SBUTTERFLY bw, 4, 5, 7 | |||||
| movq [%9+0x10], m3 | |||||
| SBUTTERFLY3 bw, m6, %8, m7 | |||||
| SBUTTERFLY wd, 0, 2, 3 | |||||
| SBUTTERFLY wd, 4, 6, 3 | |||||
| punpckhdq m0, m4 | punpckhdq m0, m4 | ||||
| movq [%9+0x00], m0 | movq [%9+0x00], m0 | ||||
| SBUTTERFLY3 wd, m7, [%9+0x10], m6 | |||||
| SBUTTERFLY3 wd, m3, m5, m4 | |||||
| SBUTTERFLY3 dq, m7, m3, m0 | |||||
| SBUTTERFLY3 dq, m1, m2, m5 | |||||
| punpckldq m6, m4 | |||||
| movq [%9+0x10], m1 | |||||
| movq [%9+0x20], m5 | |||||
| movq [%9+0x30], m7 | |||||
| movq [%9+0x40], m0 | |||||
| movq [%9+0x50], m6 | |||||
| SBUTTERFLY3 wd, m1, [%9+0x10], m3 | |||||
| SBUTTERFLY wd, 5, 7, 0 | |||||
| SBUTTERFLY dq, 1, 5, 0 | |||||
| SBUTTERFLY dq, 2, 6, 0 | |||||
| punpckldq m3, m7 | |||||
| movq [%9+0x10], m2 | |||||
| movq [%9+0x20], m6 | |||||
| movq [%9+0x30], m1 | |||||
| movq [%9+0x40], m5 | |||||
| movq [%9+0x50], m3 | |||||
| RESET_MM_PERMUTATION | |||||
| %endmacro | %endmacro | ||||
| ; in: 8 rows of 8 in %1..%8 | ; in: 8 rows of 8 in %1..%8 | ||||
| ; out: 8 rows of 8 in %9..%16 | ; out: 8 rows of 8 in %9..%16 | ||||
| %macro TRANSPOSE8x8_MEM 16 | %macro TRANSPOSE8x8_MEM 16 | ||||
| RESET_MM_PERMUTATION | |||||
| movq m0, %1 | movq m0, %1 | ||||
| movq m1, %2 | movq m1, %2 | ||||
| movq m2, %3 | movq m2, %3 | ||||
| @@ -154,38 +155,44 @@ SECTION .text | |||||
| movq m4, %5 | movq m4, %5 | ||||
| movq m5, %6 | movq m5, %6 | ||||
| movq m6, %7 | movq m6, %7 | ||||
| SBUTTERFLY3 bw, m0, m1, m7 | |||||
| SBUTTERFLY3 bw, m2, m3, m1 | |||||
| SBUTTERFLY3 bw, m4, m5, m3 | |||||
| SBUTTERFLY3 bw, m6, %8, m5 | |||||
| movq %9, m3 | |||||
| SBUTTERFLY3 wd, m0, m2, m3 | |||||
| SBUTTERFLY3 wd, m4, m6, m2 | |||||
| SBUTTERFLY3 wd, m7, m1, m6 | |||||
| movq %11, m2 | |||||
| movq m2, %9 | |||||
| SBUTTERFLY3 wd, m2, m5, m1 | |||||
| SBUTTERFLY3 dq, m0, m4, m5 | |||||
| SBUTTERFLY3 dq, m7, m2, m4 | |||||
| SBUTTERFLY bw, 0, 1, 7 | |||||
| SBUTTERFLY bw, 2, 3, 7 | |||||
| SBUTTERFLY bw, 4, 5, 7 | |||||
| SBUTTERFLY3 bw, m6, %8, m7 | |||||
| movq %9, m5 | |||||
| SBUTTERFLY wd, 0, 2, 5 | |||||
| SBUTTERFLY wd, 4, 6, 5 | |||||
| SBUTTERFLY wd, 1, 3, 5 | |||||
| movq %11, m6 | |||||
| movq m6, %9 | |||||
| SBUTTERFLY wd, 6, 7, 5 | |||||
| SBUTTERFLY dq, 0, 4, 5 | |||||
| SBUTTERFLY dq, 1, 6, 5 | |||||
| movq %9, m0 | movq %9, m0 | ||||
| movq %10, m5 | |||||
| movq %13, m7 | |||||
| movq %14, m4 | |||||
| SBUTTERFLY3 dq, m3, %11, m0 | |||||
| SBUTTERFLY3 dq, m6, m1, m5 | |||||
| movq %11, m3 | |||||
| movq %10, m4 | |||||
| movq %13, m1 | |||||
| movq %14, m6 | |||||
| SBUTTERFLY3 dq, m2, %11, m0 | |||||
| SBUTTERFLY dq, 3, 7, 4 | |||||
| movq %11, m2 | |||||
| movq %12, m0 | movq %12, m0 | ||||
| movq %15, m6 | |||||
| movq %16, m5 | |||||
| movq %15, m3 | |||||
| movq %16, m7 | |||||
| RESET_MM_PERMUTATION | |||||
| %endmacro | %endmacro | ||||
| ; out: %4 = |%1-%2|>%3 | ; out: %4 = |%1-%2|>%3 | ||||
| ; clobbers: %5 | ; clobbers: %5 | ||||
| %macro DIFF_GT 5 | %macro DIFF_GT 5 | ||||
| %if avx_enabled == 0 | |||||
| mova %5, %2 | mova %5, %2 | ||||
| mova %4, %1 | mova %4, %1 | ||||
| psubusb %5, %1 | psubusb %5, %1 | ||||
| psubusb %4, %2 | psubusb %4, %2 | ||||
| %else | |||||
| psubusb %5, %2, %1 | |||||
| psubusb %4, %1, %2 | |||||
| %endif | |||||
| por %4, %5 | por %4, %5 | ||||
| psubusb %4, %3 | psubusb %4, %3 | ||||
| %endmacro | %endmacro | ||||
| @@ -193,32 +200,28 @@ SECTION .text | |||||
| ; out: %4 = |%1-%2|>%3 | ; out: %4 = |%1-%2|>%3 | ||||
| ; clobbers: %5 | ; clobbers: %5 | ||||
| %macro DIFF_GT2 5 | %macro DIFF_GT2 5 | ||||
| %ifdef ARCH_X86_64 | |||||
| psubusb %5, %2, %1 | |||||
| psubusb %4, %1, %2 | |||||
| %else | |||||
| mova %5, %2 | mova %5, %2 | ||||
| mova %4, %1 | mova %4, %1 | ||||
| psubusb %5, %1 | psubusb %5, %1 | ||||
| psubusb %4, %2 | psubusb %4, %2 | ||||
| %endif | |||||
| psubusb %5, %3 | psubusb %5, %3 | ||||
| psubusb %4, %3 | psubusb %4, %3 | ||||
| pcmpeqb %4, %5 | pcmpeqb %4, %5 | ||||
| %endmacro | %endmacro | ||||
| %macro SPLATW 1 | |||||
| %ifidn m0, xmm0 | |||||
| pshuflw %1, %1, 0 | |||||
| punpcklqdq %1, %1 | |||||
| %else | |||||
| pshufw %1, %1, 0 | |||||
| %endif | |||||
| %endmacro | |||||
| ; in: m0=p1 m1=p0 m2=q0 m3=q1 %1=alpha-1 %2=beta-1 | ; in: m0=p1 m1=p0 m2=q0 m3=q1 %1=alpha-1 %2=beta-1 | ||||
| ; out: m5=beta-1, m7=mask, %3=alpha-1 | ; out: m5=beta-1, m7=mask, %3=alpha-1 | ||||
| ; clobbers: m4,m6 | ; clobbers: m4,m6 | ||||
| %macro LOAD_MASK 2-3 | %macro LOAD_MASK 2-3 | ||||
| movd m4, %1 | movd m4, %1 | ||||
| movd m5, %2 | movd m5, %2 | ||||
| SPLATW m4 | |||||
| SPLATW m5 | |||||
| SPLATW m4, m4 | |||||
| SPLATW m5, m5 | |||||
| packuswb m4, m4 ; 16x alpha-1 | packuswb m4, m4 ; 16x alpha-1 | ||||
| packuswb m5, m5 ; 16x beta-1 | packuswb m5, m5 ; 16x beta-1 | ||||
| %if %0>2 | %if %0>2 | ||||
| @@ -237,8 +240,7 @@ SECTION .text | |||||
| ; out: m1=p0' m2=q0' | ; out: m1=p0' m2=q0' | ||||
| ; clobbers: m0,3-6 | ; clobbers: m0,3-6 | ||||
| %macro DEBLOCK_P0_Q0 0 | %macro DEBLOCK_P0_Q0 0 | ||||
| mova m5, m1 | |||||
| pxor m5, m2 ; p0^q0 | |||||
| pxor m5, m1, m2 ; p0^q0 | |||||
| pand m5, [pb_1] ; (p0^q0)&1 | pand m5, [pb_1] ; (p0^q0)&1 | ||||
| pcmpeqb m4, m4 | pcmpeqb m4, m4 | ||||
| pxor m3, m4 | pxor m3, m4 | ||||
| @@ -264,14 +266,12 @@ SECTION .text | |||||
| ; out: [q1] = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 ) | ; out: [q1] = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 ) | ||||
| ; clobbers: q2, tmp, tc0 | ; clobbers: q2, tmp, tc0 | ||||
| %macro LUMA_Q1 6 | %macro LUMA_Q1 6 | ||||
| mova %6, m1 | |||||
| pavgb %6, m2 | |||||
| pavgb %6, m1, m2 | |||||
| pavgb %2, %6 ; avg(p2,avg(p0,q0)) | pavgb %2, %6 ; avg(p2,avg(p0,q0)) | ||||
| pxor %6, %3 | pxor %6, %3 | ||||
| pand %6, [pb_1] ; (p2^avg(p0,q0))&1 | pand %6, [pb_1] ; (p2^avg(p0,q0))&1 | ||||
| psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1 | psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1 | ||||
| mova %6, %1 | |||||
| psubusb %6, %5 | |||||
| psubusb %6, %1, %5 | |||||
| paddusb %5, %1 | paddusb %5, %1 | ||||
| pmaxub %2, %6 | pmaxub %2, %6 | ||||
| pminub %2, %5 | pminub %2, %5 | ||||
| @@ -280,10 +280,10 @@ SECTION .text | |||||
| %ifdef ARCH_X86_64 | %ifdef ARCH_X86_64 | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| INIT_XMM | |||||
| cglobal x264_deblock_v_luma_sse2, 5,5,10 | |||||
| %macro DEBLOCK_LUMA 1 | |||||
| cglobal deblock_v_luma_8_%1, 5,5,10 | |||||
| movd m8, [r4] ; tc0 | movd m8, [r4] ; tc0 | ||||
| lea r4, [r1*3] | lea r4, [r1*3] | ||||
| dec r2d ; alpha-1 | dec r2d ; alpha-1 | ||||
| @@ -307,8 +307,7 @@ cglobal x264_deblock_v_luma_sse2, 5,5,10 | |||||
| movdqa m3, [r4] ; p2 | movdqa m3, [r4] ; p2 | ||||
| DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 | DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 | ||||
| pand m6, m9 | pand m6, m9 | ||||
| mova m7, m8 | |||||
| psubb m7, m6 | |||||
| psubb m7, m8, m6 | |||||
| pand m6, m8 | pand m6, m8 | ||||
| LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 | LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 | ||||
| @@ -326,10 +325,10 @@ cglobal x264_deblock_v_luma_sse2, 5,5,10 | |||||
| RET | RET | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| INIT_MMX | INIT_MMX | ||||
| cglobal x264_deblock_h_luma_sse2, 5,7 | |||||
| cglobal deblock_h_luma_8_%1, 5,7 | |||||
| movsxd r10, r1d | movsxd r10, r1d | ||||
| lea r11, [r10+r10*2] | lea r11, [r10+r10*2] | ||||
| lea r6, [r0-4] | lea r6, [r0-4] | ||||
| @@ -350,13 +349,13 @@ cglobal x264_deblock_h_luma_sse2, 5,7 | |||||
| ; vertical filter | ; vertical filter | ||||
| ; alpha, beta, tc0 are still in r2d, r3d, r4 | ; alpha, beta, tc0 are still in r2d, r3d, r4 | ||||
| ; don't backup r6, r5, r10, r11 because x264_deblock_v_luma_sse2 doesn't use them | |||||
| ; don't backup r6, r5, r10, r11 because deblock_v_luma_sse2 doesn't use them | |||||
| lea r0, [pix_tmp+0x30] | lea r0, [pix_tmp+0x30] | ||||
| mov r1d, 0x10 | mov r1d, 0x10 | ||||
| %ifdef WIN64 | %ifdef WIN64 | ||||
| mov [rsp+0x20], r4 | mov [rsp+0x20], r4 | ||||
| %endif | %endif | ||||
| call x264_deblock_v_luma_sse2 | |||||
| call deblock_v_luma_8_%1 | |||||
| ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter) | ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter) | ||||
| add r6, 2 | add r6, 2 | ||||
| @@ -365,7 +364,7 @@ cglobal x264_deblock_h_luma_sse2, 5,7 | |||||
| movq m1, [pix_tmp+0x28] | movq m1, [pix_tmp+0x28] | ||||
| movq m2, [pix_tmp+0x38] | movq m2, [pix_tmp+0x38] | ||||
| movq m3, [pix_tmp+0x48] | movq m3, [pix_tmp+0x48] | ||||
| TRANSPOSE8x4_STORE PASS8ROWS(r6, r5, r10, r11) | |||||
| TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11) | |||||
| shl r10, 3 | shl r10, 3 | ||||
| sub r6, r10 | sub r6, r10 | ||||
| @@ -375,7 +374,7 @@ cglobal x264_deblock_h_luma_sse2, 5,7 | |||||
| movq m1, [pix_tmp+0x20] | movq m1, [pix_tmp+0x20] | ||||
| movq m2, [pix_tmp+0x30] | movq m2, [pix_tmp+0x30] | ||||
| movq m3, [pix_tmp+0x40] | movq m3, [pix_tmp+0x40] | ||||
| TRANSPOSE8x4_STORE PASS8ROWS(r6, r5, r10, r11) | |||||
| TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11) | |||||
| %ifdef WIN64 | %ifdef WIN64 | ||||
| add rsp, 0x98 | add rsp, 0x98 | ||||
| @@ -383,14 +382,20 @@ cglobal x264_deblock_h_luma_sse2, 5,7 | |||||
| add rsp, 0x68 | add rsp, 0x68 | ||||
| %endif | %endif | ||||
| RET | RET | ||||
| %endmacro | |||||
| INIT_XMM | |||||
| DEBLOCK_LUMA sse2 | |||||
| INIT_AVX | |||||
| DEBLOCK_LUMA avx | |||||
| %else | %else | ||||
| %macro DEBLOCK_LUMA 3 | %macro DEBLOCK_LUMA 3 | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| cglobal x264_deblock_%2_luma_%1, 5,5 | |||||
| cglobal deblock_%2_luma_8_%1, 5,5 | |||||
| lea r4, [r1*3] | lea r4, [r1*3] | ||||
| dec r2 ; alpha-1 | dec r2 ; alpha-1 | ||||
| neg r4 | neg r4 | ||||
| @@ -419,8 +424,7 @@ cglobal x264_deblock_%2_luma_%1, 5,5 | |||||
| DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 | DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 | ||||
| pand m6, m4 | pand m6, m4 | ||||
| pand m4, [esp+%3] ; tc | pand m4, [esp+%3] ; tc | ||||
| mova m7, m4 | |||||
| psubb m7, m6 | |||||
| psubb m7, m4, m6 | |||||
| pand m6, m4 | pand m6, m4 | ||||
| LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 | LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 | ||||
| @@ -441,10 +445,10 @@ cglobal x264_deblock_%2_luma_%1, 5,5 | |||||
| RET | RET | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| INIT_MMX | INIT_MMX | ||||
| cglobal x264_deblock_h_luma_%1, 0,5 | |||||
| cglobal deblock_h_luma_8_%1, 0,5 | |||||
| mov r0, r0mp | mov r0, r0mp | ||||
| mov r3, r1m | mov r3, r1m | ||||
| lea r4, [r3*3] | lea r4, [r3*3] | ||||
| @@ -467,11 +471,11 @@ cglobal x264_deblock_h_luma_%1, 0,5 | |||||
| PUSH dword r2m | PUSH dword r2m | ||||
| PUSH dword 16 | PUSH dword 16 | ||||
| PUSH dword r0 | PUSH dword r0 | ||||
| call x264_deblock_%2_luma_%1 | |||||
| call deblock_%2_luma_8_%1 | |||||
| %ifidn %2, v8 | %ifidn %2, v8 | ||||
| add dword [esp ], 8 ; pix_tmp+0x38 | add dword [esp ], 8 ; pix_tmp+0x38 | ||||
| add dword [esp+16], 2 ; tc0+2 | add dword [esp+16], 2 ; tc0+2 | ||||
| call x264_deblock_%2_luma_%1 | |||||
| call deblock_%2_luma_8_%1 | |||||
| %endif | %endif | ||||
| ADD esp, 20 | ADD esp, 20 | ||||
| @@ -484,7 +488,7 @@ cglobal x264_deblock_h_luma_%1, 0,5 | |||||
| movq m1, [pix_tmp+0x20] | movq m1, [pix_tmp+0x20] | ||||
| movq m2, [pix_tmp+0x30] | movq m2, [pix_tmp+0x30] | ||||
| movq m3, [pix_tmp+0x40] | movq m3, [pix_tmp+0x40] | ||||
| TRANSPOSE8x4_STORE PASS8ROWS(r0, r1, r3, r4) | |||||
| TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4) | |||||
| lea r0, [r0+r3*8] | lea r0, [r0+r3*8] | ||||
| lea r1, [r1+r3*8] | lea r1, [r1+r3*8] | ||||
| @@ -492,7 +496,7 @@ cglobal x264_deblock_h_luma_%1, 0,5 | |||||
| movq m1, [pix_tmp+0x28] | movq m1, [pix_tmp+0x28] | ||||
| movq m2, [pix_tmp+0x38] | movq m2, [pix_tmp+0x38] | ||||
| movq m3, [pix_tmp+0x48] | movq m3, [pix_tmp+0x48] | ||||
| TRANSPOSE8x4_STORE PASS8ROWS(r0, r1, r3, r4) | |||||
| TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4) | |||||
| ADD esp, pad | ADD esp, pad | ||||
| RET | RET | ||||
| @@ -502,22 +506,34 @@ INIT_MMX | |||||
| DEBLOCK_LUMA mmxext, v8, 8 | DEBLOCK_LUMA mmxext, v8, 8 | ||||
| INIT_XMM | INIT_XMM | ||||
| DEBLOCK_LUMA sse2, v, 16 | DEBLOCK_LUMA sse2, v, 16 | ||||
| INIT_AVX | |||||
| DEBLOCK_LUMA avx, v, 16 | |||||
| %endif ; ARCH | %endif ; ARCH | ||||
| %macro LUMA_INTRA_P012 4 ; p0..p3 in memory | %macro LUMA_INTRA_P012 4 ; p0..p3 in memory | ||||
| %ifdef ARCH_X86_64 | |||||
| pavgb t0, p2, p1 | |||||
| pavgb t1, p0, q0 | |||||
| %else | |||||
| mova t0, p2 | mova t0, p2 | ||||
| mova t1, p0 | mova t1, p0 | ||||
| pavgb t0, p1 | pavgb t0, p1 | ||||
| pavgb t1, q0 | pavgb t1, q0 | ||||
| %endif | |||||
| pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2 | pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2 | ||||
| mova t5, t1 | mova t5, t1 | ||||
| %ifdef ARCH_X86_64 | |||||
| paddb t2, p2, p1 | |||||
| paddb t3, p0, q0 | |||||
| %else | |||||
| mova t2, p2 | mova t2, p2 | ||||
| mova t3, p0 | mova t3, p0 | ||||
| paddb t2, p1 | paddb t2, p1 | ||||
| paddb t3, q0 | paddb t3, q0 | ||||
| %endif | |||||
| paddb t2, t3 | paddb t2, t3 | ||||
| mova t3, t2 | mova t3, t2 | ||||
| mova t4, t2 | mova t4, t2 | ||||
| @@ -527,10 +543,15 @@ DEBLOCK_LUMA sse2, v, 16 | |||||
| pand t2, mpb_1 | pand t2, mpb_1 | ||||
| psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4; | psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4; | ||||
| %ifdef ARCH_X86_64 | |||||
| pavgb t1, p2, q1 | |||||
| psubb t2, p2, q1 | |||||
| %else | |||||
| mova t1, p2 | mova t1, p2 | ||||
| mova t2, p2 | mova t2, p2 | ||||
| pavgb t1, q1 | pavgb t1, q1 | ||||
| psubb t2, q1 | psubb t2, q1 | ||||
| %endif | |||||
| paddb t3, t3 | paddb t3, t3 | ||||
| psubb t3, t2 ; p2+2*p1+2*p0+2*q0+q1 | psubb t3, t2 ; p2+2*p1+2*p0+2*q0+q1 | ||||
| pand t2, mpb_1 | pand t2, mpb_1 | ||||
| @@ -543,10 +564,8 @@ DEBLOCK_LUMA sse2, v, 16 | |||||
| pand t3, mpb_1 | pand t3, mpb_1 | ||||
| psubb t1, t3 ; p0'a = (p2+2*p1+2*p0+2*q0+q1+4)/8 | psubb t1, t3 ; p0'a = (p2+2*p1+2*p0+2*q0+q1+4)/8 | ||||
| mova t3, p0 | |||||
| mova t2, p0 | |||||
| pxor t3, q1 | |||||
| pavgb t2, q1 | |||||
| pxor t3, p0, q1 | |||||
| pavgb t2, p0, q1 | |||||
| pand t3, mpb_1 | pand t3, mpb_1 | ||||
| psubb t2, t3 | psubb t2, t3 | ||||
| pavgb t2, p1 ; p0'b = (2*p1+p0+q0+2)/4 | pavgb t2, p1 ; p0'b = (2*p1+p0+q0+2)/4 | ||||
| @@ -560,9 +579,8 @@ DEBLOCK_LUMA sse2, v, 16 | |||||
| mova %1, t1 ; store p0 | mova %1, t1 ; store p0 | ||||
| mova t1, %4 ; p3 | mova t1, %4 ; p3 | ||||
| mova t2, t1 | |||||
| paddb t2, t1, p2 | |||||
| pavgb t1, p2 | pavgb t1, p2 | ||||
| paddb t2, p2 | |||||
| pavgb t1, t0 ; (p3+p2+1)/2 + (p2+p1+p0+q0+2)/4 | pavgb t1, t0 ; (p3+p2+1)/2 + (p2+p1+p0+q0+2)/4 | ||||
| paddb t2, t2 | paddb t2, t2 | ||||
| paddb t2, t4 ; 2*p3+3*p2+p1+p0+q0 | paddb t2, t4 ; 2*p3+3*p2+p1+p0+q0 | ||||
| @@ -624,9 +642,9 @@ DEBLOCK_LUMA sse2, v, 16 | |||||
| %endif | %endif | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta ) | |||||
| ; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| cglobal x264_deblock_%2_luma_intra_%1, 4,6,16 | |||||
| cglobal deblock_%2_luma_intra_8_%1, 4,6,16 | |||||
| %ifndef ARCH_X86_64 | %ifndef ARCH_X86_64 | ||||
| sub esp, 0x60 | sub esp, 0x60 | ||||
| %endif | %endif | ||||
| @@ -686,9 +704,9 @@ cglobal x264_deblock_%2_luma_intra_%1, 4,6,16 | |||||
| INIT_MMX | INIT_MMX | ||||
| %ifdef ARCH_X86_64 | %ifdef ARCH_X86_64 | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta ) | |||||
| ; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| cglobal x264_deblock_h_luma_intra_%1, 4,7 | |||||
| cglobal deblock_h_luma_intra_8_%1, 4,7 | |||||
| movsxd r10, r1d | movsxd r10, r1d | ||||
| lea r11, [r10*3] | lea r11, [r10*3] | ||||
| lea r6, [r0-4] | lea r6, [r0-4] | ||||
| @@ -704,7 +722,7 @@ cglobal x264_deblock_h_luma_intra_%1, 4,7 | |||||
| lea r0, [pix_tmp+0x40] | lea r0, [pix_tmp+0x40] | ||||
| mov r1, 0x10 | mov r1, 0x10 | ||||
| call x264_deblock_v_luma_intra_%1 | |||||
| call deblock_v_luma_intra_8_%1 | |||||
| ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8) | ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8) | ||||
| lea r5, [r6+r11] | lea r5, [r6+r11] | ||||
| @@ -717,7 +735,7 @@ cglobal x264_deblock_h_luma_intra_%1, 4,7 | |||||
| add rsp, 0x88 | add rsp, 0x88 | ||||
| RET | RET | ||||
| %else | %else | ||||
| cglobal x264_deblock_h_luma_intra_%1, 2,4 | |||||
| cglobal deblock_h_luma_intra_8_%1, 2,4 | |||||
| lea r3, [r1*3] | lea r3, [r1*3] | ||||
| sub r0, 4 | sub r0, 4 | ||||
| lea r2, [r0+r3] | lea r2, [r0+r3] | ||||
| @@ -736,10 +754,10 @@ cglobal x264_deblock_h_luma_intra_%1, 2,4 | |||||
| PUSH dword r2m | PUSH dword r2m | ||||
| PUSH dword 16 | PUSH dword 16 | ||||
| PUSH r0 | PUSH r0 | ||||
| call x264_deblock_%2_luma_intra_%1 | |||||
| call deblock_%2_luma_intra_8_%1 | |||||
| %ifidn %2, v8 | %ifidn %2, v8 | ||||
| add dword [rsp], 8 ; pix_tmp+8 | add dword [rsp], 8 ; pix_tmp+8 | ||||
| call x264_deblock_%2_luma_intra_%1 | |||||
| call deblock_%2_luma_intra_8_%1 | |||||
| %endif | %endif | ||||
| ADD esp, 16 | ADD esp, 16 | ||||
| @@ -760,13 +778,13 @@ cglobal x264_deblock_h_luma_intra_%1, 2,4 | |||||
| INIT_XMM | INIT_XMM | ||||
| DEBLOCK_LUMA_INTRA sse2, v | DEBLOCK_LUMA_INTRA sse2, v | ||||
| INIT_AVX | |||||
| DEBLOCK_LUMA_INTRA avx , v | |||||
| %ifndef ARCH_X86_64 | %ifndef ARCH_X86_64 | ||||
| INIT_MMX | INIT_MMX | ||||
| DEBLOCK_LUMA_INTRA mmxext, v8 | DEBLOCK_LUMA_INTRA mmxext, v8 | ||||
| %endif | %endif | ||||
| INIT_MMX | INIT_MMX | ||||
| %macro CHROMA_V_START 0 | %macro CHROMA_V_START 0 | ||||
| @@ -790,23 +808,23 @@ INIT_MMX | |||||
| %define t6 r6 | %define t6 r6 | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ; void ff_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| cglobal x264_deblock_v_chroma_mmxext, 5,6 | |||||
| cglobal deblock_v_chroma_8_mmxext, 5,6 | |||||
| CHROMA_V_START | CHROMA_V_START | ||||
| movq m0, [t5] | movq m0, [t5] | ||||
| movq m1, [t5+r1] | movq m1, [t5+r1] | ||||
| movq m2, [r0] | movq m2, [r0] | ||||
| movq m3, [r0+r1] | movq m3, [r0+r1] | ||||
| call x264_chroma_inter_body_mmxext | |||||
| call ff_chroma_inter_body_mmxext | |||||
| movq [t5+r1], m1 | movq [t5+r1], m1 | ||||
| movq [r0], m2 | movq [r0], m2 | ||||
| RET | RET | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| cglobal x264_deblock_h_chroma_mmxext, 5,7 | |||||
| cglobal deblock_h_chroma_8_mmxext, 5,7 | |||||
| %ifdef ARCH_X86_64 | %ifdef ARCH_X86_64 | ||||
| %define buf0 [rsp-24] | %define buf0 [rsp-24] | ||||
| %define buf1 [rsp-16] | %define buf1 [rsp-16] | ||||
| @@ -815,17 +833,17 @@ cglobal x264_deblock_h_chroma_mmxext, 5,7 | |||||
| %define buf1 r2m | %define buf1 r2m | ||||
| %endif | %endif | ||||
| CHROMA_H_START | CHROMA_H_START | ||||
| TRANSPOSE4x8_LOAD PASS8ROWS(t5, r0, r1, t6) | |||||
| TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6) | |||||
| movq buf0, m0 | movq buf0, m0 | ||||
| movq buf1, m3 | movq buf1, m3 | ||||
| call x264_chroma_inter_body_mmxext | |||||
| call ff_chroma_inter_body_mmxext | |||||
| movq m0, buf0 | movq m0, buf0 | ||||
| movq m3, buf1 | movq m3, buf1 | ||||
| TRANSPOSE8x4_STORE PASS8ROWS(t5, r0, r1, t6) | |||||
| TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) | |||||
| RET | RET | ||||
| ALIGN 16 | ALIGN 16 | ||||
| x264_chroma_inter_body_mmxext: | |||||
| ff_chroma_inter_body_mmxext: | |||||
| LOAD_MASK r2d, r3d | LOAD_MASK r2d, r3d | ||||
| movd m6, [r4] ; tc0 | movd m6, [r4] ; tc0 | ||||
| punpcklbw m6, m6 | punpcklbw m6, m6 | ||||
| @@ -850,31 +868,31 @@ x264_chroma_inter_body_mmxext: | |||||
| %define t6 r5 | %define t6 r5 | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta ) | |||||
| ; void ff_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| cglobal x264_deblock_v_chroma_intra_mmxext, 4,5 | |||||
| cglobal deblock_v_chroma_intra_8_mmxext, 4,5 | |||||
| CHROMA_V_START | CHROMA_V_START | ||||
| movq m0, [t5] | movq m0, [t5] | ||||
| movq m1, [t5+r1] | movq m1, [t5+r1] | ||||
| movq m2, [r0] | movq m2, [r0] | ||||
| movq m3, [r0+r1] | movq m3, [r0+r1] | ||||
| call x264_chroma_intra_body_mmxext | |||||
| call ff_chroma_intra_body_mmxext | |||||
| movq [t5+r1], m1 | movq [t5+r1], m1 | ||||
| movq [r0], m2 | movq [r0], m2 | ||||
| RET | RET | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void x264_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta ) | |||||
| ; void ff_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| cglobal x264_deblock_h_chroma_intra_mmxext, 4,6 | |||||
| cglobal deblock_h_chroma_intra_8_mmxext, 4,6 | |||||
| CHROMA_H_START | CHROMA_H_START | ||||
| TRANSPOSE4x8_LOAD PASS8ROWS(t5, r0, r1, t6) | |||||
| call x264_chroma_intra_body_mmxext | |||||
| TRANSPOSE8x4_STORE PASS8ROWS(t5, r0, r1, t6) | |||||
| TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6) | |||||
| call ff_chroma_intra_body_mmxext | |||||
| TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) | |||||
| RET | RET | ||||
| ALIGN 16 | ALIGN 16 | ||||
| x264_chroma_intra_body_mmxext: | |||||
| ff_chroma_intra_body_mmxext: | |||||
| LOAD_MASK r2d, r3d | LOAD_MASK r2d, r3d | ||||
| movq m5, m1 | movq m5, m1 | ||||
| movq m6, m2 | movq m6, m2 | ||||
| @@ -0,0 +1,910 @@ | |||||
| ;***************************************************************************** | |||||
| ;* MMX/SSE2/AVX-optimized 10-bit H.264 deblocking code | |||||
| ;***************************************************************************** | |||||
| ;* Copyright (C) 2005-2011 x264 project | |||||
| ;* | |||||
| ;* Authors: Oskar Arvidsson <oskar@irock.se> | |||||
| ;* Loren Merritt <lorenm@u.washington.edu> | |||||
| ;* Jason Garrett-Glaser <darkshikari@gmail.com> | |||||
| ;* | |||||
| ;* This file is part of Libav. | |||||
| ;* | |||||
| ;* Libav is free software; you can redistribute it and/or | |||||
| ;* modify it under the terms of the GNU Lesser General Public | |||||
| ;* License as published by the Free Software Foundation; either | |||||
| ;* version 2.1 of the License, or (at your option) any later version. | |||||
| ;* | |||||
| ;* Libav is distributed in the hope that it will be useful, | |||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| ;* Lesser General Public License for more details. | |||||
| ;* | |||||
| ;* You should have received a copy of the GNU Lesser General Public | |||||
| ;* License along with Libav; if not, write to the Free Software | |||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| ;****************************************************************************** | |||||
| %include "x86inc.asm" | |||||
| %include "x86util.asm" | |||||
| SECTION_RODATA | |||||
| pw_pixel_max: times 8 dw ((1 << 10)-1) | |||||
| SECTION .text | |||||
| cextern pw_2 | |||||
| cextern pw_3 | |||||
| cextern pw_4 | |||||
| ; out: %4 = |%1-%2|-%3 | |||||
| ; clobbers: %5 | |||||
| %macro ABS_SUB 5 | |||||
| psubusw %5, %2, %1 | |||||
| psubusw %4, %1, %2 | |||||
| por %4, %5 | |||||
| psubw %4, %3 | |||||
| %endmacro | |||||
| ; out: %4 = |%1-%2|<%3 | |||||
| %macro DIFF_LT 5 | |||||
| psubusw %4, %2, %1 | |||||
| psubusw %5, %1, %2 | |||||
| por %5, %4 ; |%1-%2| | |||||
| pxor %4, %4 | |||||
| psubw %5, %3 ; |%1-%2|-%3 | |||||
| pcmpgtw %4, %5 ; 0 > |%1-%2|-%3 | |||||
| %endmacro | |||||
| %macro LOAD_AB 4 | |||||
| movd %1, %3 | |||||
| movd %2, %4 | |||||
| SPLATW %1, %1 | |||||
| SPLATW %2, %2 | |||||
| %endmacro | |||||
| ; in: %2=tc reg | |||||
| ; out: %1=splatted tc | |||||
| %macro LOAD_TC 2 | |||||
| movd %1, [%2] | |||||
| punpcklbw %1, %1 | |||||
| %if mmsize == 8 | |||||
| pshufw %1, %1, 0 | |||||
| %else | |||||
| pshuflw %1, %1, 01010000b | |||||
| pshufd %1, %1, 01010000b | |||||
| %endif | |||||
| psraw %1, 6 | |||||
| %endmacro | |||||
| ; in: %1=p1, %2=p0, %3=q0, %4=q1 | |||||
| ; %5=alpha, %6=beta, %7-%9=tmp | |||||
| ; out: %7=mask | |||||
| %macro LOAD_MASK 9 | |||||
| ABS_SUB %2, %3, %5, %8, %7 ; |p0-q0| - alpha | |||||
| ABS_SUB %1, %2, %6, %9, %7 ; |p1-p0| - beta | |||||
| pand %8, %9 | |||||
| ABS_SUB %3, %4, %6, %9, %7 ; |q1-q0| - beta | |||||
| pxor %7, %7 | |||||
| pand %8, %9 | |||||
| pcmpgtw %7, %8 | |||||
| %endmacro | |||||
| ; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp | |||||
| ; out: %1=p0', %2=q0' | |||||
| %macro DEBLOCK_P0_Q0 7 | |||||
| psubw %3, %4 | |||||
| pxor %7, %7 | |||||
| paddw %3, [pw_4] | |||||
| psubw %7, %5 | |||||
| psubw %6, %2, %1 | |||||
| psllw %6, 2 | |||||
| paddw %3, %6 | |||||
| psraw %3, 3 | |||||
| mova %6, [pw_pixel_max] | |||||
| CLIPW %3, %7, %5 | |||||
| pxor %7, %7 | |||||
| paddw %1, %3 | |||||
| psubw %2, %3 | |||||
| CLIPW %1, %7, %6 | |||||
| CLIPW %2, %7, %6 | |||||
| %endmacro | |||||
| ; in: %1=x2, %2=x1, %3=p0, %4=q0 %5=mask&tc, %6=tmp | |||||
| %macro LUMA_Q1 6 | |||||
| pavgw %6, %3, %4 ; (p0+q0+1)>>1 | |||||
| paddw %1, %6 | |||||
| pxor %6, %6 | |||||
| psraw %1, 1 | |||||
| psubw %6, %5 | |||||
| psubw %1, %2 | |||||
| CLIPW %1, %6, %5 | |||||
| paddw %1, %2 | |||||
| %endmacro | |||||
| %macro LUMA_DEBLOCK_ONE 3 | |||||
| DIFF_LT m5, %1, bm, m4, m6 | |||||
| pxor m6, m6 | |||||
| mova %3, m4 | |||||
| pcmpgtw m6, tcm | |||||
| pand m4, tcm | |||||
| pandn m6, m7 | |||||
| pand m4, m6 | |||||
| LUMA_Q1 m5, %2, m1, m2, m4, m6 | |||||
| %endmacro | |||||
| %macro LUMA_H_STORE 2 | |||||
| %if mmsize == 8 | |||||
| movq [r0-4], m0 | |||||
| movq [r0+r1-4], m1 | |||||
| movq [r0+r1*2-4], m2 | |||||
| movq [r0+%2-4], m3 | |||||
| %else | |||||
| movq [r0-4], m0 | |||||
| movhps [r0+r1-4], m0 | |||||
| movq [r0+r1*2-4], m1 | |||||
| movhps [%1-4], m1 | |||||
| movq [%1+r1-4], m2 | |||||
| movhps [%1+r1*2-4], m2 | |||||
| movq [%1+%2-4], m3 | |||||
| movhps [%1+r1*4-4], m3 | |||||
| %endif | |||||
| %endmacro | |||||
| %macro DEBLOCK_LUMA 1 | |||||
| ;----------------------------------------------------------------------------- | |||||
| ; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ;----------------------------------------------------------------------------- | |||||
| cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16) | |||||
| %assign pad 5*mmsize+12-(stack_offset&15) | |||||
| %define tcm [rsp] | |||||
| %define ms1 [rsp+mmsize] | |||||
| %define ms2 [rsp+mmsize*2] | |||||
| %define am [rsp+mmsize*3] | |||||
| %define bm [rsp+mmsize*4] | |||||
| SUB rsp, pad | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| LOAD_AB m4, m5, r2, r3 | |||||
| mov r3, 32/mmsize | |||||
| mov r2, r0 | |||||
| sub r0, r1 | |||||
| mova am, m4 | |||||
| sub r0, r1 | |||||
| mova bm, m5 | |||||
| sub r0, r1 | |||||
| .loop: | |||||
| mova m0, [r0+r1] | |||||
| mova m1, [r0+r1*2] | |||||
| mova m2, [r2] | |||||
| mova m3, [r2+r1] | |||||
| LOAD_MASK m0, m1, m2, m3, am, bm, m7, m4, m6 | |||||
| LOAD_TC m6, r4 | |||||
| mova tcm, m6 | |||||
| mova m5, [r0] | |||||
| LUMA_DEBLOCK_ONE m1, m0, ms1 | |||||
| mova [r0+r1], m5 | |||||
| mova m5, [r2+r1*2] | |||||
| LUMA_DEBLOCK_ONE m2, m3, ms2 | |||||
| mova [r2+r1], m5 | |||||
| pxor m5, m5 | |||||
| mova m6, tcm | |||||
| pcmpgtw m5, tcm | |||||
| psubw m6, ms1 | |||||
| pandn m5, m7 | |||||
| psubw m6, ms2 | |||||
| pand m5, m6 | |||||
| DEBLOCK_P0_Q0 m1, m2, m0, m3, m5, m7, m6 | |||||
| mova [r0+r1*2], m1 | |||||
| mova [r2], m2 | |||||
| add r0, mmsize | |||||
| add r2, mmsize | |||||
| add r4, mmsize/8 | |||||
| dec r3 | |||||
| jg .loop | |||||
| ADD rsp, pad | |||||
| RET | |||||
| cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16) | |||||
| %assign pad 7*mmsize+12-(stack_offset&15) | |||||
| %define tcm [rsp] | |||||
| %define ms1 [rsp+mmsize] | |||||
| %define ms2 [rsp+mmsize*2] | |||||
| %define p1m [rsp+mmsize*3] | |||||
| %define p2m [rsp+mmsize*4] | |||||
| %define am [rsp+mmsize*5] | |||||
| %define bm [rsp+mmsize*6] | |||||
| SUB rsp, pad | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| LOAD_AB m4, m5, r2, r3 | |||||
| mov r3, r1 | |||||
| mova am, m4 | |||||
| add r3, r1 | |||||
| mov r5, 32/mmsize | |||||
| mova bm, m5 | |||||
| add r3, r1 | |||||
| %if mmsize == 16 | |||||
| mov r2, r0 | |||||
| add r2, r3 | |||||
| %endif | |||||
| .loop: | |||||
| %if mmsize == 8 | |||||
| movq m2, [r0-8] ; y q2 q1 q0 | |||||
| movq m7, [r0+0] | |||||
| movq m5, [r0+r1-8] | |||||
| movq m3, [r0+r1+0] | |||||
| movq m0, [r0+r1*2-8] | |||||
| movq m6, [r0+r1*2+0] | |||||
| movq m1, [r0+r3-8] | |||||
| TRANSPOSE4x4W 2, 5, 0, 1, 4 | |||||
| SWAP 2, 7 | |||||
| movq m7, [r0+r3] | |||||
| TRANSPOSE4x4W 2, 3, 6, 7, 4 | |||||
| %else | |||||
| movu m5, [r0-8] ; y q2 q1 q0 p0 p1 p2 x | |||||
| movu m0, [r0+r1-8] | |||||
| movu m2, [r0+r1*2-8] | |||||
| movu m3, [r2-8] | |||||
| TRANSPOSE4x4W 5, 0, 2, 3, 6 | |||||
| mova tcm, m3 | |||||
| movu m4, [r2+r1-8] | |||||
| movu m1, [r2+r1*2-8] | |||||
| movu m3, [r2+r3-8] | |||||
| movu m7, [r2+r1*4-8] | |||||
| TRANSPOSE4x4W 4, 1, 3, 7, 6 | |||||
| mova m6, tcm | |||||
| punpcklqdq m6, m7 | |||||
| punpckhqdq m5, m4 | |||||
| SBUTTERFLY qdq, 0, 1, 7 | |||||
| SBUTTERFLY qdq, 2, 3, 7 | |||||
| %endif | |||||
| mova p2m, m6 | |||||
| LOAD_MASK m0, m1, m2, m3, am, bm, m7, m4, m6 | |||||
| LOAD_TC m6, r4 | |||||
| mova tcm, m6 | |||||
| LUMA_DEBLOCK_ONE m1, m0, ms1 | |||||
| mova p1m, m5 | |||||
| mova m5, p2m | |||||
| LUMA_DEBLOCK_ONE m2, m3, ms2 | |||||
| mova p2m, m5 | |||||
| pxor m5, m5 | |||||
| mova m6, tcm | |||||
| pcmpgtw m5, tcm | |||||
| psubw m6, ms1 | |||||
| pandn m5, m7 | |||||
| psubw m6, ms2 | |||||
| pand m5, m6 | |||||
| DEBLOCK_P0_Q0 m1, m2, m0, m3, m5, m7, m6 | |||||
| mova m0, p1m | |||||
| mova m3, p2m | |||||
| TRANSPOSE4x4W 0, 1, 2, 3, 4 | |||||
| LUMA_H_STORE r2, r3 | |||||
| add r4, mmsize/8 | |||||
| lea r0, [r0+r1*(mmsize/2)] | |||||
| lea r2, [r2+r1*(mmsize/2)] | |||||
| dec r5 | |||||
| jg .loop | |||||
| ADD rsp, pad | |||||
| RET | |||||
| %endmacro | |||||
| INIT_XMM | |||||
| %ifdef ARCH_X86_64 | |||||
| ; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2 | |||||
| ; m12=alpha, m13=beta | |||||
| ; out: m0=p1', m3=q1', m1=p0', m2=q0' | |||||
| ; clobbers: m4, m5, m6, m7, m10, m11, m14 | |||||
| %macro DEBLOCK_LUMA_INTER_SSE2 0 | |||||
| LOAD_MASK m0, m1, m2, m3, m12, m13, m7, m4, m6 | |||||
| LOAD_TC m6, r4 | |||||
| DIFF_LT m8, m1, m13, m10, m4 | |||||
| DIFF_LT m9, m2, m13, m11, m4 | |||||
| pand m6, m7 | |||||
| mova m14, m6 | |||||
| pxor m4, m4 | |||||
| pcmpgtw m6, m4 | |||||
| pand m6, m14 | |||||
| mova m5, m10 | |||||
| pand m5, m6 | |||||
| LUMA_Q1 m8, m0, m1, m2, m5, m4 | |||||
| mova m5, m11 | |||||
| pand m5, m6 | |||||
| LUMA_Q1 m9, m3, m1, m2, m5, m4 | |||||
| pxor m4, m4 | |||||
| psubw m6, m10 | |||||
| pcmpgtw m4, m14 | |||||
| pandn m4, m7 | |||||
| psubw m6, m11 | |||||
| pand m4, m6 | |||||
| DEBLOCK_P0_Q0 m1, m2, m0, m3, m4, m5, m6 | |||||
| SWAP 0, 8 | |||||
| SWAP 3, 9 | |||||
| %endmacro | |||||
| %macro DEBLOCK_LUMA_64 1 | |||||
| cglobal deblock_v_luma_10_%1, 5,5,15 | |||||
| %define p2 m8 | |||||
| %define p1 m0 | |||||
| %define p0 m1 | |||||
| %define q0 m2 | |||||
| %define q1 m3 | |||||
| %define q2 m9 | |||||
| %define mask0 m7 | |||||
| %define mask1 m10 | |||||
| %define mask2 m11 | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| LOAD_AB m12, m13, r2, r3 | |||||
| mov r2, r0 | |||||
| sub r0, r1 | |||||
| sub r0, r1 | |||||
| sub r0, r1 | |||||
| mov r3, 2 | |||||
| .loop: | |||||
| mova p2, [r0] | |||||
| mova p1, [r0+r1] | |||||
| mova p0, [r0+r1*2] | |||||
| mova q0, [r2] | |||||
| mova q1, [r2+r1] | |||||
| mova q2, [r2+r1*2] | |||||
| DEBLOCK_LUMA_INTER_SSE2 | |||||
| mova [r0+r1], p1 | |||||
| mova [r0+r1*2], p0 | |||||
| mova [r2], q0 | |||||
| mova [r2+r1], q1 | |||||
| add r0, mmsize | |||||
| add r2, mmsize | |||||
| add r4, 2 | |||||
| dec r3 | |||||
| jg .loop | |||||
| REP_RET | |||||
| cglobal deblock_h_luma_10_%1, 5,7,15 | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| LOAD_AB m12, m13, r2, r3 | |||||
| mov r2, r1 | |||||
| add r2, r1 | |||||
| add r2, r1 | |||||
| mov r5, r0 | |||||
| add r5, r2 | |||||
| mov r6, 2 | |||||
| .loop: | |||||
| movu m8, [r0-8] ; y q2 q1 q0 p0 p1 p2 x | |||||
| movu m0, [r0+r1-8] | |||||
| movu m2, [r0+r1*2-8] | |||||
| movu m9, [r5-8] | |||||
| movu m5, [r5+r1-8] | |||||
| movu m1, [r5+r1*2-8] | |||||
| movu m3, [r5+r2-8] | |||||
| movu m7, [r5+r1*4-8] | |||||
| TRANSPOSE4x4W 8, 0, 2, 9, 10 | |||||
| TRANSPOSE4x4W 5, 1, 3, 7, 10 | |||||
| punpckhqdq m8, m5 | |||||
| SBUTTERFLY qdq, 0, 1, 10 | |||||
| SBUTTERFLY qdq, 2, 3, 10 | |||||
| punpcklqdq m9, m7 | |||||
| DEBLOCK_LUMA_INTER_SSE2 | |||||
| TRANSPOSE4x4W 0, 1, 2, 3, 4 | |||||
| LUMA_H_STORE r5, r2 | |||||
| add r4, 2 | |||||
| lea r0, [r0+r1*8] | |||||
| lea r5, [r5+r1*8] | |||||
| dec r6 | |||||
| jg .loop | |||||
| REP_RET | |||||
| %endmacro | |||||
| INIT_XMM | |||||
| DEBLOCK_LUMA_64 sse2 | |||||
| INIT_AVX | |||||
| DEBLOCK_LUMA_64 avx | |||||
| %endif | |||||
| %macro SWAPMOVA 2 | |||||
| %ifid %1 | |||||
| SWAP %1, %2 | |||||
| %else | |||||
| mova %1, %2 | |||||
| %endif | |||||
| %endmacro | |||||
| ; in: t0-t2: tmp registers | |||||
| ; %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0 | |||||
| ; %8=mask1p %9=2 %10=p0' %11=p1' %12=p2' | |||||
| %macro LUMA_INTRA_P012 12 ; p0..p3 in memory | |||||
| %ifdef ARCH_X86_64 | |||||
| paddw t0, %3, %2 | |||||
| mova t2, %4 | |||||
| paddw t2, %3 | |||||
| %else | |||||
| mova t0, %3 | |||||
| mova t2, %4 | |||||
| paddw t0, %2 | |||||
| paddw t2, %3 | |||||
| %endif | |||||
| paddw t0, %1 | |||||
| paddw t2, t2 | |||||
| paddw t0, %5 | |||||
| paddw t2, %9 | |||||
| paddw t0, %9 ; (p2 + p1 + p0 + q0 + 2) | |||||
| paddw t2, t0 ; (2*p3 + 3*p2 + p1 + p0 + q0 + 4) | |||||
| psrlw t2, 3 | |||||
| psrlw t1, t0, 2 | |||||
| psubw t2, %3 | |||||
| psubw t1, %2 | |||||
| pand t2, %8 | |||||
| pand t1, %8 | |||||
| paddw t2, %3 | |||||
| paddw t1, %2 | |||||
| SWAPMOVA %11, t1 | |||||
| psubw t1, t0, %3 | |||||
| paddw t0, t0 | |||||
| psubw t1, %5 | |||||
| psubw t0, %3 | |||||
| paddw t1, %6 | |||||
| paddw t1, %2 | |||||
| paddw t0, %6 | |||||
| psrlw t1, 2 ; (2*p1 + p0 + q1 + 2)/4 | |||||
| psrlw t0, 3 ; (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3 | |||||
| pxor t0, t1 | |||||
| pxor t1, %1 | |||||
| pand t0, %8 | |||||
| pand t1, %7 | |||||
| pxor t0, t1 | |||||
| pxor t0, %1 | |||||
| SWAPMOVA %10, t0 | |||||
| SWAPMOVA %12, t2 | |||||
| %endmacro | |||||
| %macro LUMA_INTRA_INIT 1 | |||||
| %xdefine pad %1*mmsize+((gprsize*3) % mmsize)-(stack_offset&15) | |||||
| %define t0 m4 | |||||
| %define t1 m5 | |||||
| %define t2 m6 | |||||
| %define t3 m7 | |||||
| %assign i 4 | |||||
| %rep %1 | |||||
| CAT_XDEFINE t, i, [rsp+mmsize*(i-4)] | |||||
| %assign i i+1 | |||||
| %endrep | |||||
| SUB rsp, pad | |||||
| %endmacro | |||||
| ; in: %1-%3=tmp, %4=p2, %5=q2 | |||||
| %macro LUMA_INTRA_INTER 5 | |||||
| LOAD_AB t0, t1, r2d, r3d | |||||
| mova %1, t0 | |||||
| LOAD_MASK m0, m1, m2, m3, %1, t1, t0, t2, t3 | |||||
| %ifdef ARCH_X86_64 | |||||
| mova %2, t0 ; mask0 | |||||
| psrlw t3, %1, 2 | |||||
| %else | |||||
| mova t3, %1 | |||||
| mova %2, t0 ; mask0 | |||||
| psrlw t3, 2 | |||||
| %endif | |||||
| paddw t3, [pw_2] ; alpha/4+2 | |||||
| DIFF_LT m1, m2, t3, t2, t0 ; t2 = |p0-q0| < alpha/4+2 | |||||
| pand t2, %2 | |||||
| mova t3, %5 ; q2 | |||||
| mova %1, t2 ; mask1 | |||||
| DIFF_LT t3, m2, t1, t2, t0 ; t2 = |q2-q0| < beta | |||||
| pand t2, %1 | |||||
| mova t3, %4 ; p2 | |||||
| mova %3, t2 ; mask1q | |||||
| DIFF_LT t3, m1, t1, t2, t0 ; t2 = |p2-p0| < beta | |||||
| pand t2, %1 | |||||
| mova %1, t2 ; mask1p | |||||
| %endmacro | |||||
| %macro LUMA_H_INTRA_LOAD 0 | |||||
| %if mmsize == 8 | |||||
| movu t0, [r0-8] | |||||
| movu t1, [r0+r1-8] | |||||
| movu m0, [r0+r1*2-8] | |||||
| movu m1, [r0+r4-8] | |||||
| TRANSPOSE4x4W 4, 5, 0, 1, 2 | |||||
| mova t4, t0 ; p3 | |||||
| mova t5, t1 ; p2 | |||||
| movu m2, [r0] | |||||
| movu m3, [r0+r1] | |||||
| movu t0, [r0+r1*2] | |||||
| movu t1, [r0+r4] | |||||
| TRANSPOSE4x4W 2, 3, 4, 5, 6 | |||||
| mova t6, t0 ; q2 | |||||
| mova t7, t1 ; q3 | |||||
| %else | |||||
| movu t0, [r0-8] | |||||
| movu t1, [r0+r1-8] | |||||
| movu m0, [r0+r1*2-8] | |||||
| movu m1, [r0+r5-8] | |||||
| movu m2, [r4-8] | |||||
| movu m3, [r4+r1-8] | |||||
| movu t2, [r4+r1*2-8] | |||||
| movu t3, [r4+r5-8] | |||||
| TRANSPOSE8x8W 4, 5, 0, 1, 2, 3, 6, 7, t4, t5 | |||||
| mova t4, t0 ; p3 | |||||
| mova t5, t1 ; p2 | |||||
| mova t6, t2 ; q2 | |||||
| mova t7, t3 ; q3 | |||||
| %endif | |||||
| %endmacro | |||||
| ; in: %1=q3 %2=q2' %3=q1' %4=q0' %5=p0' %6=p1' %7=p2' %8=p3 %9=tmp | |||||
| %macro LUMA_H_INTRA_STORE 9 | |||||
| %if mmsize == 8 | |||||
| TRANSPOSE4x4W %1, %2, %3, %4, %9 | |||||
| movq [r0-8], m%1 | |||||
| movq [r0+r1-8], m%2 | |||||
| movq [r0+r1*2-8], m%3 | |||||
| movq [r0+r4-8], m%4 | |||||
| movq m%1, %8 | |||||
| TRANSPOSE4x4W %5, %6, %7, %1, %9 | |||||
| movq [r0], m%5 | |||||
| movq [r0+r1], m%6 | |||||
| movq [r0+r1*2], m%7 | |||||
| movq [r0+r4], m%1 | |||||
| %else | |||||
| TRANSPOSE2x4x4W %1, %2, %3, %4, %9 | |||||
| movq [r0-8], m%1 | |||||
| movq [r0+r1-8], m%2 | |||||
| movq [r0+r1*2-8], m%3 | |||||
| movq [r0+r5-8], m%4 | |||||
| movhps [r4-8], m%1 | |||||
| movhps [r4+r1-8], m%2 | |||||
| movhps [r4+r1*2-8], m%3 | |||||
| movhps [r4+r5-8], m%4 | |||||
| %ifnum %8 | |||||
| SWAP %1, %8 | |||||
| %else | |||||
| mova m%1, %8 | |||||
| %endif | |||||
| TRANSPOSE2x4x4W %5, %6, %7, %1, %9 | |||||
| movq [r0], m%5 | |||||
| movq [r0+r1], m%6 | |||||
| movq [r0+r1*2], m%7 | |||||
| movq [r0+r5], m%1 | |||||
| movhps [r4], m%5 | |||||
| movhps [r4+r1], m%6 | |||||
| movhps [r4+r1*2], m%7 | |||||
| movhps [r4+r5], m%1 | |||||
| %endif | |||||
| %endmacro | |||||
| %ifdef ARCH_X86_64 | |||||
| ;----------------------------------------------------------------------------- | |||||
| ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | |||||
| %macro DEBLOCK_LUMA_INTRA_64 1 | |||||
| cglobal deblock_v_luma_intra_10_%1, 4,7,16 | |||||
| %define t0 m1 | |||||
| %define t1 m2 | |||||
| %define t2 m4 | |||||
| %define p2 m8 | |||||
| %define p1 m9 | |||||
| %define p0 m10 | |||||
| %define q0 m11 | |||||
| %define q1 m12 | |||||
| %define q2 m13 | |||||
| %define aa m5 | |||||
| %define bb m14 | |||||
| lea r4, [r1*4] | |||||
| lea r5, [r1*3] ; 3*stride | |||||
| neg r4 | |||||
| add r4, r0 ; pix-4*stride | |||||
| mov r6, 2 | |||||
| mova m0, [pw_2] | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| LOAD_AB aa, bb, r2d, r3d | |||||
| .loop | |||||
| mova p2, [r4+r1] | |||||
| mova p1, [r4+2*r1] | |||||
| mova p0, [r4+r5] | |||||
| mova q0, [r0] | |||||
| mova q1, [r0+r1] | |||||
| mova q2, [r0+2*r1] | |||||
| LOAD_MASK p1, p0, q0, q1, aa, bb, m3, t0, t1 | |||||
| mova t2, aa | |||||
| psrlw t2, 2 | |||||
| paddw t2, m0 ; alpha/4+2 | |||||
| DIFF_LT p0, q0, t2, m6, t0 ; m6 = |p0-q0| < alpha/4+2 | |||||
| DIFF_LT p2, p0, bb, t1, t0 ; t1 = |p2-p0| < beta | |||||
| DIFF_LT q2, q0, bb, m7, t0 ; m7 = |q2-q0| < beta | |||||
| pand m6, m3 | |||||
| pand m7, m6 | |||||
| pand m6, t1 | |||||
| LUMA_INTRA_P012 p0, p1, p2, [r4], q0, q1, m3, m6, m0, [r4+r5], [r4+2*r1], [r4+r1] | |||||
| LUMA_INTRA_P012 q0, q1, q2, [r0+r5], p0, p1, m3, m7, m0, [r0], [r0+r1], [r0+2*r1] | |||||
| add r0, mmsize | |||||
| add r4, mmsize | |||||
| dec r6 | |||||
| jg .loop | |||||
| REP_RET | |||||
| ;----------------------------------------------------------------------------- | |||||
| ; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | |||||
| cglobal deblock_h_luma_intra_10_%1, 4,7,16 | |||||
| %define t0 m15 | |||||
| %define t1 m14 | |||||
| %define t2 m2 | |||||
| %define q3 m5 | |||||
| %define q2 m8 | |||||
| %define q1 m9 | |||||
| %define q0 m10 | |||||
| %define p0 m11 | |||||
| %define p1 m12 | |||||
| %define p2 m13 | |||||
| %define p3 m4 | |||||
| %define spill [rsp] | |||||
| %assign pad 24-(stack_offset&15) | |||||
| SUB rsp, pad | |||||
| lea r4, [r1*4] | |||||
| lea r5, [r1*3] ; 3*stride | |||||
| add r4, r0 ; pix+4*stride | |||||
| mov r6, 2 | |||||
| mova m0, [pw_2] | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| .loop | |||||
| movu q3, [r0-8] | |||||
| movu q2, [r0+r1-8] | |||||
| movu q1, [r0+r1*2-8] | |||||
| movu q0, [r0+r5-8] | |||||
| movu p0, [r4-8] | |||||
| movu p1, [r4+r1-8] | |||||
| movu p2, [r4+r1*2-8] | |||||
| movu p3, [r4+r5-8] | |||||
| TRANSPOSE8x8W 5, 8, 9, 10, 11, 12, 13, 4, 1 | |||||
| LOAD_AB m1, m2, r2d, r3d | |||||
| LOAD_MASK q1, q0, p0, p1, m1, m2, m3, t0, t1 | |||||
| psrlw m1, 2 | |||||
| paddw m1, m0 ; alpha/4+2 | |||||
| DIFF_LT p0, q0, m1, m6, t0 ; m6 = |p0-q0| < alpha/4+2 | |||||
| DIFF_LT q2, q0, m2, t1, t0 ; t1 = |q2-q0| < beta | |||||
| DIFF_LT p0, p2, m2, m7, t0 ; m7 = |p2-p0| < beta | |||||
| pand m6, m3 | |||||
| pand m7, m6 | |||||
| pand m6, t1 | |||||
| mova spill, q3 | |||||
| LUMA_INTRA_P012 q0, q1, q2, q3, p0, p1, m3, m6, m0, m5, m1, q2 | |||||
| LUMA_INTRA_P012 p0, p1, p2, p3, q0, q1, m3, m7, m0, p0, m6, p2 | |||||
| mova m7, spill | |||||
| LUMA_H_INTRA_STORE 7, 8, 1, 5, 11, 6, 13, 4, 14 | |||||
| lea r0, [r0+r1*8] | |||||
| lea r4, [r4+r1*8] | |||||
| dec r6 | |||||
| jg .loop | |||||
| ADD rsp, pad | |||||
| RET | |||||
| %endmacro | |||||
| INIT_XMM | |||||
| DEBLOCK_LUMA_INTRA_64 sse2 | |||||
| INIT_AVX | |||||
| DEBLOCK_LUMA_INTRA_64 avx | |||||
| %endif | |||||
| %macro DEBLOCK_LUMA_INTRA 1 | |||||
| ;----------------------------------------------------------------------------- | |||||
| ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | |||||
| cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16) | |||||
| LUMA_INTRA_INIT 3 | |||||
| lea r4, [r1*4] | |||||
| lea r5, [r1*3] | |||||
| neg r4 | |||||
| add r4, r0 | |||||
| mov r6, 32/mmsize | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| .loop: | |||||
| mova m0, [r4+r1*2] ; p1 | |||||
| mova m1, [r4+r5] ; p0 | |||||
| mova m2, [r0] ; q0 | |||||
| mova m3, [r0+r1] ; q1 | |||||
| LUMA_INTRA_INTER t4, t5, t6, [r4+r1], [r0+r1*2] | |||||
| LUMA_INTRA_P012 m1, m0, t3, [r4], m2, m3, t5, t4, [pw_2], [r4+r5], [r4+2*r1], [r4+r1] | |||||
| mova t3, [r0+r1*2] ; q2 | |||||
| LUMA_INTRA_P012 m2, m3, t3, [r0+r5], m1, m0, t5, t6, [pw_2], [r0], [r0+r1], [r0+2*r1] | |||||
| add r0, mmsize | |||||
| add r4, mmsize | |||||
| dec r6 | |||||
| jg .loop | |||||
| ADD rsp, pad | |||||
| RET | |||||
| ;----------------------------------------------------------------------------- | |||||
| ; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | |||||
| cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16) | |||||
| LUMA_INTRA_INIT 8 | |||||
| %if mmsize == 8 | |||||
| lea r4, [r1*3] | |||||
| mov r5, 32/mmsize | |||||
| %else | |||||
| lea r4, [r1*4] | |||||
| lea r5, [r1*3] ; 3*stride | |||||
| add r4, r0 ; pix+4*stride | |||||
| mov r6, 32/mmsize | |||||
| %endif | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| .loop: | |||||
| LUMA_H_INTRA_LOAD | |||||
| LUMA_INTRA_INTER t8, t9, t10, t5, t6 | |||||
| LUMA_INTRA_P012 m1, m0, t3, t4, m2, m3, t9, t8, [pw_2], t8, t5, t11 | |||||
| mova t3, t6 ; q2 | |||||
| LUMA_INTRA_P012 m2, m3, t3, t7, m1, m0, t9, t10, [pw_2], m4, t6, m5 | |||||
| mova m2, t4 | |||||
| mova m0, t11 | |||||
| mova m1, t5 | |||||
| mova m3, t8 | |||||
| mova m6, t6 | |||||
| LUMA_H_INTRA_STORE 2, 0, 1, 3, 4, 6, 5, t7, 7 | |||||
| lea r0, [r0+r1*(mmsize/2)] | |||||
| %if mmsize == 8 | |||||
| dec r5 | |||||
| %else | |||||
| lea r4, [r4+r1*(mmsize/2)] | |||||
| dec r6 | |||||
| %endif | |||||
| jg .loop | |||||
| ADD rsp, pad | |||||
| RET | |||||
| %endmacro | |||||
| %ifndef ARCH_X86_64 | |||||
| INIT_MMX | |||||
| DEBLOCK_LUMA mmxext | |||||
| DEBLOCK_LUMA_INTRA mmxext | |||||
| INIT_XMM | |||||
| DEBLOCK_LUMA sse2 | |||||
| DEBLOCK_LUMA_INTRA sse2 | |||||
| INIT_AVX | |||||
| DEBLOCK_LUMA avx | |||||
| DEBLOCK_LUMA_INTRA avx | |||||
| %endif | |||||
| ; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp | |||||
| ; out: %1=p0', %2=q0' | |||||
| %macro CHROMA_DEBLOCK_P0_Q0_INTRA 7 | |||||
| mova %6, [pw_2] | |||||
| paddw %6, %3 | |||||
| paddw %6, %4 | |||||
| paddw %7, %6, %2 | |||||
| paddw %6, %1 | |||||
| paddw %6, %3 | |||||
| paddw %7, %4 | |||||
| psraw %6, 2 | |||||
| psraw %7, 2 | |||||
| psubw %6, %1 | |||||
| psubw %7, %2 | |||||
| pand %6, %5 | |||||
| pand %7, %5 | |||||
| paddw %1, %6 | |||||
| paddw %2, %7 | |||||
| %endmacro | |||||
| %macro CHROMA_V_LOAD 1 | |||||
| mova m0, [r0] ; p1 | |||||
| mova m1, [r0+r1] ; p0 | |||||
| mova m2, [%1] ; q0 | |||||
| mova m3, [%1+r1] ; q1 | |||||
| %endmacro | |||||
| %macro CHROMA_V_STORE 0 | |||||
| mova [r0+1*r1], m1 | |||||
| mova [r0+2*r1], m2 | |||||
| %endmacro | |||||
| %macro DEBLOCK_CHROMA 1 | |||||
| ;----------------------------------------------------------------------------- | |||||
| ; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||||
| ;----------------------------------------------------------------------------- | |||||
| cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16) | |||||
| mov r5, r0 | |||||
| sub r0, r1 | |||||
| sub r0, r1 | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| %if mmsize < 16 | |||||
| mov r6, 16/mmsize | |||||
| .loop: | |||||
| %endif | |||||
| CHROMA_V_LOAD r5 | |||||
| LOAD_AB m4, m5, r2, r3 | |||||
| LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 | |||||
| pxor m4, m4 | |||||
| LOAD_TC m6, r4 | |||||
| psubw m6, [pw_3] | |||||
| pmaxsw m6, m4 | |||||
| pand m7, m6 | |||||
| DEBLOCK_P0_Q0 m1, m2, m0, m3, m7, m5, m6 | |||||
| CHROMA_V_STORE | |||||
| %if mmsize < 16 | |||||
| add r0, mmsize | |||||
| add r5, mmsize | |||||
| add r4, mmsize/8 | |||||
| dec r6 | |||||
| jg .loop | |||||
| REP_RET | |||||
| %else | |||||
| RET | |||||
| %endif | |||||
| ;----------------------------------------------------------------------------- | |||||
| ; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta ) | |||||
| ;----------------------------------------------------------------------------- | |||||
| cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16) | |||||
| mov r4, r0 | |||||
| sub r0, r1 | |||||
| sub r0, r1 | |||||
| shl r2d, 2 | |||||
| shl r3d, 2 | |||||
| %if mmsize < 16 | |||||
| mov r5, 16/mmsize | |||||
| .loop: | |||||
| %endif | |||||
| CHROMA_V_LOAD r4 | |||||
| LOAD_AB m4, m5, r2, r3 | |||||
| LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 | |||||
| CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6 | |||||
| CHROMA_V_STORE | |||||
| %if mmsize < 16 | |||||
| add r0, mmsize | |||||
| add r4, mmsize | |||||
| dec r5 | |||||
| jg .loop | |||||
| REP_RET | |||||
| %else | |||||
| RET | |||||
| %endif | |||||
| %endmacro | |||||
| %ifndef ARCH_X86_64 | |||||
| INIT_MMX | |||||
| DEBLOCK_CHROMA mmxext | |||||
| %endif | |||||
| INIT_XMM | |||||
| DEBLOCK_CHROMA sse2 | |||||
| INIT_AVX | |||||
| DEBLOCK_CHROMA avx | |||||
| @@ -218,41 +218,57 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] | |||||
| ); | ); | ||||
| } | } | ||||
| #define LF_FUNC(DIR, TYPE, OPT) \ | |||||
| void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \ | |||||
| int alpha, int beta, int8_t *tc0); | |||||
| #define LF_IFUNC(DIR, TYPE, OPT) \ | |||||
| void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \ | |||||
| int alpha, int beta); | |||||
| LF_FUNC (h, chroma, mmxext) | |||||
| LF_IFUNC(h, chroma_intra, mmxext) | |||||
| LF_FUNC (v, chroma, mmxext) | |||||
| LF_IFUNC(v, chroma_intra, mmxext) | |||||
| LF_FUNC (h, luma, mmxext) | |||||
| LF_IFUNC(h, luma_intra, mmxext) | |||||
| #if HAVE_YASM && ARCH_X86_32 | |||||
| LF_FUNC (v8, luma, mmxext) | |||||
| static void ff_x264_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | |||||
| #define LF_FUNC(DIR, TYPE, DEPTH, OPT) \ | |||||
| void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \ | |||||
| int alpha, int beta, int8_t *tc0); | |||||
| #define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \ | |||||
| void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \ | |||||
| int alpha, int beta); | |||||
| #define LF_FUNCS(type, depth)\ | |||||
| LF_FUNC (h, chroma, depth, mmxext)\ | |||||
| LF_IFUNC(h, chroma_intra, depth, mmxext)\ | |||||
| LF_FUNC (v, chroma, depth, mmxext)\ | |||||
| LF_IFUNC(v, chroma_intra, depth, mmxext)\ | |||||
| LF_FUNC (h, luma, depth, mmxext)\ | |||||
| LF_IFUNC(h, luma_intra, depth, mmxext)\ | |||||
| LF_FUNC (h, luma, depth, sse2)\ | |||||
| LF_IFUNC(h, luma_intra, depth, sse2)\ | |||||
| LF_FUNC (v, luma, depth, sse2)\ | |||||
| LF_IFUNC(v, luma_intra, depth, sse2)\ | |||||
| LF_FUNC (h, chroma, depth, sse2)\ | |||||
| LF_IFUNC(h, chroma_intra, depth, sse2)\ | |||||
| LF_FUNC (v, chroma, depth, sse2)\ | |||||
| LF_IFUNC(v, chroma_intra, depth, sse2)\ | |||||
| LF_FUNC (h, luma, depth, avx)\ | |||||
| LF_IFUNC(h, luma_intra, depth, avx)\ | |||||
| LF_FUNC (v, luma, depth, avx)\ | |||||
| LF_IFUNC(v, luma_intra, depth, avx)\ | |||||
| LF_FUNC (h, chroma, depth, avx)\ | |||||
| LF_IFUNC(h, chroma_intra, depth, avx)\ | |||||
| LF_FUNC (v, chroma, depth, avx)\ | |||||
| LF_IFUNC(v, chroma_intra, depth, avx) | |||||
| LF_FUNCS( uint8_t, 8) | |||||
| LF_FUNCS(uint16_t, 10) | |||||
| LF_FUNC (v8, luma, 8, mmxext) | |||||
| static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | |||||
| { | { | ||||
| if((tc0[0] & tc0[1]) >= 0) | if((tc0[0] & tc0[1]) >= 0) | ||||
| ff_x264_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0); | |||||
| ff_deblock_v8_luma_8_mmxext(pix+0, stride, alpha, beta, tc0); | |||||
| if((tc0[2] & tc0[3]) >= 0) | if((tc0[2] & tc0[3]) >= 0) | ||||
| ff_x264_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2); | |||||
| ff_deblock_v8_luma_8_mmxext(pix+8, stride, alpha, beta, tc0+2); | |||||
| } | } | ||||
| LF_IFUNC(v8, luma_intra, mmxext) | |||||
| static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta) | |||||
| LF_IFUNC(v8, luma_intra, 8, mmxext) | |||||
| static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta) | |||||
| { | { | ||||
| ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta); | |||||
| ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta); | |||||
| ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta); | |||||
| ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta); | |||||
| } | } | ||||
| #endif | |||||
| LF_FUNC (h, luma, sse2) | |||||
| LF_IFUNC(h, luma_intra, sse2) | |||||
| LF_FUNC (v, luma, sse2) | |||||
| LF_IFUNC(v, luma_intra, sse2) | |||||
| LF_FUNC (v, luma, 10, mmxext) | |||||
| LF_IFUNC(v, luma_intra, 10, mmxext) | |||||
| /***********************************/ | /***********************************/ | ||||
| /* weighted prediction */ | /* weighted prediction */ | ||||
| @@ -314,15 +330,15 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
| c->h264_idct_add8 = ff_h264_idct_add8_mmx2; | c->h264_idct_add8 = ff_h264_idct_add8_mmx2; | ||||
| c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; | c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; | ||||
| c->h264_v_loop_filter_chroma= ff_x264_deblock_v_chroma_mmxext; | |||||
| c->h264_h_loop_filter_chroma= ff_x264_deblock_h_chroma_mmxext; | |||||
| c->h264_v_loop_filter_chroma_intra= ff_x264_deblock_v_chroma_intra_mmxext; | |||||
| c->h264_h_loop_filter_chroma_intra= ff_x264_deblock_h_chroma_intra_mmxext; | |||||
| c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; | |||||
| c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; | |||||
| c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext; | |||||
| c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; | |||||
| #if ARCH_X86_32 | #if ARCH_X86_32 | ||||
| c->h264_v_loop_filter_luma= ff_x264_deblock_v_luma_mmxext; | |||||
| c->h264_h_loop_filter_luma= ff_x264_deblock_h_luma_mmxext; | |||||
| c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; | |||||
| c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; | |||||
| c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext; | |||||
| c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext; | |||||
| c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext; | |||||
| c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext; | |||||
| #endif | #endif | ||||
| c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; | c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; | ||||
| c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; | c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; | ||||
| @@ -360,10 +376,10 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
| c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2; | c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2; | ||||
| #if HAVE_ALIGNED_STACK | #if HAVE_ALIGNED_STACK | ||||
| c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; | |||||
| c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; | |||||
| c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; | |||||
| c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; | |||||
| c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2; | |||||
| c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2; | |||||
| c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2; | |||||
| c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; | |||||
| #endif | #endif | ||||
| c->h264_idct_add16 = ff_h264_idct_add16_sse2; | c->h264_idct_add16 = ff_h264_idct_add16_sse2; | ||||
| @@ -377,6 +393,49 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
| c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3; | c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3; | ||||
| c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3; | c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3; | ||||
| } | } | ||||
| if (mm_flags&AV_CPU_FLAG_AVX) { | |||||
| #if HAVE_ALIGNED_STACK | |||||
| c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; | |||||
| c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; | |||||
| c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; | |||||
| c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; | |||||
| #endif | |||||
| } | |||||
| } | |||||
| } | |||||
| #endif | |||||
| } else if (bit_depth == 10) { | |||||
| #if HAVE_YASM | |||||
| if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
| if (mm_flags & AV_CPU_FLAG_MMX2) { | |||||
| #if ARCH_X86_32 | |||||
| c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmxext; | |||||
| c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmxext; | |||||
| c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext; | |||||
| c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext; | |||||
| c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext; | |||||
| c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext; | |||||
| #endif | |||||
| if (mm_flags&AV_CPU_FLAG_SSE2) { | |||||
| c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2; | |||||
| c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2; | |||||
| #if HAVE_ALIGNED_STACK | |||||
| c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2; | |||||
| c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2; | |||||
| c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2; | |||||
| c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; | |||||
| #endif | |||||
| } | |||||
| if (mm_flags&AV_CPU_FLAG_AVX) { | |||||
| c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_avx; | |||||
| c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_avx; | |||||
| #if HAVE_ALIGNED_STACK | |||||
| c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx; | |||||
| c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx; | |||||
| c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx; | |||||
| c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; | |||||
| #endif | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -24,16 +24,20 @@ | |||||
| ;****************************************************************************** | ;****************************************************************************** | ||||
| %macro SBUTTERFLY 4 | %macro SBUTTERFLY 4 | ||||
| %if avx_enabled == 0 | |||||
| mova m%4, m%2 | mova m%4, m%2 | ||||
| punpckl%1 m%2, m%3 | punpckl%1 m%2, m%3 | ||||
| punpckh%1 m%4, m%3 | punpckh%1 m%4, m%3 | ||||
| %else | |||||
| punpckh%1 m%4, m%2, m%3 | |||||
| punpckl%1 m%2, m%3 | |||||
| %endif | |||||
| SWAP %3, %4 | SWAP %3, %4 | ||||
| %endmacro | %endmacro | ||||
| %macro SBUTTERFLY2 4 | %macro SBUTTERFLY2 4 | ||||
| mova m%4, m%2 | |||||
| punpckh%1 m%2, m%3 | |||||
| punpckl%1 m%4, m%3 | |||||
| punpckl%1 m%4, m%2, m%3 | |||||
| punpckh%1 m%2, m%2, m%3 | |||||
| SWAP %2, %4, %3 | SWAP %2, %4, %3 | ||||
| %endmacro | %endmacro | ||||
| @@ -444,3 +448,17 @@ | |||||
| %macro PMINUB_MMXEXT 3 ; dst, src, ignored | %macro PMINUB_MMXEXT 3 ; dst, src, ignored | ||||
| pminub %1, %2 | pminub %1, %2 | ||||
| %endmacro | %endmacro | ||||
| %macro SPLATW 2-3 0 | |||||
| %if mmsize == 16 | |||||
| pshuflw %1, %2, (%3)*0x55 | |||||
| punpcklqdq %1, %1 | |||||
| %else | |||||
| pshufw %1, %2, (%3)*0x55 | |||||
| %endif | |||||
| %endmacro | |||||
| %macro CLIPW 3 ;(dst, min, max) | |||||
| pmaxsw %1, %2 | |||||
| pminsw %1, %3 | |||||
| %endmacro | |||||
| @@ -524,6 +524,7 @@ static const StreamType MISC_types[] = { | |||||
| static const StreamType REGD_types[] = { | static const StreamType REGD_types[] = { | ||||
| { MKTAG('d','r','a','c'), AVMEDIA_TYPE_VIDEO, CODEC_ID_DIRAC }, | { MKTAG('d','r','a','c'), AVMEDIA_TYPE_VIDEO, CODEC_ID_DIRAC }, | ||||
| { MKTAG('A','C','-','3'), AVMEDIA_TYPE_AUDIO, CODEC_ID_AC3 }, | { MKTAG('A','C','-','3'), AVMEDIA_TYPE_AUDIO, CODEC_ID_AC3 }, | ||||
| { MKTAG('B','S','S','D'), AVMEDIA_TYPE_AUDIO, CODEC_ID_S302M }, | |||||
| { 0 }, | { 0 }, | ||||
| }; | }; | ||||
| @@ -808,6 +808,10 @@ void ff_rtsp_parse_line(RTSPMessageHeader *reply, const char *buf, | |||||
| p += strspn(p, SPACE_CHARS); | p += strspn(p, SPACE_CHARS); | ||||
| if (method && !strcmp(method, "PLAY")) | if (method && !strcmp(method, "PLAY")) | ||||
| rtsp_parse_rtp_info(rt, p); | rtsp_parse_rtp_info(rt, p); | ||||
| } else if (av_stristart(p, "Public:", &p) && rt) { | |||||
| if (strstr(p, "GET_PARAMETER") && | |||||
| method && !strcmp(method, "OPTIONS")) | |||||
| rt->get_parameter_supported = 1; | |||||
| } | } | ||||
| } | } | ||||
| @@ -331,6 +331,11 @@ typedef struct RTSPState { | |||||
| * Polling array for udp | * Polling array for udp | ||||
| */ | */ | ||||
| struct pollfd *p; | struct pollfd *p; | ||||
| /** | |||||
| * Whether the server supports the GET_PARAMETER method. | |||||
| */ | |||||
| int get_parameter_supported; | |||||
| } RTSPState; | } RTSPState; | ||||
| /** | /** | ||||
| @@ -341,7 +341,9 @@ retry: | |||||
| /* send dummy request to keep TCP connection alive */ | /* send dummy request to keep TCP connection alive */ | ||||
| if ((av_gettime() - rt->last_cmd_time) / 1000000 >= rt->timeout / 2) { | if ((av_gettime() - rt->last_cmd_time) / 1000000 >= rt->timeout / 2) { | ||||
| if (rt->server_type != RTSP_SERVER_REAL) { | |||||
| if (rt->server_type == RTSP_SERVER_WMS || | |||||
| (rt->server_type != RTSP_SERVER_REAL && | |||||
| rt->get_parameter_supported)) { | |||||
| ff_rtsp_send_cmd_async(s, "GET_PARAMETER", rt->control_uri, NULL); | ff_rtsp_send_cmd_async(s, "GET_PARAMETER", rt->control_uri, NULL); | ||||
| } else { | } else { | ||||
| ff_rtsp_send_cmd_async(s, "OPTIONS", "*", NULL); | ff_rtsp_send_cmd_async(s, "OPTIONS", "*", NULL); | ||||
| @@ -75,7 +75,7 @@ OBJS-$(ARCH_ARM) += arm/cpu.o | |||||
| OBJS-$(ARCH_PPC) += ppc/cpu.o | OBJS-$(ARCH_PPC) += ppc/cpu.o | ||||
| OBJS-$(ARCH_X86) += x86/cpu.o | OBJS-$(ARCH_X86) += x86/cpu.o | ||||
| TESTPROGS = adler32 aes base64 cpu crc des lls md5 pca sha softfloat tree | |||||
| TESTPROGS = adler32 aes base64 cpu crc des lls md5 pca sha tree | |||||
| TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo | TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo | ||||
| DIRS = arm bfin sh4 x86 | DIRS = arm bfin sh4 x86 | ||||
| @@ -24,47 +24,52 @@ | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <inttypes.h> | #include <inttypes.h> | ||||
| #define FIXP (1<<16) | |||||
| #define MY_PI 205887 //(M_PI*FIX) | |||||
| #define FIXP (1 << 16) | |||||
| #define MY_PI 205887 //(M_PI * FIX) | |||||
| static int64_t int_pow(int64_t a, int p){ | |||||
| int64_t v= FIXP; | |||||
| static int64_t int_pow(int64_t a, int p) | |||||
| { | |||||
| int64_t v = FIXP; | |||||
| for(; p; p--){ | |||||
| v*= a; | |||||
| v/= FIXP; | |||||
| for (; p; p--) { | |||||
| v *= a; | |||||
| v /= FIXP; | |||||
| } | } | ||||
| return v; | return v; | ||||
| } | } | ||||
| static int64_t int_sin(int64_t a){ | |||||
| if(a<0) a= MY_PI-a; // 0..inf | |||||
| a %= 2*MY_PI; // 0..2PI | |||||
| static int64_t int_sin(int64_t a) | |||||
| { | |||||
| if (a < 0) | |||||
| a = MY_PI - a; // 0..inf | |||||
| a %= 2 * MY_PI; // 0..2PI | |||||
| if(a>=MY_PI*3/2) a -= 2*MY_PI; // -PI/2 .. 3PI/2 | |||||
| if(a>=MY_PI/2 ) a = MY_PI - a; // -PI/2 .. PI/2 | |||||
| if (a >= MY_PI * 3 / 2) | |||||
| a -= 2 * MY_PI; // -PI / 2 .. 3PI / 2 | |||||
| if (a >= MY_PI /2) | |||||
| a = MY_PI - a; // -PI / 2 .. PI / 2 | |||||
| return a - int_pow(a, 3)/6 + int_pow(a, 5)/120 - int_pow(a, 7)/5040; | |||||
| return a - int_pow(a, 3) / 6 + int_pow(a, 5) / 120 - int_pow(a, 7) / 5040; | |||||
| } | } | ||||
| #define SCALEBITS 8 | #define SCALEBITS 8 | ||||
| #define ONE_HALF (1 << (SCALEBITS - 1)) | #define ONE_HALF (1 << (SCALEBITS - 1)) | ||||
| #define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5)) | |||||
| typedef unsigned char UINT8; | |||||
| #define FIX(x) ((int) ((x) * (1L << SCALEBITS) + 0.5)) | |||||
| static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, | |||||
| UINT8 *src, int width, int height) | |||||
| static void rgb24_to_yuv420p(unsigned char *lum, unsigned char *cb, | |||||
| unsigned char *cr, unsigned char *src, | |||||
| int width, int height) | |||||
| { | { | ||||
| int wrap, wrap3, x, y; | int wrap, wrap3, x, y; | ||||
| int r, g, b, r1, g1, b1; | int r, g, b, r1, g1, b1; | ||||
| UINT8 *p; | |||||
| unsigned char *p; | |||||
| wrap = width; | |||||
| wrap = width; | |||||
| wrap3 = width * 3; | wrap3 = width * 3; | ||||
| p = src; | p = src; | ||||
| for(y=0;y<height;y+=2) { | |||||
| for(x=0;x<width;x+=2) { | |||||
| for (y = 0; y < height; y += 2) { | |||||
| for (x = 0; x < width; x += 2) { | |||||
| r = p[0]; | r = p[0]; | ||||
| g = p[1]; | g = p[1]; | ||||
| b = p[2]; | b = p[2]; | ||||
| @@ -81,7 +86,7 @@ static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, | |||||
| b1 += b; | b1 += b; | ||||
| lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + | lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + | ||||
| FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; | FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; | ||||
| p += wrap3; | |||||
| p += wrap3; | |||||
| lum += wrap; | lum += wrap; | ||||
| r = p[0]; | r = p[0]; | ||||
| @@ -104,14 +109,14 @@ static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, | |||||
| cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + | cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + | ||||
| FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; | FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; | ||||
| cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - | cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - | ||||
| FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; | |||||
| FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; | |||||
| cb++; | cb++; | ||||
| cr++; | cr++; | ||||
| p += -wrap3 + 2 * 3; | |||||
| lum += -wrap + 2; | |||||
| p += -wrap3 + 2 * 3; | |||||
| lum += -wrap + 2; | |||||
| } | } | ||||
| p += wrap3; | |||||
| p += wrap3; | |||||
| lum += wrap; | lum += wrap; | ||||
| } | } | ||||
| } | } | ||||
| @@ -119,7 +124,7 @@ static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, | |||||
| /* cif format */ | /* cif format */ | ||||
| #define DEFAULT_WIDTH 352 | #define DEFAULT_WIDTH 352 | ||||
| #define DEFAULT_HEIGHT 288 | #define DEFAULT_HEIGHT 288 | ||||
| #define DEFAULT_NB_PICT 50 | |||||
| #define DEFAULT_NB_PICT 50 | |||||
| static void pgmyuv_save(const char *filename, int w, int h, | static void pgmyuv_save(const char *filename, int w, int h, | ||||
| unsigned char *rgb_tab) | unsigned char *rgb_tab) | ||||
| @@ -130,19 +135,19 @@ static void pgmyuv_save(const char *filename, int w, int h, | |||||
| unsigned char *lum_tab, *cb_tab, *cr_tab; | unsigned char *lum_tab, *cb_tab, *cr_tab; | ||||
| lum_tab = malloc(w * h); | lum_tab = malloc(w * h); | ||||
| cb_tab = malloc((w * h) / 4); | |||||
| cr_tab = malloc((w * h) / 4); | |||||
| cb_tab = malloc(w * h / 4); | |||||
| cr_tab = malloc(w * h / 4); | |||||
| rgb24_to_yuv420p(lum_tab, cb_tab, cr_tab, rgb_tab, w, h); | rgb24_to_yuv420p(lum_tab, cb_tab, cr_tab, rgb_tab, w, h); | ||||
| f = fopen(filename,"wb"); | |||||
| fprintf(f, "P5\n%d %d\n%d\n", w, (h * 3) / 2, 255); | |||||
| f = fopen(filename, "wb"); | |||||
| fprintf(f, "P5\n%d %d\n%d\n", w, h * 3 / 2, 255); | |||||
| fwrite(lum_tab, 1, w * h, f); | fwrite(lum_tab, 1, w * h, f); | ||||
| h2 = h / 2; | h2 = h / 2; | ||||
| w2 = w / 2; | w2 = w / 2; | ||||
| cb = cb_tab; | cb = cb_tab; | ||||
| cr = cr_tab; | cr = cr_tab; | ||||
| for(i=0;i<h2;i++) { | |||||
| for (i = 0; i < h2; i++) { | |||||
| fwrite(cb, 1, w2, f); | fwrite(cb, 1, w2, f); | ||||
| fwrite(cr, 1, w2, f); | fwrite(cr, 1, w2, f); | ||||
| cb += w2; | cb += w2; | ||||
| @@ -172,104 +177,100 @@ static void put_pixel(int x, int y, int r, int g, int b) | |||||
| p[2] = b; | p[2] = b; | ||||
| } | } | ||||
| unsigned char tab_r[256*256]; | |||||
| unsigned char tab_g[256*256]; | |||||
| unsigned char tab_b[256*256]; | |||||
| unsigned char tab_r[256 * 256]; | |||||
| unsigned char tab_g[256 * 256]; | |||||
| unsigned char tab_b[256 * 256]; | |||||
| int h_cos [360]; | int h_cos [360]; | ||||
| int h_sin [360]; | int h_sin [360]; | ||||
| static int ipol(uint8_t *src, int x, int y){ | |||||
| int int_x= x>>16; | |||||
| int int_y= y>>16; | |||||
| int frac_x= x&0xFFFF; | |||||
| int frac_y= y&0xFFFF; | |||||
| int s00= src[ ( int_x &255) + 256*( int_y &255) ]; | |||||
| int s01= src[ ((int_x+1)&255) + 256*( int_y &255) ]; | |||||
| int s10= src[ ( int_x &255) + 256*((int_y+1)&255) ]; | |||||
| int s11= src[ ((int_x+1)&255) + 256*((int_y+1)&255) ]; | |||||
| int s0= (((1<<16) - frac_x)*s00 + frac_x*s01)>>8; | |||||
| int s1= (((1<<16) - frac_x)*s10 + frac_x*s11)>>8; | |||||
| return (((1<<16) - frac_y)*s0 + frac_y*s1)>>24; | |||||
| static int ipol(uint8_t *src, int x, int y) | |||||
| { | |||||
| int int_x = x >> 16; | |||||
| int int_y = y >> 16; | |||||
| int frac_x = x & 0xFFFF; | |||||
| int frac_y = y & 0xFFFF; | |||||
| int s00 = src[( int_x & 255) + 256 * ( int_y & 255)]; | |||||
| int s01 = src[((int_x + 1) & 255) + 256 * ( int_y & 255)]; | |||||
| int s10 = src[( int_x & 255) + 256 * ((int_y + 1) & 255)]; | |||||
| int s11 = src[((int_x + 1) & 255) + 256 * ((int_y + 1) & 255)]; | |||||
| int s0 = (((1 << 16) - frac_x) * s00 + frac_x * s01) >> 8; | |||||
| int s1 = (((1 << 16) - frac_x) * s10 + frac_x * s11) >> 8; | |||||
| return (((1 << 16) - frac_y) * s0 + frac_y * s1) >> 24; | |||||
| } | } | ||||
| static void gen_image(int num, int w, int h) | static void gen_image(int num, int w, int h) | ||||
| { | { | ||||
| const int c = h_cos [num % 360]; | |||||
| const int s = h_sin [num % 360]; | |||||
| const int c = h_cos [num % 360]; | |||||
| const int s = h_sin [num % 360]; | |||||
| const int xi = -(w/2) * c; | |||||
| const int yi = (w/2) * s; | |||||
| const int xi = -(w / 2) * c; | |||||
| const int yi = (w / 2) * s; | |||||
| const int xj = -(h/2) * s; | |||||
| const int yj = -(h/2) * c; | |||||
| int i,j; | |||||
| const int xj = -(h / 2) * s; | |||||
| const int yj = -(h / 2) * c; | |||||
| int i, j; | |||||
| int x,y; | |||||
| int xprime = xj; | |||||
| int yprime = yj; | |||||
| int x, y; | |||||
| int xprime = xj; | |||||
| int yprime = yj; | |||||
| for (j = 0; j < h; j++) { | |||||
| x = xprime + xi + FIXP * w / 2; | |||||
| xprime += s; | |||||
| for (j=0;j<h;j++) { | |||||
| y = yprime + yi + FIXP * h / 2; | |||||
| yprime += c; | |||||
| x = xprime + xi + FIXP*w/2; | |||||
| xprime += s; | |||||
| y = yprime + yi + FIXP*h/2; | |||||
| yprime += c; | |||||
| for ( i=0 ; i<w ; i++ ) { | |||||
| x += c; | |||||
| y -= s; | |||||
| #if 1 | |||||
| put_pixel(i, j, ipol(tab_r, x, y), ipol(tab_g, x, y), ipol(tab_b, x, y)); | |||||
| #else | |||||
| { | |||||
| unsigned dep; | |||||
| dep = ((x>>16)&255) + (((y>>16)&255)<<8); | |||||
| put_pixel(i, j, tab_r[dep], tab_g[dep], tab_b[dep]); | |||||
| } | |||||
| #endif | |||||
| for (i = 0; i < w; i++ ) { | |||||
| x += c; | |||||
| y -= s; | |||||
| put_pixel(i, j, ipol(tab_r, x, y), ipol(tab_g, x, y), ipol(tab_b, x, y)); | |||||
| } | |||||
| } | } | ||||
| } | |||||
| } | } | ||||
| #define W 256 | #define W 256 | ||||
| #define H 256 | #define H 256 | ||||
| static void init_demo(const char *filename) { | |||||
| int i,j; | |||||
| int h; | |||||
| int radian; | |||||
| char line[3 * W]; | |||||
| FILE *fichier; | |||||
| fichier = fopen(filename,"rb"); | |||||
| if (!fichier) { | |||||
| perror(filename); | |||||
| exit(1); | |||||
| } | |||||
| fread(line, 1, 15, fichier); | |||||
| for (i=0;i<H;i++) { | |||||
| fread(line,1,3*W,fichier); | |||||
| for (j=0;j<W;j++) { | |||||
| tab_r[W*i+j] = line[3*j ]; | |||||
| tab_g[W*i+j] = line[3*j + 1]; | |||||
| tab_b[W*i+j] = line[3*j + 2]; | |||||
| static int init_demo(const char *filename) | |||||
| { | |||||
| int i, j; | |||||
| int h; | |||||
| int radian; | |||||
| char line[3 * W]; | |||||
| FILE *input_file; | |||||
| input_file = fopen(filename, "rb"); | |||||
| if (!input_file) { | |||||
| perror(filename); | |||||
| return 1; | |||||
| } | } | ||||
| } | |||||
| fclose(fichier); | |||||
| /* tables sin/cos */ | |||||
| for (i=0;i<360;i++) { | |||||
| radian = 2*i*MY_PI/360; | |||||
| h = 2*FIXP + int_sin (radian); | |||||
| h_cos[i] = ( h * int_sin (radian + MY_PI/2) )/2/FIXP; | |||||
| h_sin[i] = ( h * int_sin (radian ) )/2/FIXP; | |||||
| } | |||||
| if (fread(line, 1, 15, input_file) != 15) | |||||
| return 1; | |||||
| for (i = 0; i < H; i++) { | |||||
| if (fread(line, 1, 3 * W, input_file) != 3 * W) | |||||
| return 1; | |||||
| for (j = 0; j < W; j++) { | |||||
| tab_r[W * i + j] = line[3 * j ]; | |||||
| tab_g[W * i + j] = line[3 * j + 1]; | |||||
| tab_b[W * i + j] = line[3 * j + 2]; | |||||
| } | |||||
| } | |||||
| fclose(input_file); | |||||
| /* tables sin/cos */ | |||||
| for (i = 0; i < 360; i++) { | |||||
| radian = 2 * i * MY_PI / 360; | |||||
| h = 2 * FIXP + int_sin (radian); | |||||
| h_cos[i] = h * int_sin(radian + MY_PI / 2) / 2 / FIXP; | |||||
| h_sin[i] = h * int_sin(radian) / 2 / FIXP; | |||||
| } | |||||
| return 0; | |||||
| } | } | ||||
| int main(int argc, char **argv) | int main(int argc, char **argv) | ||||
| @@ -280,20 +281,21 @@ int main(int argc, char **argv) | |||||
| if (argc != 3) { | if (argc != 3) { | ||||
| printf("usage: %s directory/ image.pnm\n" | printf("usage: %s directory/ image.pnm\n" | ||||
| "generate a test video stream\n", argv[0]); | "generate a test video stream\n", argv[0]); | ||||
| exit(1); | |||||
| return 1; | |||||
| } | } | ||||
| w = DEFAULT_WIDTH; | w = DEFAULT_WIDTH; | ||||
| h = DEFAULT_HEIGHT; | h = DEFAULT_HEIGHT; | ||||
| rgb_tab = malloc(w * h * 3); | rgb_tab = malloc(w * h * 3); | ||||
| wrap = w * 3; | |||||
| width = w; | |||||
| height = h; | |||||
| wrap = w * 3; | |||||
| width = w; | |||||
| height = h; | |||||
| init_demo(argv[2]); | |||||
| if (init_demo(argv[2])) | |||||
| return 1; | |||||
| for(i=0;i<DEFAULT_NB_PICT;i++) { | |||||
| for (i = 0; i < DEFAULT_NB_PICT; i++) { | |||||
| snprintf(buf, sizeof(buf), "%s%02d.pgm", argv[1], i); | snprintf(buf, sizeof(buf), "%s%02d.pgm", argv[1], i); | ||||
| gen_image(i, w, h); | gen_image(i, w, h); | ||||
| pgmyuv_save(buf, w, h, rgb_tab); | pgmyuv_save(buf, w, h, rgb_tab); | ||||