* qatar/master:
  g723.1: simplify scale_vector()
  g723.1: simplify normalize_bits()
  vda: cosmetics: fix Doxygen comment formatting
  vda: better frame allocation
  vda: Merge implementation into one file
  vda: support synchronous decoding
  vda: Reuse the bitstream buffer and reallocate it only if needed
  build: Factor out mpegvideo encoding dependencies to CONFIG_MPEGVIDEOENC
  avprobe: Include libm.h for the log2 fallback
  proresenc: use the edge emulation buffer
  rtmp: handle bytes read reports
  configure: Fix typo in mpeg2video/svq1 decoder dependency declaration
  Use log2(x) instead of log(x) / log(2)
  x86: swscale: fix fragile memory accesses
  x86: swscale: remove disabled code
  x86: yadif: fix asm with suncc
  x86: cabac: allow building with suncc
  x86: mlpdsp: avoid taking address of void
  ARM: intmath: use native-size return types for clipping functions

Conflicts:
  configure
  ffprobe.c
  libavcodec/Makefile
  libavcodec/g723_1.c
  libavcodec/v210dec.h
  libavcodec/vda.h
  libavcodec/vda_h264.c
  libavcodec/x86/cabac.h
  libavfilter/x86/yadif_template.c
  libswscale/x86/rgb2rgb_template.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

tags/n1.0
| @@ -1377,6 +1377,7 @@ CONFIG_EXTRA=" | |||
| lpc | |||
| mpegaudiodsp | |||
| mpegvideo | |||
| mpegvideoenc | |||
| nettle | |||
| rtpdec | |||
| sinewin | |||
| @@ -1500,6 +1501,7 @@ dct_select="rdft" | |||
| mdct_select="fft" | |||
| rdft_select="fft" | |||
| mpegaudiodsp_select="dct" | |||
| mpegvideoenc_select="mpegvideo" | |||
| # decoders / encoders / hardware accelerators | |||
| aac_decoder_select="mdct sinewin" | |||
| @@ -1521,7 +1523,7 @@ cook_decoder_select="mdct sinewin" | |||
| cscd_decoder_suggest="zlib" | |||
| dca_decoder_select="mdct" | |||
| dirac_decoder_select="dwt golomb" | |||
| dnxhd_encoder_select="aandcttables mpegvideo" | |||
| dnxhd_encoder_select="aandcttables mpegvideoenc" | |||
| dxa_decoder_select="zlib" | |||
| eac3_decoder_select="ac3_decoder" | |||
| eac3_encoder_select="mdct ac3dsp" | |||
| @@ -1540,9 +1542,9 @@ flv_decoder_select="h263_decoder" | |||
| flv_encoder_select="h263_encoder" | |||
| fraps_decoder_select="huffman" | |||
| h261_decoder_select="mpegvideo" | |||
| h261_encoder_select="aandcttables mpegvideo" | |||
| h261_encoder_select="aandcttables mpegvideoenc" | |||
| h263_decoder_select="h263_parser mpegvideo" | |||
| h263_encoder_select="aandcttables mpegvideo" | |||
| h263_encoder_select="aandcttables mpegvideoenc" | |||
| h263_vaapi_hwaccel_select="vaapi h263_decoder" | |||
| h263i_decoder_select="h263_decoder" | |||
| h263p_encoder_select="h263_encoder" | |||
| @@ -1558,10 +1560,10 @@ iac_decoder_select="fft mdct sinewin" | |||
| imc_decoder_select="fft mdct sinewin" | |||
| jpegls_decoder_select="golomb" | |||
| jpegls_encoder_select="golomb" | |||
| ljpeg_encoder_select="aandcttables mpegvideo" | |||
| ljpeg_encoder_select="aandcttables mpegvideoenc" | |||
| loco_decoder_select="golomb" | |||
| mdec_decoder_select="mpegvideo" | |||
| mjpeg_encoder_select="aandcttables mpegvideo" | |||
| mjpeg_encoder_select="aandcttables mpegvideoenc" | |||
| mlp_decoder_select="mlp_parser" | |||
| mp1_decoder_select="mpegaudiodsp" | |||
| mp1float_decoder_select="mpegaudiodsp" | |||
| @@ -1581,14 +1583,14 @@ mpeg_xvmc_decoder_select="mpegvideo_decoder" | |||
| mpeg1_vdpau_decoder_select="vdpau mpeg1video_decoder" | |||
| mpeg1_vdpau_hwaccel_select="vdpau mpeg1video_decoder" | |||
| mpeg1video_decoder_select="mpegvideo" | |||
| mpeg1video_encoder_select="aandcttables mpegvideo" | |||
| mpeg1video_encoder_select="aandcttables mpegvideoenc" | |||
| mpeg2_crystalhd_decoder_select="crystalhd" | |||
| mpeg2_dxva2_hwaccel_deps="dxva2api_h" | |||
| mpeg2_dxva2_hwaccel_select="dxva2 mpeg2video_decoder" | |||
| mpeg2_vdpau_hwaccel_select="vdpau mpeg2video_decoder" | |||
| mpeg2_vaapi_hwaccel_select="vaapi mpeg2video_decoder" | |||
| mpeg2video_encoder_select="mpegvideo" | |||
| mpeg2video_encoder_select="aandcttables mpegvideo" | |||
| mpeg2video_decoder_select="mpegvideo" | |||
| mpeg2video_encoder_select="aandcttables mpegvideoenc" | |||
| mpeg4_crystalhd_decoder_select="crystalhd" | |||
| mpeg4_decoder_select="h263_decoder mpeg4video_parser" | |||
| mpeg4_encoder_select="h263_encoder" | |||
| @@ -1618,12 +1620,12 @@ rv40_decoder_select="golomb h264chroma h264pred h264qpel mpegvideo" | |||
| shorten_decoder_select="golomb" | |||
| sipr_decoder_select="lsp" | |||
| snow_decoder_select="dwt" | |||
| snow_encoder_select="aandcttables dwt mpegvideo" | |||
| snow_encoder_select="aandcttables dwt mpegvideoenc" | |||
| sonic_decoder_select="golomb" | |||
| sonic_encoder_select="golomb" | |||
| sonic_ls_encoder_select="golomb" | |||
| svq1_encoder_select="mpegvideo" | |||
| svq1_encoder_select="aandcttables mpegvideo" | |||
| svq1_decoder_select="mpegvideo" | |||
| svq1_encoder_select="aandcttables mpegvideoenc" | |||
| svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel mpegvideo" | |||
| svq3_decoder_suggest="zlib" | |||
| theora_decoder_select="vp3_decoder" | |||
| @@ -1120,7 +1120,7 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti | |||
| if (qp >= 0 && qp < FF_ARRAY_ELEMS(qp_histogram)) | |||
| qp_histogram[qp]++; | |||
| for (j = 0; j < 32; j++) | |||
| snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log(qp_histogram[j] + 1) / log(2))); | |||
| snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log2(qp_histogram[j] + 1))); | |||
| } | |||
| if (enc->flags&CODEC_FLAG_PSNR) { | |||
| int j; | |||
| @@ -33,6 +33,7 @@ | |||
| #include "libavutil/opt.h" | |||
| #include "libavutil/pixdesc.h" | |||
| #include "libavutil/dict.h" | |||
| #include "libavutil/libm.h" | |||
| #include "libavutil/timecode.h" | |||
| #include "libavdevice/avdevice.h" | |||
| #include "libswscale/swscale.h" | |||
| @@ -121,7 +122,7 @@ static char *value_string(char *buf, int buf_size, struct unit_value uv) | |||
| long long int index; | |||
| if (uv.unit == unit_byte_str && use_byte_value_binary_prefix) { | |||
| index = (long long int) (log(vald)/log(2)) / 10; | |||
| index = (long long int) (log2(vald)) / 10; | |||
| index = av_clip(index, 0, FF_ARRAY_ELEMS(binary_unit_prefixes) - 1); | |||
| vald /= pow(2, index * 10); | |||
| prefix_string = binary_unit_prefixes[index]; | |||
| @@ -56,6 +56,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \ | |||
| mpegaudiodsp_fixed.o \ | |||
| mpegaudiodsp_float.o | |||
| OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideo_motion.o | |||
| OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ | |||
| motion_est.o ratecontrol.o | |||
| RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | |||
| OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) | |||
| OBJS-$(CONFIG_SINEWIN) += sinewin.o | |||
| @@ -144,9 +146,7 @@ OBJS-$(CONFIG_DIRAC_DECODER) += diracdec.o dirac.o diracdsp.o \ | |||
| dirac_arith.o mpeg12data.o dwt.o | |||
| OBJS-$(CONFIG_DFA_DECODER) += dfa.o | |||
| OBJS-$(CONFIG_DNXHD_DECODER) += dnxhddec.o dnxhddata.o | |||
| OBJS-$(CONFIG_DNXHD_ENCODER) += dnxhdenc.o dnxhddata.o \ | |||
| mpegvideo_enc.o motion_est.o \ | |||
| ratecontrol.o mpeg12data.o | |||
| OBJS-$(CONFIG_DNXHD_ENCODER) += dnxhdenc.o dnxhddata.o | |||
| OBJS-$(CONFIG_DPX_DECODER) += dpx.o | |||
| OBJS-$(CONFIG_DPX_ENCODER) += dpxenc.o | |||
| OBJS-$(CONFIG_DSICINAUDIO_DECODER) += dsicinav.o | |||
| @@ -199,17 +199,13 @@ OBJS-$(CONFIG_GIF_ENCODER) += gif.o lzwenc.o | |||
| OBJS-$(CONFIG_GSM_DECODER) += gsmdec.o gsmdec_data.o msgsmdec.o | |||
| OBJS-$(CONFIG_GSM_MS_DECODER) += gsmdec.o gsmdec_data.o msgsmdec.o | |||
| OBJS-$(CONFIG_H261_DECODER) += h261dec.o h261.o h261data.o error_resilience.o | |||
| OBJS-$(CONFIG_H261_ENCODER) += h261enc.o h261.o h261data.o \ | |||
| mpegvideo_enc.o motion_est.o \ | |||
| ratecontrol.o mpeg12data.o | |||
| OBJS-$(CONFIG_H261_ENCODER) += h261enc.o h261.o h261data.o | |||
| OBJS-$(CONFIG_H263_DECODER) += h263dec.o h263.o ituh263dec.o \ | |||
| mpeg4video.o mpeg4videodec.o flvdec.o\ | |||
| intelh263dec.o error_resilience.o | |||
| OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o | |||
| OBJS-$(CONFIG_H263_ENCODER) += mpegvideo_enc.o mpeg4video.o \ | |||
| mpeg4videoenc.o motion_est.o \ | |||
| ratecontrol.o h263.o ituh263enc.o \ | |||
| flvenc.o mpeg12data.o \ | |||
| OBJS-$(CONFIG_H263_ENCODER) += mpeg4videoenc.o mpeg4video.o \ | |||
| h263.o ituh263enc.o flvenc.o \ | |||
| error_resilience.o | |||
| OBJS-$(CONFIG_H264_DECODER) += h264.o \ | |||
| h264_loopfilter.o h264_direct.o \ | |||
| @@ -243,9 +239,7 @@ OBJS-$(CONFIG_JV_DECODER) += jvdec.o | |||
| OBJS-$(CONFIG_KGV1_DECODER) += kgv1dec.o | |||
| OBJS-$(CONFIG_KMVC_DECODER) += kmvc.o | |||
| OBJS-$(CONFIG_LAGARITH_DECODER) += lagarith.o lagarithrac.o | |||
| OBJS-$(CONFIG_LJPEG_ENCODER) += ljpegenc.o mjpegenc.o mjpeg.o \ | |||
| mpegvideo_enc.o motion_est.o \ | |||
| ratecontrol.o mpeg12data.o | |||
| OBJS-$(CONFIG_LJPEG_ENCODER) += ljpegenc.o mjpegenc.o mjpeg.o | |||
| OBJS-$(CONFIG_LOCO_DECODER) += loco.o | |||
| OBJS-$(CONFIG_MACE3_DECODER) += mace.o | |||
| OBJS-$(CONFIG_MACE6_DECODER) += mace.o | |||
| @@ -254,9 +248,7 @@ OBJS-$(CONFIG_MDEC_DECODER) += mdec.o mpeg12.o mpeg12data.o \ | |||
| OBJS-$(CONFIG_MICRODVD_DECODER) += microdvddec.o ass.o | |||
| OBJS-$(CONFIG_MIMIC_DECODER) += mimic.o | |||
| OBJS-$(CONFIG_MJPEG_DECODER) += mjpegdec.o mjpeg.o | |||
| OBJS-$(CONFIG_MJPEG_ENCODER) += mjpegenc.o mjpeg.o \ | |||
| mpegvideo_enc.o motion_est.o \ | |||
| ratecontrol.o mpeg12data.o | |||
| OBJS-$(CONFIG_MJPEG_ENCODER) += mjpegenc.o mjpeg.o | |||
| OBJS-$(CONFIG_MJPEGB_DECODER) += mjpegbdec.o mjpegdec.o mjpeg.o | |||
| OBJS-$(CONFIG_MLP_DECODER) += mlpdec.o mlpdsp.o | |||
| OBJS-$(CONFIG_MMVIDEO_DECODER) += mmvideo.o | |||
| @@ -298,19 +290,15 @@ OBJS-$(CONFIG_MPEGVIDEO_DECODER) += mpeg12.o mpeg12data.o \ | |||
| OBJS-$(CONFIG_MPEG_XVMC_DECODER) += mpegvideo_xvmc.o | |||
| OBJS-$(CONFIG_MPEG1VIDEO_DECODER) += mpeg12.o mpeg12data.o \ | |||
| error_resilience.o | |||
| OBJS-$(CONFIG_MPEG1VIDEO_ENCODER) += mpeg12enc.o mpegvideo_enc.o \ | |||
| OBJS-$(CONFIG_MPEG1VIDEO_ENCODER) += mpeg12enc.o mpeg12.o \ | |||
| timecode.o \ | |||
| motion_est.o ratecontrol.o \ | |||
| mpeg12.o mpeg12data.o \ | |||
| error_resilience.o | |||
| OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL) += dxva2_mpeg2.o | |||
| OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL) += vaapi_mpeg2.o | |||
| OBJS-$(CONFIG_MPEG2VIDEO_DECODER) += mpeg12.o mpeg12data.o \ | |||
| error_resilience.o | |||
| OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpegvideo_enc.o \ | |||
| OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpeg12.o \ | |||
| timecode.o \ | |||
| motion_est.o ratecontrol.o \ | |||
| mpeg12.o mpeg12data.o \ | |||
| error_resilience.o | |||
| OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL) += vaapi_mpeg4.o | |||
| OBJS-$(CONFIG_MSMPEG4V1_DECODER) += msmpeg4.o msmpeg4data.o | |||
| @@ -411,10 +399,8 @@ OBJS-$(CONFIG_SMACKER_DECODER) += smacker.o | |||
| OBJS-$(CONFIG_SMC_DECODER) += smc.o | |||
| OBJS-$(CONFIG_SNOW_DECODER) += snowdec.o snow.o rangecoder.o | |||
| OBJS-$(CONFIG_SNOW_ENCODER) += snowenc.o snow.o rangecoder.o \ | |||
| motion_est.o ratecontrol.o \ | |||
| h263.o \ | |||
| error_resilience.o ituh263enc.o \ | |||
| mpegvideo_enc.o mpeg12data.o | |||
| h263.o ituh263enc.o \ | |||
| error_resilience.o | |||
| OBJS-$(CONFIG_SOL_DPCM_DECODER) += dpcm.o | |||
| OBJS-$(CONFIG_SONIC_DECODER) += sonic.o | |||
| OBJS-$(CONFIG_SONIC_ENCODER) += sonic.o | |||
| @@ -428,10 +414,8 @@ OBJS-$(CONFIG_SUNRAST_ENCODER) += sunrastenc.o | |||
| OBJS-$(CONFIG_SVQ1_DECODER) += svq1dec.o svq1.o h263.o \ | |||
| error_resilience.o | |||
| OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \ | |||
| motion_est.o h263.o \ | |||
| error_resilience.o \ | |||
| ituh263enc.o mpegvideo_enc.o \ | |||
| ratecontrol.o mpeg12data.o | |||
| h263.o ituh263enc.o \ | |||
| error_resilience.o | |||
| OBJS-$(CONFIG_SVQ3_DECODER) += h264.o svq3.o \ | |||
| h264_loopfilter.o h264_direct.o \ | |||
| h264_sei.o h264_ps.o h264_refs.o \ | |||
| @@ -230,17 +230,7 @@ static int16_t square_root(int val) | |||
| */ | |||
| static int normalize_bits(int num, int width) | |||
| { | |||
| int i = 0; | |||
| if (num) { | |||
| if (num == -1) | |||
| return width; | |||
| if (num < 0) | |||
| num = ~num; | |||
| i= width - av_log2(num) - 1; | |||
| i= FFMAX(i, 0); | |||
| } | |||
| return i; | |||
| return width - av_log2(num) - 1; | |||
| } | |||
| #define normalize_bits_int16(num) normalize_bits(num, 15) | |||
| @@ -342,7 +342,7 @@ static void imc_decode_level_coefficients(IMCContext *q, int *levlCoeffBuf, | |||
| // maybe some frequency division thingy | |||
| flcoeffs1[0] = 20000.0 / pow (2, levlCoeffBuf[0] * 0.18945); // 0.18945 = log2(10) * 0.05703125 | |||
| flcoeffs2[0] = log(flcoeffs1[0]) / log(2); | |||
| flcoeffs2[0] = log2f(flcoeffs1[0]); | |||
| tmp = flcoeffs1[0]; | |||
| tmp2 = flcoeffs2[0]; | |||
| @@ -414,7 +414,7 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx, | |||
| highest = FFMAX(highest, chctx->flcoeffs1[i]); | |||
| for (i = 0; i < BANDS - 1; i++) | |||
| chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) / log(2); | |||
| chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2f(chctx->flcoeffs5[i]); | |||
| chctx->flcoeffs4[BANDS - 1] = limit; | |||
| highest = highest * 0.25; | |||
| @@ -1538,7 +1538,7 @@ static void update_last_header_values(SnowContext *s){ | |||
| } | |||
| static int qscale2qlog(int qscale){ | |||
| return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) | |||
| return rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA)) | |||
| + 61*QROOT/8; ///< 64 > 60 | |||
| } | |||
| @@ -34,6 +34,7 @@ | |||
| #if FF_API_VDA_ASYNC | |||
| #include <pthread.h> | |||
| #endif | |||
| #include <stdint.h> | |||
| // emmintrin.h is unable to compile with -std=c99 -Werror=missing-prototypes | |||
| @@ -53,34 +54,33 @@ | |||
| #if FF_API_VDA_ASYNC | |||
| /** | |||
| * This structure is used to store a decoded frame information and data. | |||
| * This structure is used to store decoded frame information and data. | |||
| * | |||
| * @deprecated Use synchronous decoding mode. | |||
| * | |||
| */ | |||
| typedef struct { | |||
| /** | |||
| * The PTS of the frame. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| * The PTS of the frame. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| int64_t pts; | |||
| /** | |||
| * The CoreVideo buffer that contains the decoded data. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| * The CoreVideo buffer that contains the decoded data. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| CVPixelBufferRef cv_buffer; | |||
| /** | |||
| * A pointer to the next frame. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| * A pointer to the next frame. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| struct vda_frame *next_frame; | |||
| } vda_frame; | |||
| #endif | |||
| @@ -93,106 +93,106 @@ typedef struct { | |||
| */ | |||
| struct vda_context { | |||
| /** | |||
| * VDA decoder object. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| * VDA decoder object. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| VDADecoder decoder; | |||
| /** | |||
| * The Core Video pixel buffer that contains the current image data. | |||
| * | |||
| * encoding: unused | |||
| * decoding: Set by libavcodec. Unset by user. | |||
| */ | |||
| * The Core Video pixel buffer that contains the current image data. | |||
| * | |||
| * encoding: unused | |||
| * decoding: Set by libavcodec. Unset by user. | |||
| */ | |||
| CVPixelBufferRef cv_buffer; | |||
| /** | |||
| * An integer value that indicates whether use the hardware decoder in synchronous mode. | |||
| * | |||
| * encoding: unused | |||
| * decoding: Set by user. | |||
| */ | |||
| * Use the hardware decoder in synchronous mode. | |||
| * | |||
| * encoding: unused | |||
| * decoding: Set by user. | |||
| */ | |||
| int use_sync_decoding; | |||
| #if FF_API_VDA_ASYNC | |||
| /** | |||
| * VDA frames queue ordered by presentation timestamp. | |||
| * | |||
| * @deprecated Use synchronous decoding mode. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| * VDA frames queue ordered by presentation timestamp. | |||
| * | |||
| * @deprecated Use synchronous decoding mode. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| vda_frame *queue; | |||
| /** | |||
| * Mutex for locking queue operations. | |||
| * | |||
| * @deprecated Use synchronous decoding mode. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| * Mutex for locking queue operations. | |||
| * | |||
| * @deprecated Use synchronous decoding mode. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| pthread_mutex_t queue_mutex; | |||
| #endif | |||
| /** | |||
| * The frame width. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by user. | |||
| */ | |||
| * The frame width. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by user. | |||
| */ | |||
| int width; | |||
| /** | |||
| * The frame height. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by user. | |||
| */ | |||
| * The frame height. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by user. | |||
| */ | |||
| int height; | |||
| /** | |||
| * The frame format. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by user. | |||
| */ | |||
| * The frame format. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by user. | |||
| */ | |||
| int format; | |||
| /** | |||
| * The pixel format for output image buffers. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by user. | |||
| */ | |||
| * The pixel format for output image buffers. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by user. | |||
| */ | |||
| OSType cv_pix_fmt_type; | |||
| /** | |||
| * The current bitstream buffer. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| uint8_t *bitstream; | |||
| * The current bitstream buffer. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| uint8_t *priv_bitstream; | |||
| /** | |||
| * The current size of the bitstream. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| int bitstream_size; | |||
| * The current size of the bitstream. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| int priv_bitstream_size; | |||
| /** | |||
| * The reference size used for fast reallocation. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| int ref_size; | |||
| * The reference size used for fast reallocation. | |||
| * | |||
| * - encoding: unused | |||
| * - decoding: Set/Unset by libavcodec. | |||
| */ | |||
| int priv_allocated_size; | |||
| }; | |||
| /** Create the video decoder. */ | |||
| @@ -130,7 +130,7 @@ static void vda_decoder_callback (void *vda_hw_ctx, | |||
| uint32_t infoFlags, | |||
| CVImageBufferRef image_buffer) | |||
| { | |||
| struct vda_context *vda_ctx = (struct vda_context*)vda_hw_ctx; | |||
| struct vda_context *vda_ctx = vda_hw_ctx; | |||
| if (!image_buffer) | |||
| return; | |||
| @@ -140,8 +140,7 @@ static void vda_decoder_callback (void *vda_hw_ctx, | |||
| if (vda_ctx->use_sync_decoding) { | |||
| vda_ctx->cv_buffer = CVPixelBufferRetain(image_buffer); | |||
| } | |||
| else { | |||
| } else { | |||
| vda_frame *new_frame; | |||
| vda_frame *queue_walker; | |||
| @@ -188,8 +187,8 @@ static int vda_sync_decode(struct vda_context *vda_ctx) | |||
| uint32_t flush_flags = 1 << 0; ///< kVDADecoderFlush_emitFrames | |||
| coded_frame = CFDataCreate(kCFAllocatorDefault, | |||
| vda_ctx->bitstream, | |||
| vda_ctx->bitstream_size); | |||
| vda_ctx->priv_bitstream, | |||
| vda_ctx->priv_bitstream_size); | |||
| status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, NULL); | |||
| @@ -210,7 +209,7 @@ static int start_frame(AVCodecContext *avctx, | |||
| if (!vda_ctx->decoder) | |||
| return -1; | |||
| vda_ctx->bitstream_size = 0; | |||
| vda_ctx->priv_bitstream_size = 0; | |||
| return 0; | |||
| } | |||
| @@ -225,38 +224,38 @@ static int decode_slice(AVCodecContext *avctx, | |||
| if (!vda_ctx->decoder) | |||
| return -1; | |||
| tmp = av_fast_realloc(vda_ctx->bitstream, | |||
| &vda_ctx->ref_size, | |||
| vda_ctx->bitstream_size+size+4); | |||
| tmp = av_fast_realloc(vda_ctx->priv_bitstream, | |||
| &vda_ctx->priv_allocated_size, | |||
| vda_ctx->priv_bitstream_size + size + 4); | |||
| if (!tmp) | |||
| return AVERROR(ENOMEM); | |||
| vda_ctx->bitstream = tmp; | |||
| vda_ctx->priv_bitstream = tmp; | |||
| AV_WB32(vda_ctx->bitstream+vda_ctx->bitstream_size, size); | |||
| memcpy(vda_ctx->bitstream+vda_ctx->bitstream_size+4, buffer, size); | |||
| AV_WB32(vda_ctx->priv_bitstream + vda_ctx->priv_bitstream_size, size); | |||
| memcpy(vda_ctx->priv_bitstream + vda_ctx->priv_bitstream_size + 4, buffer, size); | |||
| vda_ctx->bitstream_size += size + 4; | |||
| vda_ctx->priv_bitstream_size += size + 4; | |||
| return 0; | |||
| } | |||
| static int end_frame(AVCodecContext *avctx) | |||
| { | |||
| H264Context *h = avctx->priv_data; | |||
| struct vda_context *vda_ctx = avctx->hwaccel_context; | |||
| AVFrame *frame = &h->s.current_picture_ptr->f; | |||
| H264Context *h = avctx->priv_data; | |||
| struct vda_context *vda_ctx = avctx->hwaccel_context; | |||
| AVFrame *frame = &h->s.current_picture_ptr->f; | |||
| int status; | |||
| if (!vda_ctx->decoder || !vda_ctx->bitstream) | |||
| if (!vda_ctx->decoder || !vda_ctx->priv_bitstream) | |||
| return -1; | |||
| if (vda_ctx->use_sync_decoding) { | |||
| status = vda_sync_decode(vda_ctx); | |||
| frame->data[3] = (void*)vda_ctx->cv_buffer; | |||
| } else { | |||
| status = vda_decoder_decode(vda_ctx, vda_ctx->bitstream, | |||
| vda_ctx->bitstream_size, | |||
| status = vda_decoder_decode(vda_ctx, vda_ctx->priv_bitstream, | |||
| vda_ctx->priv_bitstream_size, | |||
| frame->reordered_opaque); | |||
| } | |||
| @@ -280,8 +279,8 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx, | |||
| CFMutableDictionaryRef io_surface_properties; | |||
| CFNumberRef cv_pix_fmt; | |||
| vda_ctx->bitstream = NULL; | |||
| vda_ctx->ref_size = 0; | |||
| vda_ctx->priv_bitstream = NULL; | |||
| vda_ctx->priv_allocated_size = 0; | |||
| #if FF_API_VDA_ASYNC | |||
| pthread_mutex_init(&vda_ctx->queue_mutex, NULL); | |||
| @@ -341,7 +340,7 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx, | |||
| status = VDADecoderCreate(config_info, | |||
| buffer_attributes, | |||
| (VDADecoderOutputCallback *)vda_decoder_callback, | |||
| vda_decoder_callback, | |||
| vda_ctx, | |||
| &vda_ctx->decoder); | |||
| @@ -368,8 +367,7 @@ int ff_vda_destroy_decoder(struct vda_context *vda_ctx) | |||
| vda_clear_queue(vda_ctx); | |||
| pthread_mutex_destroy(&vda_ctx->queue_mutex); | |||
| #endif | |||
| if (vda_ctx->bitstream) | |||
| av_freep(&vda_ctx->bitstream); | |||
| av_freep(&vda_ctx->priv_bitstream); | |||
| return status; | |||
| } | |||
| @@ -89,6 +89,9 @@ | |||
| #ifndef FF_API_CODEC_ID | |||
| #define FF_API_CODEC_ID (LIBAVCODEC_VERSION_MAJOR < 55) | |||
| #endif | |||
| #ifndef FF_API_VDA_ASYNC | |||
| #define FF_API_VDA_ASYNC (LIBAVCODEC_VERSION_MAJOR < 55) | |||
| #endif | |||
| #ifndef FF_API_VDA_ASYNC | |||
| #define FF_API_VDA_ASYNC (LIBAVCODEC_VERSION_MAJOR < 55) | |||
| @@ -169,14 +169,16 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, | |||
| __asm__ volatile( | |||
| BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1", | |||
| "%2", "%q2", "%3", "%b3", | |||
| "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10", "%11") | |||
| "%c6(%5)", "%c7(%5)", | |||
| AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), | |||
| AV_STRINGIFY(H264_LPS_RANGE_OFFSET), | |||
| AV_STRINGIFY(H264_MLPS_STATE_OFFSET), | |||
| "%8") | |||
| : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp) | |||
| : "r"(state), "r"(c), | |||
| "i"(offsetof(CABACContext, bytestream)), | |||
| "i"(offsetof(CABACContext, bytestream_end)), | |||
| "i"(H264_NORM_SHIFT_OFFSET), | |||
| "i"(H264_LPS_RANGE_OFFSET), | |||
| "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG | |||
| "i"(offsetof(CABACContext, bytestream_end)) | |||
| TABLES_ARG | |||
| : "%"REG_c, "memory" | |||
| ); | |||
| return bit & 1; | |||
| @@ -188,8 +190,8 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) | |||
| { | |||
| x86_reg tmp; | |||
| __asm__ volatile( | |||
| "movl %a6(%2), %k1 \n\t" | |||
| "movl %a3(%2), %%eax \n\t" | |||
| "movl %c6(%2), %k1 \n\t" | |||
| "movl %c3(%2), %%eax \n\t" | |||
| "shl $17, %k1 \n\t" | |||
| "add %%eax, %%eax \n\t" | |||
| "sub %k1, %%eax \n\t" | |||
| @@ -200,16 +202,16 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) | |||
| "sub %%edx, %%ecx \n\t" | |||
| "test %%ax, %%ax \n\t" | |||
| "jnz 1f \n\t" | |||
| "mov %a4(%2), %1 \n\t" | |||
| "mov %c4(%2), %1 \n\t" | |||
| "subl $0xFFFF, %%eax \n\t" | |||
| "movzwl (%1), %%edx \n\t" | |||
| "bswap %%edx \n\t" | |||
| "shrl $15, %%edx \n\t" | |||
| "add $2, %1 \n\t" | |||
| "addl %%edx, %%eax \n\t" | |||
| "mov %1, %a4(%2) \n\t" | |||
| "mov %1, %c4(%2) \n\t" | |||
| "1: \n\t" | |||
| "movl %%eax, %a3(%2) \n\t" | |||
| "movl %%eax, %c3(%2) \n\t" | |||
| : "+c"(val), "=&r"(tmp) | |||
| : "r"(c), | |||
| @@ -63,7 +63,11 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
| BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3", | |||
| "%5", "%q5", "%k0", "%b0", | |||
| "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15", "%16") | |||
| "%c11(%6)", "%c12(%6)", | |||
| AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), | |||
| AV_STRINGIFY(H264_LPS_RANGE_OFFSET), | |||
| AV_STRINGIFY(H264_MLPS_STATE_OFFSET), | |||
| "%13") | |||
| "test $1, %4 \n\t" | |||
| " jz 4f \n\t" | |||
| @@ -71,7 +75,11 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
| BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3", | |||
| "%5", "%q5", "%k0", "%b0", | |||
| "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15", "%16") | |||
| "%c11(%6)", "%c12(%6)", | |||
| AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), | |||
| AV_STRINGIFY(H264_LPS_RANGE_OFFSET), | |||
| AV_STRINGIFY(H264_MLPS_STATE_OFFSET), | |||
| "%13") | |||
| "sub %10, %1 \n\t" | |||
| "mov %2, %0 \n\t" | |||
| @@ -99,10 +107,8 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
| "+&r"(c->low), "=&r"(bit), "+&r"(c->range) | |||
| : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), | |||
| "i"(offsetof(CABACContext, bytestream)), | |||
| "i"(offsetof(CABACContext, bytestream_end)), | |||
| "i"(H264_NORM_SHIFT_OFFSET), | |||
| "i"(H264_LPS_RANGE_OFFSET), | |||
| "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG | |||
| "i"(offsetof(CABACContext, bytestream_end)) | |||
| TABLES_ARG | |||
| : "%"REG_c, "memory" | |||
| ); | |||
| return coeff_count; | |||
| @@ -137,22 +143,30 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||
| BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3", | |||
| "%5", "%q5", "%k0", "%b0", | |||
| "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16", "%18") | |||
| "%c12(%7)", "%c13(%7)", | |||
| AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), | |||
| AV_STRINGIFY(H264_LPS_RANGE_OFFSET), | |||
| AV_STRINGIFY(H264_MLPS_STATE_OFFSET), | |||
| "%15") | |||
| "mov %1, %k6 \n\t" | |||
| "test $1, %4 \n\t" | |||
| " jz 4f \n\t" | |||
| #ifdef BROKEN_RELOCATIONS | |||
| "movzbl %a17(%18, %q6), %k6\n\t" | |||
| "movzbl %c14(%15, %q6), %k6\n\t" | |||
| #else | |||
| "movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t" | |||
| "movzbl "MANGLE(ff_h264_cabac_tables)"+%c14(%k6), %k6\n\t" | |||
| #endif | |||
| "add %11, %6 \n\t" | |||
| BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3", | |||
| "%5", "%q5", "%k0", "%b0", | |||
| "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16", "%18") | |||
| "%c12(%7)", "%c13(%7)", | |||
| AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), | |||
| AV_STRINGIFY(H264_LPS_RANGE_OFFSET), | |||
| AV_STRINGIFY(H264_MLPS_STATE_OFFSET), | |||
| "%15") | |||
| "mov %2, %0 \n\t" | |||
| "mov %1, %k6 \n\t" | |||
| @@ -179,9 +193,6 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||
| "m"(sig_off), "m"(last_coeff_ctx_base), | |||
| "i"(offsetof(CABACContext, bytestream)), | |||
| "i"(offsetof(CABACContext, bytestream_end)), | |||
| "i"(H264_NORM_SHIFT_OFFSET), | |||
| "i"(H264_LPS_RANGE_OFFSET), | |||
| "i"(H264_MLPS_STATE_OFFSET), | |||
| "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG | |||
| : "%"REG_c, "memory" | |||
| ); | |||
| @@ -25,21 +25,21 @@ | |||
| #if HAVE_7REGS | |||
| extern void ff_mlp_firorder_8; | |||
| extern void ff_mlp_firorder_7; | |||
| extern void ff_mlp_firorder_6; | |||
| extern void ff_mlp_firorder_5; | |||
| extern void ff_mlp_firorder_4; | |||
| extern void ff_mlp_firorder_3; | |||
| extern void ff_mlp_firorder_2; | |||
| extern void ff_mlp_firorder_1; | |||
| extern void ff_mlp_firorder_0; | |||
| extern void ff_mlp_iirorder_4; | |||
| extern void ff_mlp_iirorder_3; | |||
| extern void ff_mlp_iirorder_2; | |||
| extern void ff_mlp_iirorder_1; | |||
| extern void ff_mlp_iirorder_0; | |||
| extern char ff_mlp_firorder_8; | |||
| extern char ff_mlp_firorder_7; | |||
| extern char ff_mlp_firorder_6; | |||
| extern char ff_mlp_firorder_5; | |||
| extern char ff_mlp_firorder_4; | |||
| extern char ff_mlp_firorder_3; | |||
| extern char ff_mlp_firorder_2; | |||
| extern char ff_mlp_firorder_1; | |||
| extern char ff_mlp_firorder_0; | |||
| extern char ff_mlp_iirorder_4; | |||
| extern char ff_mlp_iirorder_3; | |||
| extern char ff_mlp_iirorder_2; | |||
| extern char ff_mlp_iirorder_1; | |||
| extern char ff_mlp_iirorder_0; | |||
| static const void *firtable[9] = { &ff_mlp_firorder_0, &ff_mlp_firorder_1, | |||
| &ff_mlp_firorder_2, &ff_mlp_firorder_3, | |||
| @@ -107,8 +107,8 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, | |||
| uint8_t *next, int w, int prefs, | |||
| int mrefs, int parity, int mode) | |||
| { | |||
| uint8_t tmp[5*16]; | |||
| uint8_t *tmpA= (uint8_t*)(((uint64_t)(tmp+15)) & ~15); | |||
| uint8_t tmpU[5*16]; | |||
| uint8_t *tmp= (uint8_t*)(((uint64_t)(tmpU+15)) & ~15); | |||
| int x; | |||
| #define FILTER\ | |||
| @@ -122,9 +122,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, | |||
| MOVQ" "MM"3, "MM"4 \n\t"\ | |||
| "paddw "MM"2, "MM"3 \n\t"\ | |||
| "psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\ | |||
| MOVQ" "MM"0, (%[tmpA]) \n\t" /* c */\ | |||
| MOVQ" "MM"3, 16(%[tmpA]) \n\t" /* d */\ | |||
| MOVQ" "MM"1, 32(%[tmpA]) \n\t" /* e */\ | |||
| MOVQ" "MM"0, (%[tmp]) \n\t" /* c */\ | |||
| MOVQ" "MM"3, 16(%[tmp]) \n\t" /* d */\ | |||
| MOVQ" "MM"1, 32(%[tmp]) \n\t" /* e */\ | |||
| "psubw "MM"4, "MM"2 \n\t"\ | |||
| PABS( MM"4", MM"2") /* temporal_diff0 */\ | |||
| LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\ | |||
| @@ -146,7 +146,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, | |||
| "paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\ | |||
| "psrlw $1, "MM"3 \n\t"\ | |||
| "pmaxsw "MM"3, "MM"2 \n\t"\ | |||
| MOVQ" "MM"2, 48(%[tmpA]) \n\t" /* diff */\ | |||
| MOVQ" "MM"2, 48(%[tmp]) \n\t" /* diff */\ | |||
| \ | |||
| "paddw "MM"0, "MM"1 \n\t"\ | |||
| "paddw "MM"0, "MM"0 \n\t"\ | |||
| @@ -177,7 +177,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, | |||
| CHECK2\ | |||
| \ | |||
| /* if(p->mode<2) ... */\ | |||
| MOVQ" 48(%[tmpA]), "MM"6 \n\t" /* diff */\ | |||
| MOVQ" 48(%[tmp]), "MM"6 \n\t" /* diff */\ | |||
| "cmpl $2, %[mode] \n\t"\ | |||
| "jge 1f \n\t"\ | |||
| LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\ | |||
| @@ -188,9 +188,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, | |||
| "paddw "MM"5, "MM"3 \n\t"\ | |||
| "psrlw $1, "MM"2 \n\t" /* b */\ | |||
| "psrlw $1, "MM"3 \n\t" /* f */\ | |||
| MOVQ" (%[tmpA]), "MM"4 \n\t" /* c */\ | |||
| MOVQ" 16(%[tmpA]), "MM"5 \n\t" /* d */\ | |||
| MOVQ" 32(%[tmpA]), "MM"7 \n\t" /* e */\ | |||
| MOVQ" (%[tmp]), "MM"4 \n\t" /* c */\ | |||
| MOVQ" 16(%[tmp]), "MM"5 \n\t" /* d */\ | |||
| MOVQ" 32(%[tmp]), "MM"7 \n\t" /* e */\ | |||
| "psubw "MM"4, "MM"2 \n\t" /* b-c */\ | |||
| "psubw "MM"7, "MM"3 \n\t" /* f-e */\ | |||
| MOVQ" "MM"5, "MM"0 \n\t"\ | |||
| @@ -209,7 +209,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, | |||
| "pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\ | |||
| "1: \n\t"\ | |||
| \ | |||
| MOVQ" 16(%[tmpA]), "MM"2 \n\t" /* d */\ | |||
| MOVQ" 16(%[tmp]), "MM"2 \n\t" /* d */\ | |||
| MOVQ" "MM"2, "MM"3 \n\t"\ | |||
| "psubw "MM"6, "MM"2 \n\t" /* d-diff */\ | |||
| "paddw "MM"6, "MM"3 \n\t" /* d+diff */\ | |||
| @@ -217,14 +217,13 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, | |||
| "pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\ | |||
| "packuswb "MM"1, "MM"1 \n\t"\ | |||
| \ | |||
| :\ | |||
| :[tmpA] "r"(tmpA),\ | |||
| [prev] "r"(prev),\ | |||
| ::[prev] "r"(prev),\ | |||
| [cur] "r"(cur),\ | |||
| [next] "r"(next),\ | |||
| [prefs]"r"((x86_reg)prefs),\ | |||
| [mrefs]"r"((x86_reg)mrefs),\ | |||
| [mode] "g"(mode)\ | |||
| [mode] "g"(mode),\ | |||
| [tmp] "r"(tmp)\ | |||
| );\ | |||
| __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\ | |||
| dst += STEP;\ | |||
| @@ -1200,6 +1200,9 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt) | |||
| #endif | |||
| switch (pkt->type) { | |||
| case RTMP_PT_BYTES_READ: | |||
| av_dlog(s, "received bytes read report\n"); | |||
| break; | |||
| case RTMP_PT_CHUNK_SIZE: | |||
| if ((ret = handle_chunk_size(s, pkt)) < 0) | |||
| return ret; | |||
| @@ -44,7 +44,7 @@ static av_always_inline av_const int FASTDIV(int a, int b) | |||
| } | |||
| #define av_clip_uint8 av_clip_uint8_arm | |||
| static av_always_inline av_const uint8_t av_clip_uint8_arm(int a) | |||
| static av_always_inline av_const unsigned av_clip_uint8_arm(int a) | |||
| { | |||
| unsigned x; | |||
| __asm__ ("usat %0, #8, %1" : "=r"(x) : "r"(a)); | |||
| @@ -52,15 +52,15 @@ static av_always_inline av_const uint8_t av_clip_uint8_arm(int a) | |||
| } | |||
| #define av_clip_int8 av_clip_int8_arm | |||
| static av_always_inline av_const uint8_t av_clip_int8_arm(int a) | |||
| static av_always_inline av_const int av_clip_int8_arm(int a) | |||
| { | |||
| unsigned x; | |||
| int x; | |||
| __asm__ ("ssat %0, #8, %1" : "=r"(x) : "r"(a)); | |||
| return x; | |||
| } | |||
| #define av_clip_uint16 av_clip_uint16_arm | |||
| static av_always_inline av_const uint16_t av_clip_uint16_arm(int a) | |||
| static av_always_inline av_const unsigned av_clip_uint16_arm(int a) | |||
| { | |||
| unsigned x; | |||
| __asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a)); | |||
| @@ -68,7 +68,7 @@ static av_always_inline av_const uint16_t av_clip_uint16_arm(int a) | |||
| } | |||
| #define av_clip_int16 av_clip_int16_arm | |||
| static av_always_inline av_const int16_t av_clip_int16_arm(int a) | |||
| static av_always_inline av_const int av_clip_int16_arm(int a) | |||
| { | |||
| int x; | |||
| __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a)); | |||
| @@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr | |||
| __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "punpckldq 3%1, %%mm0 \n\t" | |||
| "movd 6%1, %%mm1 \n\t" | |||
| "punpckldq 9%1, %%mm1 \n\t" | |||
| "movd 12%1, %%mm2 \n\t" | |||
| "punpckldq 15%1, %%mm2 \n\t" | |||
| "movd 18%1, %%mm3 \n\t" | |||
| "punpckldq 21%1, %%mm3 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| "punpckldq 3(%1), %%mm0 \n\t" | |||
| "movd 6(%1), %%mm1 \n\t" | |||
| "punpckldq 9(%1), %%mm1 \n\t" | |||
| "movd 12(%1), %%mm2 \n\t" | |||
| "punpckldq 15(%1), %%mm2 \n\t" | |||
| "movd 18(%1), %%mm3 \n\t" | |||
| "punpckldq 21(%1), %%mm3 \n\t" | |||
| "por %%mm7, %%mm0 \n\t" | |||
| "por %%mm7, %%mm1 \n\t" | |||
| "por %%mm7, %%mm2 \n\t" | |||
| "por %%mm7, %%mm3 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| MOVNTQ" %%mm1, 8%0 \n\t" | |||
| MOVNTQ" %%mm2, 16%0 \n\t" | |||
| MOVNTQ" %%mm3, 24%0" | |||
| :"=m"(*dest) | |||
| :"m"(*s) | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| MOVNTQ" %%mm1, 8(%0) \n\t" | |||
| MOVNTQ" %%mm2, 16(%0) \n\t" | |||
| MOVNTQ" %%mm3, 24(%0)" | |||
| :: "r"(dest), "r"(s) | |||
| :"memory"); | |||
| dest += 32; | |||
| s += 24; | |||
| @@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr | |||
| "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \ | |||
| "por %%mm5, %%mm4 \n\t" \ | |||
| \ | |||
| MOVNTQ" %%mm0, %0 \n\t" \ | |||
| MOVNTQ" %%mm1, 8%0 \n\t" \ | |||
| MOVNTQ" %%mm4, 16%0" | |||
| MOVNTQ" %%mm0, (%0) \n\t" \ | |||
| MOVNTQ" %%mm1, 8(%0) \n\t" \ | |||
| MOVNTQ" %%mm4, 16(%0)" | |||
| static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) | |||
| @@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| mm_end = end - 31; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 8%1, %%mm1 \n\t" | |||
| "movq 16%1, %%mm4 \n\t" | |||
| "movq 24%1, %%mm5 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 8(%1), %%mm1 \n\t" | |||
| "movq 16(%1), %%mm4 \n\t" | |||
| "movq 24(%1), %%mm5 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm1, %%mm3 \n\t" | |||
| "movq %%mm4, %%mm6 \n\t" | |||
| "movq %%mm5, %%mm7 \n\t" | |||
| STORE_BGR24_MMX | |||
| :"=m"(*dest) | |||
| :"m"(*s) | |||
| :: "r"(dest), "r"(s) | |||
| :"memory"); | |||
| dest += 24; | |||
| s += 32; | |||
| @@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s | |||
| mm_end = end - 15; | |||
| while (s<mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 8%1, %%mm2 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "pand %%mm4, %%mm0 \n\t" | |||
| "pand %%mm4, %%mm2 \n\t" | |||
| "paddw %%mm1, %%mm0 \n\t" | |||
| "paddw %%mm3, %%mm2 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| MOVNTQ" %%mm2, 8%0" | |||
| :"=m"(*d) | |||
| :"m"(*s) | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| MOVNTQ" %%mm2, 8(%0)" | |||
| :: "r"(d), "r"(s) | |||
| ); | |||
| d+=16; | |||
| s+=16; | |||
| @@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s | |||
| mm_end = end - 15; | |||
| while (s<mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 8%1, %%mm2 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "psrlq $1, %%mm0 \n\t" | |||
| @@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s | |||
| "pand %%mm6, %%mm3 \n\t" | |||
| "por %%mm1, %%mm0 \n\t" | |||
| "por %%mm3, %%mm2 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| MOVNTQ" %%mm2, 8%0" | |||
| :"=m"(*d) | |||
| :"m"(*s) | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| MOVNTQ" %%mm2, 8(%0)" | |||
| :: "r"(d), "r"(s) | |||
| ); | |||
| d+=16; | |||
| s+=16; | |||
| @@ -287,7 +283,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s | |||
| uint16_t *d = (uint16_t *)dst; | |||
| end = s + src_size; | |||
| mm_end = end - 15; | |||
| #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) | |||
| __asm__ volatile( | |||
| "movq %3, %%mm5 \n\t" | |||
| "movq %4, %%mm6 \n\t" | |||
| @@ -322,47 +317,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s | |||
| : "+r" (d), "+r"(s) | |||
| : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) | |||
| ); | |||
| #else | |||
| __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); | |||
| __asm__ volatile( | |||
| "movq %0, %%mm7 \n\t" | |||
| "movq %1, %%mm6 \n\t" | |||
| ::"m"(red_16mask),"m"(green_16mask)); | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "movd 4%1, %%mm3 \n\t" | |||
| "punpckldq 8%1, %%mm0 \n\t" | |||
| "punpckldq 12%1, %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| "movq %%mm3, %%mm5 \n\t" | |||
| "psrlq $3, %%mm0 \n\t" | |||
| "psrlq $3, %%mm3 \n\t" | |||
| "pand %2, %%mm0 \n\t" | |||
| "pand %2, %%mm3 \n\t" | |||
| "psrlq $5, %%mm1 \n\t" | |||
| "psrlq $5, %%mm4 \n\t" | |||
| "pand %%mm6, %%mm1 \n\t" | |||
| "pand %%mm6, %%mm4 \n\t" | |||
| "psrlq $8, %%mm2 \n\t" | |||
| "psrlq $8, %%mm5 \n\t" | |||
| "pand %%mm7, %%mm2 \n\t" | |||
| "pand %%mm7, %%mm5 \n\t" | |||
| "por %%mm1, %%mm0 \n\t" | |||
| "por %%mm4, %%mm3 \n\t" | |||
| "por %%mm2, %%mm0 \n\t" | |||
| "por %%mm5, %%mm3 \n\t" | |||
| "psllq $16, %%mm3 \n\t" | |||
| "por %%mm3, %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |||
| d += 4; | |||
| s += 16; | |||
| } | |||
| #endif | |||
| __asm__ volatile(SFENCE:::"memory"); | |||
| __asm__ volatile(EMMS:::"memory"); | |||
| while (s < end) { | |||
| @@ -386,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr | |||
| mm_end = end - 15; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "movd 4%1, %%mm3 \n\t" | |||
| "punpckldq 8%1, %%mm0 \n\t" | |||
| "punpckldq 12%1, %%mm3 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| "movd 4(%1), %%mm3 \n\t" | |||
| "punpckldq 8(%1), %%mm0 \n\t" | |||
| "punpckldq 12(%1), %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| @@ -413,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr | |||
| "por %%mm5, %%mm3 \n\t" | |||
| "psllq $16, %%mm3 \n\t" | |||
| "por %%mm3, %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| :: "r"(d),"r"(s),"m"(blue_16mask):"memory"); | |||
| d += 4; | |||
| s += 16; | |||
| } | |||
| @@ -434,7 +388,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s | |||
| uint16_t *d = (uint16_t *)dst; | |||
| end = s + src_size; | |||
| mm_end = end - 15; | |||
| #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) | |||
| __asm__ volatile( | |||
| "movq %3, %%mm5 \n\t" | |||
| "movq %4, %%mm6 \n\t" | |||
| @@ -469,47 +422,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s | |||
| : "+r" (d), "+r"(s) | |||
| : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) | |||
| ); | |||
| #else | |||
| __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); | |||
| __asm__ volatile( | |||
| "movq %0, %%mm7 \n\t" | |||
| "movq %1, %%mm6 \n\t" | |||
| ::"m"(red_15mask),"m"(green_15mask)); | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "movd 4%1, %%mm3 \n\t" | |||
| "punpckldq 8%1, %%mm0 \n\t" | |||
| "punpckldq 12%1, %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| "movq %%mm3, %%mm5 \n\t" | |||
| "psrlq $3, %%mm0 \n\t" | |||
| "psrlq $3, %%mm3 \n\t" | |||
| "pand %2, %%mm0 \n\t" | |||
| "pand %2, %%mm3 \n\t" | |||
| "psrlq $6, %%mm1 \n\t" | |||
| "psrlq $6, %%mm4 \n\t" | |||
| "pand %%mm6, %%mm1 \n\t" | |||
| "pand %%mm6, %%mm4 \n\t" | |||
| "psrlq $9, %%mm2 \n\t" | |||
| "psrlq $9, %%mm5 \n\t" | |||
| "pand %%mm7, %%mm2 \n\t" | |||
| "pand %%mm7, %%mm5 \n\t" | |||
| "por %%mm1, %%mm0 \n\t" | |||
| "por %%mm4, %%mm3 \n\t" | |||
| "por %%mm2, %%mm0 \n\t" | |||
| "por %%mm5, %%mm3 \n\t" | |||
| "psllq $16, %%mm3 \n\t" | |||
| "por %%mm3, %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |||
| d += 4; | |||
| s += 16; | |||
| } | |||
| #endif | |||
| __asm__ volatile(SFENCE:::"memory"); | |||
| __asm__ volatile(EMMS:::"memory"); | |||
| while (s < end) { | |||
| @@ -533,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr | |||
| mm_end = end - 15; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "movd 4%1, %%mm3 \n\t" | |||
| "punpckldq 8%1, %%mm0 \n\t" | |||
| "punpckldq 12%1, %%mm3 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| "movd 4(%1), %%mm3 \n\t" | |||
| "punpckldq 8(%1), %%mm0 \n\t" | |||
| "punpckldq 12(%1), %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| @@ -560,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr | |||
| "por %%mm5, %%mm3 \n\t" | |||
| "psllq $16, %%mm3 \n\t" | |||
| "por %%mm3, %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); | |||
| d += 4; | |||
| s += 16; | |||
| } | |||
| @@ -588,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr | |||
| mm_end = end - 11; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "movd 3%1, %%mm3 \n\t" | |||
| "punpckldq 6%1, %%mm0 \n\t" | |||
| "punpckldq 9%1, %%mm3 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| "movd 3(%1), %%mm3 \n\t" | |||
| "punpckldq 6(%1), %%mm0 \n\t" | |||
| "punpckldq 9(%1), %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| @@ -615,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr | |||
| "por %%mm5, %%mm3 \n\t" | |||
| "psllq $16, %%mm3 \n\t" | |||
| "por %%mm3, %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); | |||
| d += 4; | |||
| s += 12; | |||
| } | |||
| @@ -645,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s | |||
| mm_end = end - 15; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "movd 3%1, %%mm3 \n\t" | |||
| "punpckldq 6%1, %%mm0 \n\t" | |||
| "punpckldq 9%1, %%mm3 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| "movd 3(%1), %%mm3 \n\t" | |||
| "punpckldq 6(%1), %%mm0 \n\t" | |||
| "punpckldq 9(%1), %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| @@ -672,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s | |||
| "por %%mm5, %%mm3 \n\t" | |||
| "psllq $16, %%mm3 \n\t" | |||
| "por %%mm3, %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); | |||
| d += 4; | |||
| s += 12; | |||
| } | |||
| @@ -702,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr | |||
| mm_end = end - 11; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "movd 3%1, %%mm3 \n\t" | |||
| "punpckldq 6%1, %%mm0 \n\t" | |||
| "punpckldq 9%1, %%mm3 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| "movd 3(%1), %%mm3 \n\t" | |||
| "punpckldq 6(%1), %%mm0 \n\t" | |||
| "punpckldq 9(%1), %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| @@ -729,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr | |||
| "por %%mm5, %%mm3 \n\t" | |||
| "psllq $16, %%mm3 \n\t" | |||
| "por %%mm3, %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); | |||
| d += 4; | |||
| s += 12; | |||
| } | |||
| @@ -759,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s | |||
| mm_end = end - 15; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movd %1, %%mm0 \n\t" | |||
| "movd 3%1, %%mm3 \n\t" | |||
| "punpckldq 6%1, %%mm0 \n\t" | |||
| "punpckldq 9%1, %%mm3 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| "movd 3(%1), %%mm3 \n\t" | |||
| "punpckldq 6(%1), %%mm0 \n\t" | |||
| "punpckldq 9(%1), %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| @@ -786,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s | |||
| "por %%mm5, %%mm3 \n\t" | |||
| "psllq $16, %%mm3 \n\t" | |||
| "por %%mm3, %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |||
| MOVNTQ" %%mm0, (%0) \n\t" | |||
| ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); | |||
| d += 4; | |||
| s += 12; | |||
| } | |||
| @@ -812,10 +724,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| mm_end = end - 7; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq %1, %%mm1 \n\t" | |||
| "movq %1, %%mm2 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%1), %%mm1 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| "pand %2, %%mm0 \n\t" | |||
| "pand %3, %%mm1 \n\t" | |||
| "pand %4, %%mm2 \n\t" | |||
| @@ -844,9 +756,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| "movq %%mm0, %%mm6 \n\t" | |||
| "movq %%mm3, %%mm7 \n\t" | |||
| "movq 8%1, %%mm0 \n\t" | |||
| "movq 8%1, %%mm1 \n\t" | |||
| "movq 8%1, %%mm2 \n\t" | |||
| "movq 8(%1), %%mm0 \n\t" | |||
| "movq 8(%1), %%mm1 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "pand %2, %%mm0 \n\t" | |||
| "pand %3, %%mm1 \n\t" | |||
| "pand %4, %%mm2 \n\t" | |||
| @@ -873,7 +785,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| "por %%mm5, %%mm3 \n\t" | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null) | |||
| :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |||
| :"memory"); | |||
| /* borrowed 32 to 24 */ | |||
| __asm__ volatile( | |||
| @@ -889,8 +801,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| STORE_BGR24_MMX | |||
| :"=m"(*d) | |||
| :"m"(*s) | |||
| :: "r"(d), "m"(*s) | |||
| :"memory"); | |||
| d += 24; | |||
| s += 8; | |||
| @@ -917,10 +828,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| mm_end = end - 7; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq %1, %%mm1 \n\t" | |||
| "movq %1, %%mm2 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%1), %%mm1 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| "pand %2, %%mm0 \n\t" | |||
| "pand %3, %%mm1 \n\t" | |||
| "pand %4, %%mm2 \n\t" | |||
| @@ -950,9 +861,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| "movq %%mm0, %%mm6 \n\t" | |||
| "movq %%mm3, %%mm7 \n\t" | |||
| "movq 8%1, %%mm0 \n\t" | |||
| "movq 8%1, %%mm1 \n\t" | |||
| "movq 8%1, %%mm2 \n\t" | |||
| "movq 8(%1), %%mm0 \n\t" | |||
| "movq 8(%1), %%mm1 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "pand %2, %%mm0 \n\t" | |||
| "pand %3, %%mm1 \n\t" | |||
| "pand %4, %%mm2 \n\t" | |||
| @@ -979,7 +890,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| "por %%mm4, %%mm3 \n\t" | |||
| "por %%mm5, %%mm3 \n\t" | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |||
| :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |||
| :"memory"); | |||
| /* borrowed 32 to 24 */ | |||
| __asm__ volatile( | |||
| @@ -995,8 +906,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| STORE_BGR24_MMX | |||
| :"=m"(*d) | |||
| :"m"(*s) | |||
| :: "r"(d), "m"(*s) | |||
| :"memory"); | |||
| d += 24; | |||
| s += 8; | |||
| @@ -1028,8 +938,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||
| "movq %%mm0, %%mm3 \n\t" \ | |||
| "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ | |||
| "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ | |||
| MOVNTQ" %%mm0, %0 \n\t" \ | |||
| MOVNTQ" %%mm3, 8%0 \n\t" \ | |||
| MOVNTQ" %%mm0, (%0) \n\t" \ | |||
| MOVNTQ" %%mm3, 8(%0) \n\t" \ | |||
| static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size) | |||
| { | |||
| @@ -1044,10 +954,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s | |||
| mm_end = end - 3; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq %1, %%mm1 \n\t" | |||
| "movq %1, %%mm2 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%1), %%mm1 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| "pand %2, %%mm0 \n\t" | |||
| "pand %3, %%mm1 \n\t" | |||
| "pand %4, %%mm2 \n\t" | |||
| @@ -1056,8 +966,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s | |||
| "pmulhw %5, %%mm1 \n\t" | |||
| "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" | |||
| PACK_RGB32 | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid) | |||
| ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) | |||
| :"memory"); | |||
| d += 16; | |||
| s += 4; | |||
| @@ -1087,10 +996,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s | |||
| mm_end = end - 3; | |||
| while (s < mm_end) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq %1, %%mm1 \n\t" | |||
| "movq %1, %%mm2 \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%1), %%mm1 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| "pand %2, %%mm0 \n\t" | |||
| "pand %3, %%mm1 \n\t" | |||
| "pand %4, %%mm2 \n\t" | |||
| @@ -1100,8 +1009,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s | |||
| "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" | |||
| "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" | |||
| PACK_RGB32 | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) | |||
| ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) | |||
| :"memory"); | |||
| d += 16; | |||
| s += 4; | |||
| @@ -2029,8 +1937,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |||
| int srcStride1, int srcStride2, | |||
| int dstStride1, int dstStride2) | |||
| { | |||
| x86_reg y; | |||
| int x,w,h; | |||
| x86_reg x, y; | |||
| int w,h; | |||
| w=width/2; h=height/2; | |||
| __asm__ volatile( | |||
| PREFETCH" %0 \n\t" | |||
| @@ -2042,11 +1950,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |||
| x=0; | |||
| for (;x<w-31;x+=32) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 8%1, %%mm2 \n\t" | |||
| "movq 16%1, %%mm4 \n\t" | |||
| "movq 24%1, %%mm6 \n\t" | |||
| PREFETCH" 32(%1,%2) \n\t" | |||
| "movq (%1,%2), %%mm0 \n\t" | |||
| "movq 8(%1,%2), %%mm2 \n\t" | |||
| "movq 16(%1,%2), %%mm4 \n\t" | |||
| "movq 24(%1,%2), %%mm6 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| @@ -2059,16 +1967,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |||
| "punpckhbw %%mm5, %%mm5 \n\t" | |||
| "punpcklbw %%mm6, %%mm6 \n\t" | |||
| "punpckhbw %%mm7, %%mm7 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| MOVNTQ" %%mm1, 8%0 \n\t" | |||
| MOVNTQ" %%mm2, 16%0 \n\t" | |||
| MOVNTQ" %%mm3, 24%0 \n\t" | |||
| MOVNTQ" %%mm4, 32%0 \n\t" | |||
| MOVNTQ" %%mm5, 40%0 \n\t" | |||
| MOVNTQ" %%mm6, 48%0 \n\t" | |||
| MOVNTQ" %%mm7, 56%0" | |||
| :"=m"(d[2*x]) | |||
| :"m"(s1[x]) | |||
| MOVNTQ" %%mm0, (%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm7, 56(%0,%2,2)" | |||
| :: "r"(d), "r"(s1), "r"(x) | |||
| :"memory"); | |||
| } | |||
| for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; | |||
| @@ -2079,11 +1986,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |||
| x=0; | |||
| for (;x<w-31;x+=32) { | |||
| __asm__ volatile( | |||
| PREFETCH" 32%1 \n\t" | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 8%1, %%mm2 \n\t" | |||
| "movq 16%1, %%mm4 \n\t" | |||
| "movq 24%1, %%mm6 \n\t" | |||
| PREFETCH" 32(%1,%2) \n\t" | |||
| "movq (%1,%2), %%mm0 \n\t" | |||
| "movq 8(%1,%2), %%mm2 \n\t" | |||
| "movq 16(%1,%2), %%mm4 \n\t" | |||
| "movq 24(%1,%2), %%mm6 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| @@ -2096,16 +2003,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |||
| "punpckhbw %%mm5, %%mm5 \n\t" | |||
| "punpcklbw %%mm6, %%mm6 \n\t" | |||
| "punpckhbw %%mm7, %%mm7 \n\t" | |||
| MOVNTQ" %%mm0, %0 \n\t" | |||
| MOVNTQ" %%mm1, 8%0 \n\t" | |||
| MOVNTQ" %%mm2, 16%0 \n\t" | |||
| MOVNTQ" %%mm3, 24%0 \n\t" | |||
| MOVNTQ" %%mm4, 32%0 \n\t" | |||
| MOVNTQ" %%mm5, 40%0 \n\t" | |||
| MOVNTQ" %%mm6, 48%0 \n\t" | |||
| MOVNTQ" %%mm7, 56%0" | |||
| :"=m"(d[2*x]) | |||
| :"m"(s2[x]) | |||
| MOVNTQ" %%mm0, (%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" | |||
| MOVNTQ" %%mm7, 56(%0,%2,2)" | |||
| :: "r"(d), "r"(s2), "r"(x) | |||
| :"memory"); | |||
| } | |||
| for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; | |||