| @@ -22,10 +22,35 @@ | |||||
| */ | */ | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "v210dec.h" | |||||
| #include "libavutil/bswap.h" | #include "libavutil/bswap.h" | ||||
| #include "libavutil/x86/timer.h" | |||||
/**
 * Unpack one 32-bit v210 word into three 10-bit samples.
 *
 * The word at *src is converted from little-endian to native order and src
 * is advanced by one word.  Bits 0-9 are stored through a, bits 10-19
 * through b and bits 20-29 through c; each destination pointer is
 * post-incremented.  The stores happen in that order, so the same pointer
 * may be passed for more than one argument.
 *
 * The expansion site must provide `src` (pointer to the packed 32-bit
 * words) and `val` (a 32-bit scratch variable).
 */
#define READ_PIXELS(a, b, c)                 \
    do {                                     \
        val  = av_le2ne32(*src++);           \
        *a++ =  val        & 0x3FF;          \
        *b++ = (val >> 10) & 0x3FF;          \
        *c++ = (val >> 20) & 0x3FF;          \
    } while (0)
/**
 * Portable unpacker: split packed v210 words into planar Y, U and V samples.
 *
 * Every iteration consumes four 32-bit words and produces six luma samples
 * plus three samples for each of the two chroma planes.  Only complete groups
 * of six pixels are handled; a trailing remainder of fewer than six pixels is
 * not touched by this function.
 *
 * @param src   packed v210 input words
 * @param y     destination for luma samples
 * @param u     destination for U samples
 * @param v     destination for V samples
 * @param width number of pixels to unpack
 */
static void v210_planar_unpack_c(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
{
    uint32_t val;
    int px = 0;

    while (px < width - 5) {
        READ_PIXELS(u, y, v);
        READ_PIXELS(y, u, y);
        READ_PIXELS(v, y, u);
        READ_PIXELS(y, v, y);
        px += 6;
    }
}
| static av_cold int decode_init(AVCodecContext *avctx) | static av_cold int decode_init(AVCodecContext *avctx) | ||||
| { | { | ||||
| V210DecContext *s = avctx->priv_data; | |||||
| if (avctx->width & 1) { | if (avctx->width & 1) { | ||||
| av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n"); | av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n"); | ||||
| return -1; | return -1; | ||||
| @@ -35,18 +60,37 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
| avctx->coded_frame = avcodec_alloc_frame(); | avctx->coded_frame = avcodec_alloc_frame(); | ||||
| s->unpack_frame = v210_planar_unpack_c; | |||||
| if (HAVE_MMX) | |||||
| v210_x86_init(s); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, | static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, | ||||
| AVPacket *avpkt) | AVPacket *avpkt) | ||||
| { | { | ||||
| int h, w; | |||||
| V210DecContext *s = avctx->priv_data; | |||||
| int h, w, stride, aligned_input; | |||||
| AVFrame *pic = avctx->coded_frame; | AVFrame *pic = avctx->coded_frame; | ||||
| const uint8_t *psrc = avpkt->data; | const uint8_t *psrc = avpkt->data; | ||||
| uint16_t *y, *u, *v; | uint16_t *y, *u, *v; | ||||
| int aligned_width = ((avctx->width + 47) / 48) * 48; | |||||
| int stride = aligned_width * 8 / 3; | |||||
| if (s->custom_stride ) | |||||
| stride = s->custom_stride; | |||||
| else { | |||||
| int aligned_width = ((avctx->width + 47) / 48) * 48; | |||||
| stride = aligned_width * 8 / 3; | |||||
| } | |||||
| aligned_input = !((uintptr_t)psrc & 0xf) && !(stride & 0xf); | |||||
| if (aligned_input != s->aligned_input) { | |||||
| s->aligned_input = aligned_input; | |||||
| if (HAVE_MMX) | |||||
| v210_x86_init(s); | |||||
| } | |||||
| if (pic->data[0]) | if (pic->data[0]) | ||||
| avctx->release_buffer(avctx, pic); | avctx->release_buffer(avctx, pic); | ||||
| @@ -66,23 +110,18 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |||||
| pic->pict_type = AV_PICTURE_TYPE_I; | pic->pict_type = AV_PICTURE_TYPE_I; | ||||
| pic->key_frame = 1; | pic->key_frame = 1; | ||||
| #define READ_PIXELS(a, b, c) \ | |||||
| do { \ | |||||
| val = av_le2ne32(*src++); \ | |||||
| *a++ = val & 0x3FF; \ | |||||
| *b++ = (val >> 10) & 0x3FF; \ | |||||
| *c++ = (val >> 20) & 0x3FF; \ | |||||
| } while (0) | |||||
| for (h = 0; h < avctx->height; h++) { | for (h = 0; h < avctx->height; h++) { | ||||
| const uint32_t *src = (const uint32_t*)psrc; | const uint32_t *src = (const uint32_t*)psrc; | ||||
| uint32_t val; | uint32_t val; | ||||
| for (w = 0; w < avctx->width - 5; w += 6) { | |||||
| READ_PIXELS(u, y, v); | |||||
| READ_PIXELS(y, u, y); | |||||
| READ_PIXELS(v, y, u); | |||||
| READ_PIXELS(y, v, y); | |||||
| } | |||||
| w = (avctx->width / 6) * 6; | |||||
| s->unpack_frame(src, y, u, v, w); | |||||
| y += w; | |||||
| u += w >> 1; | |||||
| v += w >> 1; | |||||
| src += (w << 1) / 3; | |||||
| if (w < avctx->width - 1) { | if (w < avctx->width - 1) { | ||||
| READ_PIXELS(u, y, v); | READ_PIXELS(u, y, v); | ||||
| @@ -120,13 +159,29 @@ static av_cold int decode_close(AVCodecContext *avctx) | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| #define V210DEC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM | |||||
| static const AVOption v210dec_options[] = { | |||||
| {"custom_stride", "Custom V210 stride", offsetof(V210DecContext, custom_stride), FF_OPT_TYPE_INT, | |||||
| {.dbl = 0}, INT_MIN, INT_MAX, V210DEC_FLAGS}, | |||||
| {NULL} | |||||
| }; | |||||
| static const AVClass v210dec_class = { | |||||
| "V210 Decoder", | |||||
| av_default_item_name, | |||||
| v210dec_options, | |||||
| LIBAVUTIL_VERSION_INT, | |||||
| }; | |||||
| AVCodec ff_v210_decoder = { | AVCodec ff_v210_decoder = { | ||||
| .name = "v210", | .name = "v210", | ||||
| .type = AVMEDIA_TYPE_VIDEO, | .type = AVMEDIA_TYPE_VIDEO, | ||||
| .id = CODEC_ID_V210, | .id = CODEC_ID_V210, | ||||
| .priv_data_size = sizeof(V210DecContext), | |||||
| .init = decode_init, | .init = decode_init, | ||||
| .close = decode_close, | .close = decode_close, | ||||
| .decode = decode_frame, | .decode = decode_frame, | ||||
| .capabilities = CODEC_CAP_DR1, | .capabilities = CODEC_CAP_DR1, | ||||
| .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"), | .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"), | ||||
| .priv_class = &v210dec_class, | |||||
| }; | }; | ||||
| @@ -0,0 +1,34 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_V210DEC_H | |||||
| #define AVCODEC_V210DEC_H | |||||
| #include "libavutil/log.h" | |||||
| #include "libavutil/opt.h" | |||||
| typedef struct { | |||||
| AVClass *av_class; | |||||
| int custom_stride; | |||||
| int aligned_input; | |||||
| void (*unpack_frame)(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width); | |||||
| } V210DecContext; | |||||
| void v210_x86_init(V210DecContext *s); | |||||
| #endif /* AVCODEC_V210DEC_H */ | |||||
| @@ -39,6 +39,8 @@ MMX-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp-init.o | |||||
| YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o | YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o | ||||
| MMX-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp-init.o | MMX-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp-init.o | ||||
| MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o | MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o | ||||
| YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o | |||||
| MMX-OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o | |||||
| MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o | MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o | ||||
| YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o | YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o | ||||
| YASM-OBJS-$(CONFIG_VP5_DECODER) += x86/vp3dsp.o | YASM-OBJS-$(CONFIG_VP5_DECODER) += x86/vp3dsp.o | ||||
| @@ -0,0 +1,48 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavcodec/v210dec.h" | |||||
| extern void ff_v210_planar_unpack_unaligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width); | |||||
| extern void ff_v210_planar_unpack_unaligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width); | |||||
| extern void ff_v210_planar_unpack_aligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width); | |||||
| extern void ff_v210_planar_unpack_aligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width); | |||||
| av_cold void v210_x86_init(V210DecContext *s) | |||||
| { | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| #if HAVE_YASM | |||||
| if (s->aligned_input) { | |||||
| if (cpu_flags & AV_CPU_FLAG_SSSE3) | |||||
| s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3; | |||||
| if (cpu_flags & AV_CPU_FLAG_AVX) | |||||
| s->unpack_frame = ff_v210_planar_unpack_aligned_avx; | |||||
| } | |||||
| else { | |||||
| if (cpu_flags & AV_CPU_FLAG_SSSE3) | |||||
| s->unpack_frame = ff_v210_planar_unpack_unaligned_ssse3; | |||||
| if (cpu_flags & AV_CPU_FLAG_AVX) | |||||
| s->unpack_frame = ff_v210_planar_unpack_unaligned_avx; | |||||
| } | |||||
| #endif | |||||
| } | |||||
| @@ -0,0 +1,85 @@ | |||||
| ;****************************************************************************** | |||||
| ;* V210 SIMD unpack | |||||
| ;* Copyright (c) 2011 Loren Merritt <lorenm@u.washington.edu> | |||||
| ;* Copyright (c) 2011 Kieran Kunhya <kieran@kunhya.com> | |||||
| ;* | |||||
| ;* This file is part of Libav. | |||||
| ;* | |||||
| ;* Libav is free software; you can redistribute it and/or | |||||
| ;* modify it under the terms of the GNU Lesser General Public | |||||
| ;* License as published by the Free Software Foundation; either | |||||
| ;* version 2.1 of the License, or (at your option) any later version. | |||||
| ;* | |||||
| ;* Libav is distributed in the hope that it will be useful, | |||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| ;* Lesser General Public License for more details. | |||||
| ;* | |||||
| ;* You should have received a copy of the GNU Lesser General Public | |||||
| ;* License along with Libav; if not, write to the Free Software | |||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| ;****************************************************************************** | |||||
| %include "libavutil/x86/x86inc.asm" | |||||
| %include "libavutil/x86/x86util.asm" | |||||
| SECTION_RODATA | |||||
; Keeps the low 10 bits of every dword (applied after the 10-bit right shift).
v210_mask:        times 4 dd 0x3ff

; Per-word multipliers: 64 for the low word and 4 for the high word of each
; dword, used with pmullw followed by a 6-bit right shift of every word.
v210_mult:        dw 64,4,64,4,64,4,64,4

; pshufb controls that put the gathered samples into output order;
; a -1 entry produces a zero byte.
v210_luma_shuf:   db 8,9,0,1,2,3,12,13,4,5,6,7,-1,-1,-1,-1
v210_chroma_shuf: db 0,1,8,9,6,7,-1,-1,2,3,4,5,12,13,-1,-1
| SECTION .text | |||||
; Macro arguments:
;   first  - "aligned" or "unaligned": selects mova or movu for the source load
;   second - suffix appended to the generated symbol name
;
; Register use: r0 = src, r1 = y, r2 = u, r3 = v, r4 = width.
; The destination pointers are moved to the end of their rows and r4 becomes
; a negative index that counts up towards zero.  Every iteration reads one
; full register from src and yields six luma and three + three chroma
; samples.  The loop condition is tested after the body, so the body runs at
; least once.
%macro v210_planar_unpack 2

; v210_planar_unpack(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
cglobal v210_planar_unpack_%1_%2, 5, 5
    movsxdifnidn r4, r4d
    lea    r1, [r1+2*r4]            ; y + width samples (2 bytes each)
    add    r2, r4                   ; u + width bytes
    add    r3, r4                   ; v + width bytes
    neg    r4                       ; r4 = -width

    mova   m3, [v210_mult]
    mova   m4, [v210_mask]
    mova   m5, [v210_luma_shuf]
    mova   m6, [v210_chroma_shuf]
.loop:
%ifidn %1, unaligned
    movu   m0, [r0]
%else
    mova   m0, [r0]
%endif

    pmullw m1, m0, m3
    psrld  m0, 10
    psrlw  m1, 6                    ; u0 v0 y1 y2 v1 u2 y4 y5
    pand   m0, m4                   ; y0 __ u1 __ y3 __ v2 __

    shufps m2, m1, m0, 0x8d         ; y1 y2 y4 y5 y0 __ y3 __
    pshufb m2, m5                   ; y0 y1 y2 y3 y4 y5 __ __
    movu   [r1+2*r4], m2            ; whole register stored, last two words are padding

    shufps m1, m0, 0xd8             ; u0 v0 v1 u2 u1 __ v2 __
    pshufb m1, m6                   ; u0 u1 u2 __ v0 v1 v2 __
    movq   [r2+r4], m1              ; low half  -> u
    movhps [r3+r4], m1              ; high half -> v

    add    r0, mmsize
    add    r4, 6
    jl     .loop

    REP_RET
%endmacro

INIT_XMM
v210_planar_unpack unaligned, ssse3
INIT_AVX
v210_planar_unpack unaligned, avx

INIT_XMM
v210_planar_unpack aligned, ssse3
INIT_AVX
v210_planar_unpack aligned, avx