Signed-off-by: Paul B Mahol <onemda@gmail.com> tags/n3.0
| @@ -491,7 +491,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \ | |||||
| h263.o ituh263enc.o | h263.o ituh263enc.o | ||||
| OBJS-$(CONFIG_SVQ3_DECODER) += svq3.o svq13.o mpegutils.o | OBJS-$(CONFIG_SVQ3_DECODER) += svq3.o svq13.o mpegutils.o | ||||
| OBJS-$(CONFIG_TEXT_DECODER) += textdec.o ass.o | OBJS-$(CONFIG_TEXT_DECODER) += textdec.o ass.o | ||||
| OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o | |||||
| OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o takdsp.o | |||||
| OBJS-$(CONFIG_TARGA_DECODER) += targa.o | OBJS-$(CONFIG_TARGA_DECODER) += targa.o | ||||
| OBJS-$(CONFIG_TARGA_ENCODER) += targaenc.o rle.o | OBJS-$(CONFIG_TARGA_ENCODER) += targaenc.o rle.o | ||||
| OBJS-$(CONFIG_TARGA_Y216_DECODER) += targa_y216dec.o | OBJS-$(CONFIG_TARGA_Y216_DECODER) += targa_y216dec.o | ||||
| @@ -28,6 +28,7 @@ | |||||
| #include "libavutil/internal.h" | #include "libavutil/internal.h" | ||||
| #include "libavutil/samplefmt.h" | #include "libavutil/samplefmt.h" | ||||
| #include "tak.h" | #include "tak.h" | ||||
| #include "takdsp.h" | |||||
| #include "audiodsp.h" | #include "audiodsp.h" | ||||
| #include "thread.h" | #include "thread.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| @@ -47,6 +48,7 @@ typedef struct MCDParam { | |||||
| typedef struct TAKDecContext { | typedef struct TAKDecContext { | ||||
| AVCodecContext *avctx; ///< parent AVCodecContext | AVCodecContext *avctx; ///< parent AVCodecContext | ||||
| AudioDSPContext adsp; | AudioDSPContext adsp; | ||||
| TAKDSPContext tdsp; | |||||
| TAKStreamInfo ti; | TAKStreamInfo ti; | ||||
| GetBitContext gb; ///< bitstream reader initialized to start at the current frame | GetBitContext gb; ///< bitstream reader initialized to start at the current frame | ||||
| @@ -172,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx) | |||||
| TAKDecContext *s = avctx->priv_data; | TAKDecContext *s = avctx->priv_data; | ||||
| ff_audiodsp_init(&s->adsp); | ff_audiodsp_init(&s->adsp); | ||||
| ff_takdsp_init(&s->tdsp); | |||||
| s->avctx = avctx; | s->avctx = avctx; | ||||
| avctx->bits_per_raw_sample = avctx->bits_per_coded_sample; | avctx->bits_per_raw_sample = avctx->bits_per_coded_sample; | ||||
| @@ -541,46 +544,32 @@ static int decode_channel(TAKDecContext *s, int chan) | |||||
| static int decorrelate(TAKDecContext *s, int c1, int c2, int length) | static int decorrelate(TAKDecContext *s, int c1, int c2, int length) | ||||
| { | { | ||||
| GetBitContext *gb = &s->gb; | GetBitContext *gb = &s->gb; | ||||
| int32_t *p1 = s->decoded[c1] + 1; | |||||
| int32_t *p2 = s->decoded[c2] + 1; | |||||
| int32_t *p1 = s->decoded[c1] + (s->dmode > 5); | |||||
| int32_t *p2 = s->decoded[c2] + (s->dmode > 5); | |||||
| int32_t bp1 = p1[0]; | |||||
| int32_t bp2 = p2[0]; | |||||
| int i; | int i; | ||||
| int dshift, dfactor; | int dshift, dfactor; | ||||
| length += s->dmode < 6; | |||||
| switch (s->dmode) { | switch (s->dmode) { | ||||
| case 1: /* left/side */ | case 1: /* left/side */ | ||||
| for (i = 0; i < length; i++) { | |||||
| int32_t a = p1[i]; | |||||
| int32_t b = p2[i]; | |||||
| p2[i] = a + b; | |||||
| } | |||||
| s->tdsp.decorrelate_ls(p1, p2, length); | |||||
| break; | break; | ||||
| case 2: /* side/right */ | case 2: /* side/right */ | ||||
| for (i = 0; i < length; i++) { | |||||
| int32_t a = p1[i]; | |||||
| int32_t b = p2[i]; | |||||
| p1[i] = b - a; | |||||
| } | |||||
| s->tdsp.decorrelate_sr(p1, p2, length); | |||||
| break; | break; | ||||
| case 3: /* side/mid */ | case 3: /* side/mid */ | ||||
| for (i = 0; i < length; i++) { | |||||
| int32_t a = p1[i]; | |||||
| int32_t b = p2[i]; | |||||
| a -= b >> 1; | |||||
| p1[i] = a; | |||||
| p2[i] = a + b; | |||||
| } | |||||
| s->tdsp.decorrelate_sm(p1, p2, length); | |||||
| break; | break; | ||||
| case 4: /* side/left with scale factor */ | case 4: /* side/left with scale factor */ | ||||
| FFSWAP(int32_t*, p1, p2); | FFSWAP(int32_t*, p1, p2); | ||||
| FFSWAP(int32_t, bp1, bp2); | |||||
| case 5: /* side/right with scale factor */ | case 5: /* side/right with scale factor */ | ||||
| dshift = get_bits_esc4(gb); | dshift = get_bits_esc4(gb); | ||||
| dfactor = get_sbits(gb, 10); | dfactor = get_sbits(gb, 10); | ||||
| for (i = 0; i < length; i++) { | |||||
| int32_t a = p1[i]; | |||||
| int32_t b = p2[i]; | |||||
| b = dfactor * (b >> dshift) + 128 >> 8 << dshift; | |||||
| p1[i] = b - a; | |||||
| } | |||||
| s->tdsp.decorrelate_sf(p1, p2, length, dshift, dfactor); | |||||
| break; | break; | ||||
| case 6: | case 6: | ||||
| FFSWAP(int32_t*, p1, p2); | FFSWAP(int32_t*, p1, p2); | ||||
| @@ -664,6 +653,11 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length) | |||||
| } | } | ||||
| } | } | ||||
| if (s->dmode > 0 && s->dmode < 6) { | |||||
| p1[0] = bp1; | |||||
| p2[0] = bp2; | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -0,0 +1,82 @@ | |||||
| /* | |||||
| * TAK decoder | |||||
| * Copyright (c) 2015 Paul B Mahol | |||||
| * | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/attributes.h" | |||||
| #include "takdsp.h" | |||||
| #include "config.h" | |||||
/**
 * Left/side decorrelation: p2[i] = p1[i] + p2[i] for i in [0, length).
 *
 * p1 is only read; p2 is updated in place.
 *
 * The sum is computed in unsigned arithmetic so that it wraps modulo 2^32
 * rather than triggering signed-overflow undefined behaviour when the
 * bitstream produces extreme sample values.
 *
 * @param p1     first channel (read only)
 * @param p2     second channel, overwritten with the sum
 * @param length number of samples to process; nothing is done if <= 0
 */
static void decorrelate_ls(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        uint32_t b = p2[i];
        p2[i]      = a + b;
    }
}
/**
 * Side/right decorrelation: p1[i] = p2[i] - p1[i] for i in [0, length).
 *
 * p2 is only read; p1 is updated in place.
 *
 * The difference is computed in unsigned arithmetic so that it wraps
 * modulo 2^32 instead of invoking signed-overflow undefined behaviour.
 *
 * @param p1     first channel, overwritten with the difference
 * @param p2     second channel (read only)
 * @param length number of samples to process; nothing is done if <= 0
 */
static void decorrelate_sr(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        uint32_t b = p2[i];
        p1[i]      = b - a;
    }
}
/**
 * Side/mid decorrelation. For every i in [0, length):
 *
 *     a     = p1[i] - (p2[i] >> 1)
 *     p1[i] = a
 *     p2[i] = a + p2[i]
 *
 * Both buffers are updated in place. The halving is done on the signed
 * sample (so it rounds towards negative infinity on the usual
 * arithmetic-shift targets); the subtraction and addition are carried out
 * in unsigned arithmetic so they wrap modulo 2^32 instead of being
 * undefined on overflow.
 *
 * @param p1     first channel, in/out
 * @param p2     second channel, in/out
 * @param length number of samples to process; nothing is done if <= 0
 */
static void decorrelate_sm(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        int32_t  b = p2[i];

        a    -= (uint32_t)(b >> 1);
        p1[i] = a;
        p2[i] = a + (uint32_t)b;
    }
}
/**
 * Scale-factor decorrelation. For every i in [0, length):
 *
 *     p1[i] = (((dfactor * (p2[i] >> dshift) + 128) >> 8) << dshift) - p1[i]
 *
 * p2 is only read; p1 is updated in place.
 *
 * The multiply/add, the final left shift and the subtraction are performed
 * on unsigned values: the original all-signed expression overflowed and
 * left-shifted negative numbers, both of which are undefined behaviour in C.
 * For inputs that did not hit undefined behaviour the result is unchanged.
 * The two right shifts stay signed so that they remain arithmetic shifts.
 *
 * @param p1      channel that receives the result, in/out
 * @param p2      channel the prediction is derived from (read only)
 * @param length  number of samples to process; nothing is done if <= 0
 * @param dshift  shift applied before and after scaling; must be a valid
 *                shift count for a 32-bit value (not checked here)
 * @param dfactor signed scale factor in 1/256 units (the product is
 *                rounded with +128 and shifted right by 8)
 */
static void decorrelate_sf(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor)
{
    int i;

    for (i = 0; i < length; i++) {
        const uint32_t a       = p1[i];
        const int32_t  reduced = p2[i] >> dshift;
        const uint32_t scaled  = (uint32_t)dfactor * (uint32_t)reduced + 128u;
        const uint32_t pred    = (uint32_t)((int32_t)scaled >> 8) << dshift;

        p1[i] = pred - a;
    }
}
| av_cold void ff_takdsp_init(TAKDSPContext *c) | |||||
| { | |||||
| c->decorrelate_ls = decorrelate_ls; | |||||
| c->decorrelate_sr = decorrelate_sr; | |||||
| c->decorrelate_sm = decorrelate_sm; | |||||
| c->decorrelate_sf = decorrelate_sf; | |||||
| if (ARCH_X86) | |||||
| ff_takdsp_init_x86(c); | |||||
| } | |||||
| @@ -0,0 +1,34 @@ | |||||
| /* | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_TAKDSP_H | |||||
| #define AVCODEC_TAKDSP_H | |||||
| #include <stdint.h> | |||||
/**
 * Table of the TAK decoder's channel decorrelation routines.
 *
 * All routines work in place on two int32_t sample buffers and process
 * @c length samples. The formulas below describe the C reference versions.
 * NOTE(review): the x86 versions use x86inc `mova` accesses and handle
 * several samples per loop iteration, so callers presumably must provide
 * suitably aligned and padded buffers - confirm against the decoder.
 */
typedef struct TAKDSPContext {
    /** Left/side: p2[i] = p1[i] + p2[i]; p1 is not modified. */
    void (*decorrelate_ls)(int32_t *p1, int32_t *p2, int length);

    /** Side/right: p1[i] = p2[i] - p1[i]; p2 is not modified. */
    void (*decorrelate_sr)(int32_t *p1, int32_t *p2, int length);

    /** Side/mid: p1[i] -= p2[i] >> 1, then p2[i] += the updated p1[i]. */
    void (*decorrelate_sm)(int32_t *p1, int32_t *p2, int length);

    /**
     * Scale factor:
     * p1[i] = (((dfactor * (p2[i] >> dshift) + 128) >> 8) << dshift) - p1[i];
     * p2 is not modified.
     */
    void (*decorrelate_sf)(int32_t *p1, int32_t *p2, int length,
                           int dshift, int dfactor);
} TAKDSPContext;
| void ff_takdsp_init(TAKDSPContext *c); | |||||
| void ff_takdsp_init_x86(TAKDSPContext *c); | |||||
| #endif /* AVCODEC_TAKDSP_H */ | |||||
| @@ -56,6 +56,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o | |||||
| OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o | OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o | ||||
| OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o | OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o | ||||
| OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o | OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o | ||||
| OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o | |||||
| OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o | OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o | ||||
| OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o | OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o | ||||
| OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o | OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o | ||||
| @@ -152,6 +153,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o | |||||
| YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o | YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o | ||||
| YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o | YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o | ||||
| YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o | YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o | ||||
| YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o | |||||
| YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | ||||
| YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o | YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o | ||||
| YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o | YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o | ||||
| @@ -0,0 +1,116 @@ | |||||
| ;****************************************************************************** | |||||
| ;* TAK DSP SIMD optimizations | |||||
| ;* | |||||
| ;* Copyright (C) 2015 Paul B Mahol | |||||
| ;* | |||||
| ;* This file is part of FFmpeg. | |||||
| ;* | |||||
| ;* FFmpeg is free software; you can redistribute it and/or | |||||
| ;* modify it under the terms of the GNU Lesser General Public | |||||
| ;* License as published by the Free Software Foundation; either | |||||
| ;* version 2.1 of the License, or (at your option) any later version. | |||||
| ;* | |||||
| ;* FFmpeg is distributed in the hope that it will be useful, | |||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| ;* Lesser General Public License for more details. | |||||
| ;* | |||||
| ;* You should have received a copy of the GNU Lesser General Public | |||||
| ;* License along with FFmpeg; if not, write to the Free Software | |||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| ;****************************************************************************** | |||||
| %include "libavutil/x86/x86util.asm" | |||||
| SECTION_RODATA | |||||
| pd_128: times 4 dd 128 | |||||
| SECTION .text | |||||
| INIT_XMM sse2 | |||||
;------------------------------------------------------------------------------
; void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length)
;
; Left/side decorrelation: p2[i] = p1[i] + p2[i].
; Handles 2*mmsize bytes (eight 32-bit samples) per iteration and always runs
; the loop body at least once.
;------------------------------------------------------------------------------
cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
    shl                     lengthd, 2          ; samples -> bytes
    add                         p1q, lengthq    ; point past the end of both
    add                         p2q, lengthq    ; buffers ...
    neg                     lengthq             ; ... and count up towards zero

.loop:
    mova                         m0, [p2q+lengthq]
    mova                         m1, [p2q+lengthq+mmsize]
    paddd                        m0, [p1q+lengthq]
    paddd                        m1, [p1q+lengthq+mmsize]
    mova   [p2q+lengthq], m0
    mova   [p2q+lengthq+mmsize], m1
    add                     lengthq, mmsize*2
    jl .loop
    REP_RET
;------------------------------------------------------------------------------
; void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length)
;
; Side/right decorrelation: p1[i] = p2[i] - p1[i].
; Handles 2*mmsize bytes (eight 32-bit samples) per iteration and always runs
; the loop body at least once.
;------------------------------------------------------------------------------
cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
    shl                     lengthd, 2          ; samples -> bytes
    add                         p1q, lengthq    ; point past the end of both
    add                         p2q, lengthq    ; buffers ...
    neg                     lengthq             ; ... and count up towards zero

.loop:
    mova                         m1, [p2q+lengthq]
    mova                         m0, [p2q+lengthq+mmsize]
    psubd                        m1, [p1q+lengthq]
    psubd                        m0, [p1q+lengthq+mmsize]
    mova   [p1q+lengthq], m1
    mova   [p1q+lengthq+mmsize], m0
    add                     lengthq, mmsize*2
    jl .loop
    REP_RET
;------------------------------------------------------------------------------
; void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length)
;
; Side/mid decorrelation, eight 32-bit samples per iteration:
;     a     = p1[i] - (p2[i] >> 1)
;     p1[i] = a
;     p2[i] = a + p2[i]
;
; The halving has to be an arithmetic shift (psrad): the samples are signed
; and the C implementation shifts an int32_t. A logical shift (psrld) clears
; the sign bit and gives wrong output for every negative p2[i].
; The loop body always runs at least once.
;------------------------------------------------------------------------------
cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
    shl                     lengthd, 2          ; samples -> bytes
    add                         p1q, lengthq    ; point past the end of both
    add                         p2q, lengthq    ; buffers ...
    neg                     lengthq             ; ... and count up towards zero

.loop:
    mova                         m0, [p1q+lengthq]
    mova                         m1, [p2q+lengthq]
    mova                         m3, [p1q+lengthq+mmsize]
    mova                         m4, [p2q+lengthq+mmsize]
    mova                         m2, m1
    mova                         m5, m4
    psrad                        m2, 1          ; signed b >> 1
    psrad                        m5, 1
    psubd                        m0, m2         ; a -= b >> 1
    psubd                        m3, m5
    paddd                        m1, m0         ; b += a
    paddd                        m4, m3
    mova   [p1q+lengthq], m0
    mova   [p2q+lengthq], m1
    mova   [p1q+lengthq+mmsize], m3
    mova   [p2q+lengthq+mmsize], m4
    add                     lengthq, mmsize*2
    jl .loop
    REP_RET
| INIT_XMM sse4 | |||||
;------------------------------------------------------------------------------
; void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length,
;                                 int dshift, int dfactor)
;
; Scale-factor decorrelation, four 32-bit samples per iteration:
;     p1[i] = (((dfactor * (p2[i] >> dshift) + 128) >> 8) << dshift) - p1[i]
;
; Both right shifts operate on signed values in the C implementation, so they
; must be arithmetic (psrad). With logical shifts (psrld) any negative sample
; or negative scaled product is decoded incorrectly.
; pmulld is what makes this routine SSE4-only.
; The loop body always runs at least once.
;------------------------------------------------------------------------------
cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
    shl                     lengthd, 2          ; samples -> bytes
    add                         p1q, lengthq    ; point past the end of both
    add                         p2q, lengthq    ; buffers ...
    neg                     lengthq             ; ... and count up towards zero

    movd                         m2, dshiftm    ; shift count, low dword
    movd                         m3, dfactorm
    pshufd                       m3, m3, 0      ; broadcast dfactor to 4 lanes
    mova                         m4, [pd_128]   ; rounding constant

.loop:
    mova                         m0, [p1q+lengthq]
    mova                         m1, [p2q+lengthq]
    psrad                        m1, m2         ; signed b >> dshift
    pmulld                       m1, m3         ; * dfactor
    paddd                        m1, m4         ; + 128
    psrad                        m1, 8          ; signed >> 8
    pslld                        m1, m2         ; << dshift
    psubd                        m1, m0         ; - a
    mova   [p1q+lengthq], m1
    add                     lengthq, mmsize
    jl .loop
    REP_RET
| @@ -0,0 +1,45 @@ | |||||
| /* | |||||
| * Copyright (c) 2015 Paul B Mahol | |||||
| * | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavcodec/takdsp.h" | |||||
| #include "libavutil/x86/cpu.h" | |||||
| #include "config.h" | |||||
| void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length); | |||||
| void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length); | |||||
| void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length); | |||||
| void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor); | |||||
| av_cold void ff_takdsp_init_x86(TAKDSPContext *c) | |||||
| { | |||||
| #if HAVE_YASM | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| if (EXTERNAL_SSE2(cpu_flags)) { | |||||
| c->decorrelate_ls = ff_tak_decorrelate_ls_sse2; | |||||
| c->decorrelate_sr = ff_tak_decorrelate_sr_sse2; | |||||
| c->decorrelate_sm = ff_tak_decorrelate_sm_sse2; | |||||
| } | |||||
| if (EXTERNAL_SSE4(cpu_flags)) { | |||||
| c->decorrelate_sf = ff_tak_decorrelate_sf_sse4; | |||||
| } | |||||
| #endif | |||||
| } | |||||