| @@ -0,0 +1,36 @@ | |||
| /* | |||
| * Copyright (c) 2019 Paul B Mahol | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVFILTER_ATADENOISE_H | |||
| #define AVFILTER_ATADENOISE_H | |||
| #include <stddef.h> | |||
| #include <stdint.h> | |||
| typedef struct ATADenoiseDSPContext { /* Holds the row-filter entry point of the atadenoise filter so an arch-specific init can replace the C default. */ | |||
| void (*filter_row)(const uint8_t *src, uint8_t *dst, /* src: input row being filtered; dst: output row */ | |||
| const uint8_t **srcf, /* srcf: table of row pointers; entries on both sides of index mid are compared against src */ | |||
| int w, int mid, int size, /* w: row width (the 8-bit x86 version uses it as a byte count); mid: starting index in srcf; size: upper bound for the srcf index */ | |||
| int thra, int thrb); /* thra: limit on a single absolute difference; thrb: limit on the running sum of differences (as used by the x86 version) */ | |||
| } ATADenoiseDSPContext; | |||
| void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth); /* May overwrite dsp->filter_row with an x86 SIMD version; depth is the component bit depth passed by config_input. */ | |||
| #endif /* AVFILTER_ATADENOISE_H */ | |||
| @@ -33,6 +33,7 @@ | |||
| #define FF_BUFQUEUE_SIZE 129 | |||
| #include "bufferqueue.h" | |||
| #include "atadenoise.h" | |||
| #include "formats.h" | |||
| #include "internal.h" | |||
| #include "video.h" | |||
| @@ -57,10 +58,8 @@ typedef struct ATADenoiseContext { | |||
| int available; | |||
| int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); | |||
| void (*filter_row)(const uint8_t *src, uint8_t *dst, | |||
| const uint8_t *srcf[SIZE], | |||
| int w, int mid, int size, | |||
| int thra, int thrb); | |||
| ATADenoiseDSPContext dsp; | |||
| } ATADenoiseContext; | |||
| #define OFFSET(x) offsetof(ATADenoiseContext, x) | |||
| @@ -209,7 +208,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) | |||
| srcf[i] = data[i] + slice_start * linesize[i]; | |||
| for (y = slice_start; y < slice_end; y++) { | |||
| s->filter_row(src, dst, srcf, w, mid, size, thra, thrb); | |||
| s->dsp.filter_row(src, dst, srcf, w, mid, size, thra, thrb); | |||
| dst += out->linesize[p]; | |||
| src += in->linesize[p]; | |||
| @@ -239,9 +238,9 @@ static int config_input(AVFilterLink *inlink) | |||
| depth = desc->comp[0].depth; | |||
| s->filter_slice = filter_slice; | |||
| if (depth == 8) | |||
| s->filter_row = filter_row8; | |||
| s->dsp.filter_row = filter_row8; | |||
| else | |||
| s->filter_row = filter_row16; | |||
| s->dsp.filter_row = filter_row16; | |||
| s->thra[0] = s->fthra[0] * (1 << depth) - 1; | |||
| s->thra[1] = s->fthra[1] * (1 << depth) - 1; | |||
| @@ -250,6 +249,9 @@ static int config_input(AVFilterLink *inlink) | |||
| s->thrb[1] = s->fthrb[1] * (1 << depth) - 1; | |||
| s->thrb[2] = s->fthrb[2] * (1 << depth) - 1; | |||
| if (ARCH_X86) | |||
| ff_atadenoise_init_x86(&s->dsp, depth); | |||
| return 0; | |||
| } | |||
| @@ -2,6 +2,7 @@ OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o | |||
| OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o | |||
| OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o | |||
| OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise_init.o | |||
| OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o | |||
| OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o | |||
| OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o | |||
| @@ -39,6 +40,7 @@ X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o | |||
| X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o | |||
| X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o | |||
| X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise.o | |||
| X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o | |||
| X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o | |||
| X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o | |||
| @@ -0,0 +1,154 @@ | |||
| ;***************************************************************************** | |||
| ;* x86-optimized functions for atadenoise filter | |||
| ;* | |||
| ;* Copyright (C) 2019 Paul B Mahol | |||
| ;* | |||
| ;* This file is part of FFmpeg. | |||
| ;* | |||
| ;* FFmpeg is free software; you can redistribute it and/or | |||
| ;* modify it under the terms of the GNU Lesser General Public | |||
| ;* License as published by the Free Software Foundation; either | |||
| ;* version 2.1 of the License, or (at your option) any later version. | |||
| ;* | |||
| ;* FFmpeg is distributed in the hope that it will be useful, | |||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| ;* Lesser General Public License for more details. | |||
| ;* | |||
| ;* You should have received a copy of the GNU Lesser General Public | |||
| ;* License along with FFmpeg; if not, write to the Free Software | |||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| ;****************************************************************************** | |||
| %if ARCH_X86_64 | |||
| %include "libavutil/x86/x86util.asm" | |||
| SECTION_RODATA | |||
| pw_one: times 8 dw 1 | |||
| pw_ones: times 8 dw 65535 | |||
| SECTION .text | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_atadenoise_filter_row8_sse4(const uint8_t *src, uint8_t *dst, | |||
| ; const uint8_t **srcf, | |||
| ; int w, int mid, int size, | |||
| ; int thra, int thrb) | |||
| ;------------------------------------------------------------------------------ | |||
| INIT_XMM sse4 | |||
| cglobal atadenoise_filter_row8, 8,10,13, src, dst, srcf, w, mid, size, i, j, srcfx, x ; 8-bit row filter: each output pixel is the rounded mean of the src pixel and the co-located srcf pixels that stay within the thra/thrb limits | |||
| movsxdifnidn wq, wd ; NOTE(review): presumably sign-extends the int arguments for 64-bit address arithmetic - confirm against x86inc | |||
| movsxdifnidn midq, midd | |||
| movsxdifnidn sizeq, sized | |||
| add srcq, wq ; src and dst are moved to the end of the row; x below runs from -w up to 0 | |||
| add dstq, wq | |||
| mov xq, wq | |||
| dec sizeq ; size - 1 is the bound compared against i at the bottom of .loop0 | |||
| neg xq ; x = -w, so [srcq + xq] addresses the first pixel of the row | |||
| movd m4, r6m ; thra, read from the argument slot before i is first written | |||
| SPLATW m4, m4 ; NOTE(review): presumably replicates thra into every word lane - confirm against x86util | |||
| movd m5, r7m ; thrb, read from the argument slot before j is first written | |||
| SPLATW m5, m5 | |||
| pxor m2, m2 ; m2 = 0, the zero operand for all byte->word and word->dword unpacks | |||
| mova m10, [pw_ones] ; all-ones words, xor'ed with a mask to invert it | |||
| .loop: ; one pass per group of 8 pixels | |||
| mov iq, midq ; i walks upwards from mid | |||
| mov jq, midq ; j walks downwards from mid | |||
| pxor m3, m3 ; running sum of absolute differences on the i side | |||
| pxor m11, m11 ; running sum of absolute differences on the j side | |||
| movu m0, [srcq + xq] ; loads 16 bytes, only the low 8 survive the unpack below | |||
| punpcklbw m0, m2 ; m0 = 8 reference pixels as words | |||
| mova m7, m0 ; m7 = pixel sum, seeded with the reference pixel | |||
| mova m8, [pw_one] ; m8 = number of pixels in the sum, starts at 1 | |||
| mova m12, [pw_ones] ; m12 = per-lane mask of lanes that are still accepting pixels | |||
| .loop0: | |||
| inc iq | |||
| dec jq | |||
| mov srcfxq, [srcfq + jq * 8] ; srcf[j] (8-byte pointers) | |||
| add srcfxq, wq ; same end-of-row bias as src, so xq indexes it too | |||
| movu m1, [srcfxq + xq] | |||
| punpcklbw m1, m2 | |||
| mova m9, m1 ; keep the pixel values for the sum | |||
| psubw m1, m0 | |||
| pabsw m1, m1 ; absolute difference to the reference pixel | |||
| paddw m11, m1 ; accumulate into the j-side running sum | |||
| pcmpgtw m1, m4 ; lanes where the difference exceeds thra | |||
| mova m6, m11 | |||
| pcmpgtw m6, m5 ; lanes where the running sum exceeds thrb | |||
| por m6, m1 ; lane fails if either limit is exceeded | |||
| pxor m6, m10 ; invert: lanes that pass | |||
| pand m12, m6 ; a lane that failed once stays cleared for every later step on both sides | |||
| pand m9, m12 ; zero the pixels of rejected lanes | |||
| paddw m7, m9 | |||
| mova m6, m12 | |||
| psrlw m6, 15 ; all-ones word -> 1, zero -> 0 | |||
| paddw m8, m6 ; count += 1 in accepted lanes | |||
| mov srcfxq, [srcfq + iq * 8] ; srcf[i]: same steps as above, using the i-side running sum m3 | |||
| add srcfxq, wq | |||
| movu m1, [srcfxq + xq] | |||
| punpcklbw m1, m2 | |||
| mova m9, m1 | |||
| psubw m1, m0 | |||
| pabsw m1, m1 | |||
| paddw m3, m1 | |||
| pcmpgtw m1, m4 | |||
| mova m6, m3 | |||
| pcmpgtw m6, m5 | |||
| por m6, m1 | |||
| pxor m6, m10 | |||
| pand m12, m6 | |||
| pand m9, m12 | |||
| paddw m7, m9 | |||
| mova m6, m12 | |||
| psrlw m6, 15 | |||
| paddw m8, m6 | |||
| ptest m12, m12 | |||
| jz .finish ; no lane is accepting any more: stop early | |||
| cmp iq, sizeq | |||
| jl .loop0 ; continue while i < size - 1 | |||
| .finish: | |||
| mova m9, m8 | |||
| psrlw m9, 1 | |||
| paddw m7, m9 ; sum += count >> 1, so the truncating division below rounds to nearest | |||
| mova m1, m7 ; copies of sum and count for the upper four lanes | |||
| mova m6, m8 | |||
| punpcklwd m7, m2 ; lower four lanes: zero-extend sum and count to dwords | |||
| punpcklwd m8, m2 | |||
| cvtdq2ps m7, m7 | |||
| cvtdq2ps m8, m8 | |||
| divps m7, m8 ; float division sum / count | |||
| cvttps2dq m7, m7 ; truncate back to integers | |||
| packssdw m7, m7 | |||
| packuswb m7, m7 ; narrow dwords -> words -> bytes | |||
| movd [dstq + xq], m7 ; store output pixels 0..3 | |||
| punpckhwd m1, m2 ; upper four lanes, same sequence | |||
| punpckhwd m6, m2 | |||
| cvtdq2ps m1, m1 | |||
| cvtdq2ps m6, m6 | |||
| divps m1, m6 | |||
| cvttps2dq m1, m1 | |||
| packssdw m1, m1 | |||
| packuswb m1, m1 | |||
| movd [dstq + xq + 4], m1 ; store output pixels 4..7 | |||
| add xq, mmsize/2 ; next group (8 bytes were stored this pass); NOTE(review): if w is not a multiple of 8 the last pass reads and writes past w - confirm the rows are padded | |||
| jl .loop ; repeat while x < 0 | |||
| RET | |||
| %endif | |||
| @@ -0,0 +1,40 @@ | |||
| /* | |||
| * Copyright (C) 2019 Paul B Mahol | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/mem.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavfilter/atadenoise.h" | |||
| void ff_atadenoise_filter_row8_sse4(const uint8_t *src, uint8_t *dst, /* External row filter; presumably the symbol produced by cglobal atadenoise_filter_row8 under INIT_XMM sse4 - confirm against x86inc naming. */ | |||
| const uint8_t **srcf, | |||
| int w, int mid, int size, | |||
| int thra, int thrb); /* Signature matches ATADenoiseDSPContext.filter_row so it can be assigned directly. */ | |||
| /** | |||
| * Install x86 SIMD row filters into an atadenoise DSP context. | |||
| * | |||
| * dsp->filter_row is replaced by the SSE4 8-bit routine only on x86-64 builds, | |||
| * for a depth of at most 8 and when the CPU flags report external SSE4 | |||
| * support; in every other case dsp is left untouched. | |||
| * | |||
| * @param dsp DSP context whose filter_row pointer may be overwritten | |||
| * @param depth component bit depth of the input | |||
| */ | |||
| av_cold void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth) | |||
| { | |||
| const int flags = av_get_cpu_flags(); | |||
| /* The assembly routine is 8-bit only and is assembled for x86-64 only. */ | |||
| if (!ARCH_X86_64 || depth > 8) | |||
| return; | |||
| if (EXTERNAL_SSE4(flags)) | |||
| dsp->filter_row = ff_atadenoise_filter_row8_sse4; | |||
| } | |||