| @@ -27,6 +27,8 @@ | |||
| #include "audio.h" | |||
| #include "formats.h" | |||
| #include "af_anlmdndsp.h" | |||
| #define SQR(x) ((x) * (x)) | |||
| typedef struct AudioNLMeansContext { | |||
| @@ -49,7 +51,7 @@ typedef struct AudioNLMeansContext { | |||
| AVAudioFifo *fifo; | |||
| float (*compute_distance)(const float *f1, const float *f2, int K); | |||
| AudioNLMDNDSPContext dsp; | |||
| } AudioNLMeansContext; | |||
| #define OFFSET(x) offsetof(AudioNLMeansContext, x) | |||
| @@ -93,7 +95,7 @@ static int query_formats(AVFilterContext *ctx) | |||
| return ff_set_common_samplerates(ctx, formats); | |||
| } | |||
| static float compute_distance_ssd(const float *f1, const float *f2, int K) | |||
| static float compute_distance_ssd_c(const float *f1, const float *f2, ptrdiff_t K) | |||
| { | |||
| float distance = 0.; | |||
| @@ -103,6 +105,25 @@ static float compute_distance_ssd(const float *f1, const float *f2, int K) | |||
| return distance; | |||
| } | |||
/**
 * Incrementally update S cached patch distances (C reference version).
 *
 * For each v in [0, S), with j = jj + v, the squared difference of the
 * sample pair at offset -K-1 is subtracted from cache[v] and the squared
 * difference of the sample pair at offset +K is added:
 *
 *   cache[v] += -(f[i-K-1] - f[j-K-1])^2 + (f[i+K] - f[j+K])^2
 *
 * NOTE(review): this looks like the sliding-window update that moves a
 * (2K+1)-sample sum of squared differences forward by one sample, assuming
 * cache[v] already holds the value for the previous position -- confirm
 * against the caller.
 *
 * @param cache distances to update in place; S entries are touched
 * @param f     sample buffer; indices i-K-1 .. i+K and jj-K-1 .. jj+S-1+K
 *              are read and must be valid
 * @param S     number of cache entries to update
 * @param K     patch half-size used for the two sample offsets
 * @param i     index of the reference sample
 * @param jj    index of the first compared sample
 */
static void compute_cache_c(float *cache, const float *f,
                            ptrdiff_t S, ptrdiff_t K,
                            ptrdiff_t i, ptrdiff_t jj)
{
    /* Counters share the parameters' type so that nothing is narrowed. */
    for (ptrdiff_t v = 0; v < S; v++) {
        const ptrdiff_t j = jj + v;
        const float leaving  = f[i - K - 1] - f[j - K - 1];
        const float entering = f[i + K] - f[j + K];

        cache[v] += -(leaving * leaving) + (entering * entering);
    }
}
| void ff_anlmdn_init(AudioNLMDNDSPContext *dsp) | |||
| { | |||
| dsp->compute_distance_ssd = compute_distance_ssd_c; | |||
| dsp->compute_cache = compute_cache_c; | |||
| if (ARCH_X86) | |||
| ff_anlmdn_init_x86(dsp); | |||
| } | |||
| static int config_output(AVFilterLink *outlink) | |||
| { | |||
| AVFilterContext *ctx = outlink->src; | |||
| @@ -129,7 +150,7 @@ static int config_output(AVFilterLink *outlink) | |||
| if (!s->fifo) | |||
| return AVERROR(ENOMEM); | |||
| s->compute_distance = compute_distance_ssd; | |||
| ff_anlmdn_init(&s->dsp); | |||
| return 0; | |||
| } | |||
| @@ -153,17 +174,14 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs) | |||
| for (int j = i - S; j <= i + S; j++) { | |||
| if (i == j) | |||
| continue; | |||
| cache[v++] = s->compute_distance(f + i, f + j, K); | |||
| cache[v++] = s->dsp.compute_distance_ssd(f + i, f + j, K); | |||
| } | |||
| } else { | |||
| for (int j = i - S; j < i; j++, v++) | |||
| cache[v] = cache[v] - SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]); | |||
| for (int j = i + 1; j <= i + S; j++, v++) | |||
| cache[v] = cache[v] - SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]); | |||
| s->dsp.compute_cache(cache, f, S, K, i, i - S); | |||
| s->dsp.compute_cache(cache + S, f, S, K, i, i + 1); | |||
| } | |||
| for (int j = 0; j < v; j++) { | |||
| for (int j = 0; j < 2 * S; j++) { | |||
| const float distance = cache[j]; | |||
| float w; | |||
| @@ -0,0 +1,40 @@ | |||
| /* | |||
| * Copyright (c) 2019 Paul B Mahol | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVFILTER_ANLMDNDSP_H | |||
| #define AVFILTER_ANLMDNDSP_H | |||
| #include "libavutil/common.h" | |||
| #include "audio.h" | |||
| #include "avfilter.h" | |||
| #include "formats.h" | |||
| #include "internal.h" | |||
/**
 * Function table for the anlmdn filter's inner loops.
 */
typedef struct AudioNLMDNDSPContext {
    /**
     * Distance between the patches around f1 and f2 for half-size K.
     * The x86 SSE implementation sums the squared differences of the
     * 2*K+1 samples starting K samples before each pointer.
     */
    float (*compute_distance_ssd)(const float *f1, const float *f2,
                                  ptrdiff_t K);

    /**
     * Update S consecutive entries of cache in place from the samples in f,
     * for reference index i and first compared index jj, using half-size K.
     */
    void (*compute_cache)(float *cache, const float *f,
                          ptrdiff_t S, ptrdiff_t K,
                          ptrdiff_t i, ptrdiff_t jj);
} AudioNLMDNDSPContext;

/** Fill the table with the default implementations. */
void ff_anlmdn_init(AudioNLMDNDSPContext *s);

/** x86-specific initializer for the same table. */
void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s);
| #endif /* AVFILTER_ANLMDNDSP_H */ | |||
| @@ -1,6 +1,7 @@ | |||
| OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o | |||
| OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o | |||
| OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o | |||
| OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o | |||
| OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o | |||
| OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o | |||
| @@ -34,6 +35,7 @@ OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o | |||
| X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o | |||
| X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o | |||
| X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o | |||
| X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o | |||
| X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o | |||
| X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o | |||
| @@ -0,0 +1,80 @@ | |||
| ;***************************************************************************** | |||
| ;* x86-optimized functions for anlmdn filter | |||
| ;* Copyright (c) 2017 Paul B Mahol | |||
| ;* | |||
| ;* This file is part of FFmpeg. | |||
| ;* | |||
| ;* FFmpeg is free software; you can redistribute it and/or | |||
| ;* modify it under the terms of the GNU Lesser General Public | |||
| ;* License as published by the Free Software Foundation; either | |||
| ;* version 2.1 of the License, or (at your option) any later version. | |||
| ;* | |||
| ;* FFmpeg is distributed in the hope that it will be useful, | |||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| ;* Lesser General Public License for more details. | |||
| ;* | |||
| ;* You should have received a copy of the GNU Lesser General Public | |||
| ;* License along with FFmpeg; if not, write to the Free Software | |||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| ;****************************************************************************** | |||
| %include "libavutil/x86/x86util.asm" | |||
| SECTION .text | |||
| ;------------------------------------------------------------------------------ | |||
| ; float ff_compute_distance_ssd_sse(const float *f1, const float *f2, ptrdiff_t len) | |||
| ;------------------------------------------------------------------------------ | |||
; Sum of squared differences between two float windows.
; On entry len is the half-size: 2*len+1 samples are compared, starting
; len samples before f1 and f2. The result is left in lane 0 of xmm0.
| INIT_XMM sse | |||
; 3 arguments, 5 general-purpose registers, 3 XMM registers;
; r = byte count of the scalar tail, x = running byte offset.
| cglobal compute_distance_ssd, 3,5,3, f1, f2, len, r, x | |||
; x = -4 * len: step both pointers back by len floats.
| mov xq, lenq | |||
| shl xq, 2 | |||
| neg xq | |||
| add f1q, xq | |||
| add f2q, xq | |||
; x = 0: byte offset from the (moved) window start.
| xor xq, xq | |||
; len = (2 * len + 1) * 4: window length in bytes.
| shl lenq, 1 | |||
| add lenq, 1 | |||
| shl lenq, 2 | |||
; r = len modulo mmsize: bytes that do not fill a whole vector.
| mov rq, lenq | |||
| and rq, mmsize - 1 | |||
; m0 = accumulator, cleared.
| xorps m0, m0 | |||
; Window shorter than one vector: go straight to the scalar loop.
| cmp lenq, mmsize | |||
| jl .loop1 | |||
; len = byte length rounded down to a multiple of mmsize.
| sub lenq, rq | |||
| ALIGN 16 | |||
; Vector loop: accumulate (f1 - f2)^2 per lane, mmsize bytes per iteration,
; using unaligned loads.
| .loop0: | |||
| movups m1, [f1q + xq] | |||
| movups m2, [f2q + xq] | |||
| subps m1, m2 | |||
| mulps m1, m1 | |||
| addps m0, m1 | |||
| add xq, mmsize | |||
| cmp xq, lenq | |||
| jl .loop0 | |||
; Horizontal reduction: fold the high pair of lanes onto the low pair,
; then add lane 1 to lane 0. Only lane 0 of xmm0 is meaningful afterwards.
| movhlps xmm1, xmm0 | |||
| addps xmm0, xmm1 | |||
| movss xmm1, xmm0 | |||
| shufps xmm0, xmm0, 1 | |||
| addss xmm0, xmm1 | |||
; No leftover bytes: done. Otherwise restore the full byte length.
| cmp rq, 0 | |||
| je .end | |||
| add lenq, rq | |||
; Scalar loop: one float per iteration, accumulated into lane 0 of m0.
| .loop1: | |||
| movss xm1, [f1q + xq] | |||
| subss xm1, [f2q + xq] | |||
| mulss xm1, xm1 | |||
| addss xm0, xm1 | |||
| add xq, 4 | |||
| cmp xq, lenq | |||
| jl .loop1 | |||
| .end: | |||
; 32-bit builds: spill the result to the first argument's stack slot and
; load it onto the x87 stack.
; NOTE(review): presumably because the 32-bit calling convention returns
; floats in st0 -- confirm.
| %if ARCH_X86_64 == 0 | |||
| movss r0m, xm0 | |||
| fld dword r0m | |||
| %endif | |||
| RET | |||
| @@ -0,0 +1,35 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavfilter/af_anlmdndsp.h" | |||
| float ff_compute_distance_ssd_sse(const float *f1, const float *f2, | |||
| ptrdiff_t len); | |||
| av_cold void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (EXTERNAL_SSE(cpu_flags)) { | |||
| s->compute_distance_ssd = ff_compute_distance_ssd_sse; | |||
| } | |||
| } | |||