Integration by Neil Birkbeck, with help from Vitor Sessak. Core SSE2 loop by Skal (pascal.massimino@gmail.com). Reviewed-by: Clément Bœsch <u@pkh.me> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> tags/n2.4
| @@ -353,6 +353,7 @@ Filters: | |||
| vf_extractplanes.c Paul B Mahol | |||
| vf_histogram.c Paul B Mahol | |||
| vf_hqx.c Clément Bœsch | |||
| vf_idet.c Pascal Massimino | |||
| vf_il.c Paul B Mahol | |||
| vf_lenscorrection.c Daniel Oberhoff | |||
| vf_mergeplanes.c Paul B Mahol | |||
| @@ -23,37 +23,8 @@ | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/opt.h" | |||
| #include "libavutil/pixdesc.h" | |||
| #include "avfilter.h" | |||
| #include "internal.h" | |||
| #define HIST_SIZE 4 | |||
| typedef enum { /* Classification values used by the idet filter; stored in last_type and in history[]. */ | |||
| TFF, /* presumably "top field first" -- inferred from the name, confirm against type2str() */ | |||
| BFF, /* presumably "bottom field first" -- inferred from the name, confirm */ | |||
| PROGRSSIVE, /* presumably progressive content; the spelling is the identifier the code uses, rename only together with all users */ | |||
| UNDETERMINED, /* value init() assigns to last_type and to every history[] slot */ | |||
| } Type; | |||
| typedef struct { /* Context structure of the idet filter; options are addressed through OFFSET(), i.e. offsetof(IDETContext, x). */ | |||
| const AVClass *class; | |||
| float interlace_threshold; | |||
| float progressive_threshold; | |||
| Type last_type; /* set to UNDETERMINED by init() */ | |||
| int prestat[4]; /* NOTE(review): 4 presumably matches the number of Type values -- confirm */ | |||
| int poststat[4]; /* NOTE(review): same sizing assumption as prestat -- confirm */ | |||
| uint8_t history[HIST_SIZE]; /* every byte is set to UNDETERMINED by init() */ | |||
| AVFrame *cur; | |||
| AVFrame *next; | |||
| AVFrame *prev; | |||
| int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w); /* per-line metric; init() installs the C version and ff_idet_init_x86() may replace it */ | |||
| const AVPixFmtDescriptor *csp; | |||
| } IDETContext; | |||
| #include "vf_idet.h" | |||
| #define OFFSET(x) offsetof(IDETContext, x) | |||
| #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM | |||
| @@ -77,7 +48,7 @@ static const char *type2str(Type type) | |||
| return NULL; | |||
| } | |||
| static int filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w) | |||
| int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w) | |||
| { | |||
| int x; | |||
| int ret=0; | |||
| @@ -271,7 +242,10 @@ static av_cold int init(AVFilterContext *ctx) | |||
| idet->last_type = UNDETERMINED; | |||
| memset(idet->history, UNDETERMINED, HIST_SIZE); | |||
| idet->filter_line = filter_line_c; | |||
| idet->filter_line = ff_idet_filter_line_c; | |||
| if (ARCH_X86) | |||
| ff_idet_init_x86(idet); | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,58 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| * GNU General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU General Public License along | |||
| * with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVFILTER_IDET_H | |||
| #define AVFILTER_IDET_H | |||
| #include "libavutil/pixdesc.h" | |||
| #include "avfilter.h" | |||
| #define HIST_SIZE 4 | |||
| typedef enum { /* Classification values used by the idet filter; stored in last_type and in history[]. */ | |||
| TFF, /* presumably "top field first" -- inferred from the name, confirm against type2str() */ | |||
| BFF, /* presumably "bottom field first" -- inferred from the name, confirm */ | |||
| PROGRSSIVE, /* presumably progressive content; the spelling is the identifier the code uses, rename only together with all users */ | |||
| UNDETERMINED, /* value init() assigns to last_type and to every history[] slot */ | |||
| } Type; | |||
| typedef struct { /* Context structure of the idet filter, shared between vf_idet.c and the x86 init code that includes this header. */ | |||
| const AVClass *class; | |||
| float interlace_threshold; | |||
| float progressive_threshold; | |||
| Type last_type; /* set to UNDETERMINED by init() */ | |||
| int prestat[4]; /* NOTE(review): 4 presumably matches the number of Type values -- confirm */ | |||
| int poststat[4]; /* NOTE(review): same sizing assumption as prestat -- confirm */ | |||
| uint8_t history[HIST_SIZE]; /* every byte is set to UNDETERMINED by init() */ | |||
| AVFrame *cur; | |||
| AVFrame *next; | |||
| AVFrame *prev; | |||
| int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w); /* per-line metric; init() installs the C version and ff_idet_init_x86() may replace it */ | |||
| const AVPixFmtDescriptor *csp; | |||
| } IDETContext; | |||
| void ff_idet_init_x86(IDETContext *idet); | |||
| /* C implementation; the x86 wrappers also call it for the left-over pixels that do not fill a whole SIMD block */ | |||
| int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w); | |||
| #endif | |||
| @@ -1,5 +1,6 @@ | |||
| OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o | |||
| OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o | |||
| OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o | |||
| OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o | |||
| OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o | |||
| OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o | |||
| @@ -7,6 +8,7 @@ OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o | |||
| YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o | |||
| YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o | |||
| YASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o | |||
| YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o | |||
| YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o | |||
| YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o | |||
| @@ -0,0 +1,114 @@ | |||
| ; ***************************************************************************** | |||
| ; * x86-optimized functions for idet filter | |||
| ; * | |||
| ; * This file is part of FFmpeg. | |||
| ; * | |||
| ; * FFmpeg is free software; you can redistribute it and/or modify | |||
| ; * it under the terms of the GNU General Public License as published by | |||
| ; * the Free Software Foundation; either version 2 of the License, or | |||
| ; * (at your option) any later version. | |||
| ; * | |||
| ; * FFmpeg is distributed in the hope that it will be useful, | |||
| ; * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| ; * GNU General Public License for more details. | |||
| ; * | |||
| ; * You should have received a copy of the GNU General Public License along | |||
| ; * with FFmpeg; if not, write to the Free Software Foundation, Inc., | |||
| ; * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |||
| ; ****************************************************************************** | |||
| %include "libavutil/x86/x86util.asm" | |||
| SECTION_TEXT | |||
| %if ARCH_X86_32 | |||
| ; MMX / MMXEXT implementation, assembled for x86-32 builds only. | |||
| ; Handles 8 bytes per iteration: the bytes of lines a, b and c are widened to | |||
| ; 16-bit words, |a + c - 2*b| is formed per byte position, and the words are | |||
| ; accumulated into two 32-bit partial sums in m_sum. The total is returned in eax. | |||
| ; The loop condition is tested after the body, so one 8-byte block is always | |||
| ; processed; callers are expected to pass width > 0. | |||
| ; NOTE(review): no emms appears in this body -- confirm MMX state is cleared | |||
| ; elsewhere before any x87 floating-point code runs. | |||
| %macro IDET_FILTER_LINE 1 | |||
| INIT_MMX %1 | |||
| cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index | |||
| xor indexq, indexq | |||
| %define m_zero m2 | |||
| %define m_sum m5 | |||
| pxor m_sum, m_sum | |||
| pxor m_zero, m_zero | |||
| .loop: | |||
| ; m1:m0 = bytes of line a widened to words (high half : low half) | |||
| movu m0, [aq + indexq*1] | |||
| punpckhbw m1, m0, m_zero | |||
| punpcklbw m0, m_zero | |||
| ; m4:m3 = bytes of line c widened to words, then m1:m0 = a + c | |||
| movu m3, [cq + indexq*1] | |||
| punpckhbw m4, m3, m_zero | |||
| punpcklbw m3, m_zero | |||
| paddsw m1, m4 | |||
| paddsw m0, m3 | |||
| ; m4:m3 = 2 * b, then m1:m0 = a + c - 2*b | |||
| movu m3, [bq + indexq*1] | |||
| punpckhbw m4, m3, m_zero | |||
| punpcklbw m3, m_zero | |||
| paddw m4, m4 | |||
| paddw m3, m3 | |||
| psubsw m1, m4 | |||
| psubsw m0, m3 | |||
| ; absolute value of both halves, then fold the 8 words into 2 dwords | |||
| ABS2 m1, m0, m4, m3 | |||
| paddw m0, m1 | |||
| punpckhwd m1, m0, m_zero | |||
| punpcklwd m0, m_zero | |||
| paddd m0, m1 | |||
| paddd m_sum, m0 | |||
| add indexq, 0x8 | |||
| CMP widthd, indexd | |||
| jg .loop | |||
| ; horizontal add of the two dword partial sums into the low dword | |||
| mova m0, m_sum | |||
| psrlq m_sum, 0x20 | |||
| ; paddd rather than paddq: paddq on MMX registers is an SSE2 instruction and | |||
| ; must not be used in the mmx/mmxext variants; only the low 32 bits are | |||
| ; returned, and those are identical for both instructions. | |||
| paddd m0, m_sum | |||
| movd eax, m0 | |||
| RET | |||
| %endmacro | |||
| IDET_FILTER_LINE mmxext | |||
| IDET_FILTER_LINE mmx | |||
| %endif | |||
| ; SSE2 implementation for 8-bit samples, 16 bytes per iteration: sums |a + c - 2*b| over the line using unsigned saturating byte subtractions and psadbw, keeping two accumulators (m0, m1) that are combined after the loop; the low 32 bits of the total are returned in eax. The loop condition is tested after the body, so one 16-byte block is always processed. NOTE(review): the sixth register "total" is declared in cglobal but never referenced below -- confirm whether it can be dropped. | |||
| INIT_XMM sse2 | |||
| cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total | |||
| xor indexq, indexq | |||
| pxor m0, m0 | |||
| pxor m1, m1 | |||
| .sse2_loop: | |||
| movu m2, [bq + indexq*1] ; m2 = B (16 bytes of line b) | |||
| movu m3, [aq + indexq*1] ; m3 = A (16 bytes of line a) | |||
| mova m6, m2 | |||
| mova m4, m3 | |||
| psubusb m5, m2, m3 ; ba: m5 = B - A, unsigned saturating | |||
| movu m3, [cq + indexq*1] ; m3 = C (16 bytes of line c) | |||
| add indexq, 0x10 | |||
| psubusb m4, m2 ; ab: m4 = A - B, unsigned saturating | |||
| CMP indexd, widthd | |||
| psubusb m6, m3 ; bc: m6 = B - C, unsigned saturating | |||
| psubusb m3, m2 ; cb: m3 = C - B, unsigned saturating | |||
| psadbw m4, m6 ; sum of |ab - bc| over each 8-byte half | |||
| paddq m0, m4 | |||
| psadbw m5, m3 ; sum of |ba - cb| over each 8-byte half | |||
| paddq m1, m5 | |||
| jl .sse2_loop | |||
| paddq m0, m1 | |||
| movhlps m1, m0 | |||
| paddq m0, m1 | |||
| movd eax, m0 | |||
| RET | |||
| @@ -0,0 +1,70 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| * GNU General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU General Public License along | |||
| * with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/mem.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavfilter/vf_idet.h" | |||
| /* Declares the external routine ff_idet_filter_line_<KIND>() and defines a | |||
| * static wrapper idet_filter_line_<KIND>() around it, for KIND in | |||
| * {mmx,mmxext,sse2}. The wrapper hands the leading part of the line, rounded | |||
| * down to a multiple of SPAN, to that routine (skipped when that part is | |||
| * empty) and the remaining w & (SPAN - 1) pixels to ff_idet_filter_line_c(), | |||
| * then returns the sum of both results. The mask assumes SPAN is a power of | |||
| * two. */ | |||
| #define FUNC_MAIN_DECL(KIND, SPAN) \ | |||
| int ff_idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b, \ | |||
| const uint8_t *c, int w); \ | |||
| static int idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b, \ | |||
| const uint8_t *c, int w) { \ | |||
| int sum = 0; \ | |||
| const int left_over = w & (SPAN - 1); \ | |||
| w -= left_over; \ | |||
| if (w > 0) \ | |||
| sum += ff_idet_filter_line_##KIND(a, b, c, w); \ | |||
| if (left_over > 0) \ | |||
| sum += ff_idet_filter_line_c(a + w, b + w, c + w, left_over); \ | |||
| return sum; \ | |||
| } | |||
| #if HAVE_YASM | |||
| FUNC_MAIN_DECL(sse2, 16) | |||
| #if ARCH_X86_32 | |||
| FUNC_MAIN_DECL(mmx, 8) | |||
| FUNC_MAIN_DECL(mmxext, 8) | |||
| #endif | |||
| #endif | |||
| av_cold void ff_idet_init_x86(IDETContext *idet) | |||
| { /* Replaces idet->filter_line with an assembly-backed wrapper when the CPU flags allow it; leaves it untouched when HAVE_YASM is 0 or no flag matches. */ | |||
| #if HAVE_YASM | |||
| const int cpu_flags = av_get_cpu_flags(); | |||
| #if ARCH_X86_32 | |||
| if (EXTERNAL_MMX(cpu_flags)) { | |||
| idet->filter_line = idet_filter_line_mmx; | |||
| } | |||
| if (EXTERNAL_MMXEXT(cpu_flags)) { /* tested after MMX, so it overrides the plain MMX choice */ | |||
| idet->filter_line = idet_filter_line_mmxext; | |||
| } | |||
| #endif // ARCH_X86_32 | |||
| if (EXTERNAL_SSE2(cpu_flags)) { /* tested last, so SSE2 overrides both MMX variants */ | |||
| idet->filter_line = idet_filter_line_sse2; | |||
| } | |||
| #endif // HAVE_YASM | |||
| } | |||