integration by Neil Birkbeck, with help from Vitor Sessak. core SSE2 loop by Skal (pascal.massimino@gmail.com) Reviewed-by: Clément Bœsch <u@pkh.me> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>tags/n2.4
@@ -353,6 +353,7 @@ Filters: | |||||
vf_extractplanes.c Paul B Mahol | vf_extractplanes.c Paul B Mahol | ||||
vf_histogram.c Paul B Mahol | vf_histogram.c Paul B Mahol | ||||
vf_hqx.c Clément Bœsch | vf_hqx.c Clément Bœsch | ||||
vf_idet.c Pascal Massimino | |||||
vf_il.c Paul B Mahol | vf_il.c Paul B Mahol | ||||
vf_lenscorrection.c Daniel Oberhoff | vf_lenscorrection.c Daniel Oberhoff | ||||
vf_mergeplanes.c Paul B Mahol | vf_mergeplanes.c Paul B Mahol | ||||
@@ -23,37 +23,8 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/common.h" | #include "libavutil/common.h" | ||||
#include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
#include "libavutil/pixdesc.h" | |||||
#include "avfilter.h" | |||||
#include "internal.h" | #include "internal.h" | ||||
#define HIST_SIZE 4 | |||||
/*
 * Detection result type used by the idet filter (stored in
 * IDETContext.last_type and IDETContext.history).
 * TFF/BFF presumably stand for top-field-first / bottom-field-first --
 * TODO confirm against the filter body.
 * UNDETERMINED is the value init() stores in last_type and in every
 * history entry.
 * NOTE(review): PROGRSSIVE looks like a misspelling of PROGRESSIVE; renaming
 * it would require updating every user of the enumerator.
 */
typedef enum { | |||||
TFF, | |||||
BFF, | |||||
PROGRSSIVE, | |||||
UNDETERMINED, | |||||
} Type; | |||||
/* Private state of the idet filter. */
typedef struct { | |||||
const AVClass *class; | |||||
/* Thresholds; presumably user options used to classify a frame as
 * interlaced or progressive -- TODO confirm against the option table. */
float interlace_threshold; | |||||
float progressive_threshold; | |||||
/* Most recent detection result; init() sets it to UNDETERMINED. */
Type last_type; | |||||
/* Four counters each; presumably one per Type value -- TODO confirm. */
int prestat[4]; | |||||
int poststat[4]; | |||||
/* HIST_SIZE bytes, all filled with UNDETERMINED by init(). */
uint8_t history[HIST_SIZE]; | |||||
/* Frame references; presumably a sliding window of three consecutive
 * input frames -- TODO confirm. */
AVFrame *cur; | |||||
AVFrame *next; | |||||
AVFrame *prev; | |||||
/* Line filter taking three line pointers and a width w. init() installs
 * the C version and, when ARCH_X86 is set, ff_idet_init_x86() may
 * replace it. */
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w); | |||||
/* Pixel format descriptor; presumably that of the input -- TODO confirm. */
const AVPixFmtDescriptor *csp; | |||||
} IDETContext; | |||||
#include "vf_idet.h" | |||||
#define OFFSET(x) offsetof(IDETContext, x) | #define OFFSET(x) offsetof(IDETContext, x) | ||||
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM | #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM | ||||
@@ -77,7 +48,7 @@ static const char *type2str(Type type) | |||||
return NULL; | return NULL; | ||||
} | } | ||||
static int filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w) | |||||
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w) | |||||
{ | { | ||||
int x; | int x; | ||||
int ret=0; | int ret=0; | ||||
@@ -271,7 +242,10 @@ static av_cold int init(AVFilterContext *ctx) | |||||
idet->last_type = UNDETERMINED; | idet->last_type = UNDETERMINED; | ||||
memset(idet->history, UNDETERMINED, HIST_SIZE); | memset(idet->history, UNDETERMINED, HIST_SIZE); | ||||
idet->filter_line = filter_line_c; | |||||
idet->filter_line = ff_idet_filter_line_c; | |||||
if (ARCH_X86) | |||||
ff_idet_init_x86(idet); | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -0,0 +1,58 @@ | |||||
/* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License along | |||||
* with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#ifndef AVFILTER_IDET_H | |||||
#define AVFILTER_IDET_H | |||||
#include "libavutil/pixdesc.h" | |||||
#include "avfilter.h" | |||||
#define HIST_SIZE 4 | |||||
/*
 * Detection result type used by the idet filter (stored in
 * IDETContext.last_type and IDETContext.history).
 * TFF/BFF presumably stand for top-field-first / bottom-field-first --
 * TODO confirm against the filter body.
 * UNDETERMINED is the value the filter's init() stores in last_type and in
 * every history entry.
 * NOTE(review): PROGRSSIVE looks like a misspelling of PROGRESSIVE; renaming
 * it would require updating every user of the enumerator.
 */
typedef enum { | |||||
TFF, | |||||
BFF, | |||||
PROGRSSIVE, | |||||
UNDETERMINED, | |||||
} Type; | |||||
/* Private state of the idet filter, shared with the x86 init code. */
typedef struct { | |||||
const AVClass *class; | |||||
/* Thresholds; presumably user options used to classify a frame as
 * interlaced or progressive -- TODO confirm against the option table. */
float interlace_threshold; | |||||
float progressive_threshold; | |||||
/* Most recent detection result; the filter's init() sets it to
 * UNDETERMINED. */
Type last_type; | |||||
/* Four counters each; presumably one per Type value -- TODO confirm. */
int prestat[4]; | |||||
int poststat[4]; | |||||
/* HIST_SIZE bytes, all filled with UNDETERMINED by the filter's init(). */
uint8_t history[HIST_SIZE]; | |||||
/* Frame references; presumably a sliding window of three consecutive
 * input frames -- TODO confirm. */
AVFrame *cur; | |||||
AVFrame *next; | |||||
AVFrame *prev; | |||||
/* Line filter taking three line pointers and a width w. Defaults to
 * ff_idet_filter_line_c; ff_idet_init_x86() may replace it with a
 * SIMD-backed wrapper. */
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w); | |||||
/* Pixel format descriptor; presumably that of the input -- TODO confirm. */
const AVPixFmtDescriptor *csp; | |||||
} IDETContext; | |||||
/* x86 hook, called by the filter's init() when ARCH_X86 is set. Depending on
 * the detected CPU flags it may point idet->filter_line at an MMX, MMXEXT or
 * SSE2 backed wrapper; otherwise the pointer is left untouched. */
void ff_idet_init_x86(IDETContext *idet); | |||||
/* main fall-back for left-over */ | |||||
/* Portable C line filter over w bytes of the lines a, b and c. It is the
 * default filter_line, and the x86 wrappers also call it for the trailing
 * bytes that do not fill a whole SIMD group, adding its return value to the
 * assembly result. */
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w); | |||||
#endif |
@@ -1,5 +1,6 @@ | |||||
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o | OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o | ||||
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o | OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o | ||||
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o | |||||
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o | OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o | ||||
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o | OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o | ||||
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o | OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o | ||||
@@ -7,6 +8,7 @@ OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o | |||||
YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o | YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o | ||||
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o | YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o | ||||
YASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o | |||||
YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o | YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o | ||||
YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o | YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o | ||||
YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o | YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o |
@@ -0,0 +1,114 @@ | |||||
; ***************************************************************************** | |||||
; * x86-optimized functions for idet filter | |||||
; * | |||||
; * This file is part of FFmpeg. | |||||
; * | |||||
; * FFmpeg is free software; you can redistribute it and/or modify | |||||
; * it under the terms of the GNU General Public License as published by | |||||
; * the Free Software Foundation; either version 2 of the License, or | |||||
; * (at your option) any later version. | |||||
; * | |||||
; * FFmpeg is distributed in the hope that it will be useful, | |||||
; * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
; * GNU General Public License for more details. | |||||
; * | |||||
; * You should have received a copy of the GNU General Public License along | |||||
; * with FFmpeg; if not, write to the Free Software Foundation, Inc., | |||||
; * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |||||
; ****************************************************************************** | |||||
%include "libavutil/x86/x86util.asm" | |||||
SECTION_TEXT | |||||
%if ARCH_X86_32 | |||||
; MMX / MMXEXT implementation (x86-32 builds only). Handles 8 bytes per
; iteration by widening them to 16-bit words.
;
; Declared on the C side as
;   int ff_idet_filter_line_{mmx,mmxext}(const uint8_t *a, const uint8_t *b,
;                                        const uint8_t *c, int width);
; Returns in eax the sum of |a[i] + c[i] - 2*b[i]| over the processed bytes.
;
; The loop body runs before the width test and always consumes 8 bytes, so
; the caller must pass a positive width that is a multiple of 8 (the C
; wrappers in the x86 init file guarantee this).
;
; %1 is the instruction-set name handed to INIT_MMX (mmx or mmxext).
%macro IDET_FILTER_LINE 1
INIT_MMX %1
cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index
    xor       indexq, indexq            ; byte offset into the three lines
%define   m_zero m2                     ; all-zero register used for widening
%define   m_sum  m5                     ; running total, kept as 32-bit lanes
    pxor      m_sum, m_sum
    pxor      m_zero, m_zero

.loop:
    ; a: 8 bytes -> high words in m1, low words in m0
    movu      m0, [aq + indexq*1]
    punpckhbw m1, m0, m_zero
    punpcklbw m0, m_zero

    ; c: 8 bytes -> high words in m4, low words in m3
    movu      m3, [cq + indexq*1]
    punpckhbw m4, m3, m_zero
    punpcklbw m3, m_zero

    ; m1:m0 = a + c
    paddsw    m1, m4
    paddsw    m0, m3

    ; b: 8 bytes -> high words in m4, low words in m3
    movu      m3, [bq + indexq*1]
    punpckhbw m4, m3, m_zero
    punpcklbw m3, m_zero

    ; m1:m0 = a + c - 2*b
    paddw     m4, m4
    paddw     m3, m3
    psubsw    m1, m4
    psubsw    m0, m3

    ; absolute value of every word (m4/m3 are scratch)
    ABS2      m1, m0, m4, m3

    ; fold the 8 words into 2 dwords and add them to the running total
    paddw     m0, m1
    punpckhwd m1, m0, m_zero
    punpcklwd m0, m_zero

    paddd     m0, m1
    paddd     m_sum, m0

    add       indexq, 0x8
    CMP       widthd, indexd
    jg        .loop                     ; continue while width > index

    ; Horizontal add of the two dword lanes. Only the low dword is returned,
    ; so paddd is sufficient here; paddq on MMX registers is an SSE2
    ; instruction and must not be used in the mmx/mmxext variants.
    mova      m0, m_sum
    psrlq     m_sum, 0x20
    paddd     m0, m_sum
    movd      eax, m0
    RET
%endmacro

IDET_FILTER_LINE mmxext
IDET_FILTER_LINE mmx
%endif | |||||
; SSE2 8-bit implementation that does 16-bytes at a time: | |||||
;
; Returns in eax the sum of |a[i] + c[i] - 2*b[i]| over the processed bytes,
; computed without widening to 16 bits. With p = a - b and q = b - c the
; per-byte target is |p - q|. Unsigned saturating subtraction yields the
; positive parts (ab = max(a-b,0), bc = max(b-c,0)) and the negative parts
; (ba = max(b-a,0), cb = max(c-b,0)) separately, and
;   |p - q| = |ab - bc| + |ba - cb|.
; psadbw forms those byte-wise absolute differences and sums them per
; 8-byte half.
;
; The loop body runs before the width test and always consumes 16 bytes, so
; the caller must pass a positive width that is a multiple of 16 (the C
; wrapper in the x86 init file guarantees this).
; NOTE(review): a sixth GPR ("total") is requested but never used below.
INIT_XMM sse2 | |||||
cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total | |||||
; index = byte offset into the three lines
xor indexq, indexq | |||||
; m0 / m1 = two independent accumulators (64-bit lanes)
pxor m0, m0 | |||||
pxor m1, m1 | |||||
.sse2_loop: | |||||
movu m2, [bq + indexq*1] ; B | |||||
movu m3, [aq + indexq*1] ; A | |||||
mova m6, m2 | |||||
mova m4, m3 | |||||
psubusb m5, m2, m3 ; ba | |||||
movu m3, [cq + indexq*1] ; C | |||||
add indexq, 0x10 | |||||
psubusb m4, m2 ; ab | |||||
; The flags set here are consumed by the jl at the bottom of the loop; the
; SIMD instructions in between do not modify them.
CMP indexd, widthd | |||||
psubusb m6, m3 ; bc | |||||
psubusb m3, m2 ; cb | |||||
psadbw m4, m6 ; |ab - bc| | |||||
paddq m0, m4 | |||||
psadbw m5, m3 ; |ba - cb| | |||||
paddq m1, m5 | |||||
jl .sse2_loop | |||||
; Merge both accumulators, then add the high 64-bit lane onto the low one
; and return the low 32 bits.
paddq m0, m1 | |||||
movhlps m1, m0 | |||||
paddq m0, m1 | |||||
movd eax, m0 | |||||
RET | |||||
@@ -0,0 +1,70 @@ | |||||
/* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License along | |||||
* with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "libavutil/attributes.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/mem.h" | |||||
#include "libavutil/x86/asm.h" | |||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavfilter/vf_idet.h" | |||||
/*
 * FUNC_MAIN_DECL(KIND, SPAN) declares the assembly routine
 * ff_idet_filter_line_KIND() and defines the static wrapper
 * idet_filter_line_KIND(), which has the IDETContext.filter_line signature.
 *
 * The assembly routine processes whole groups of SPAN bytes only, so the
 * wrapper hands it the leading part of the line whose length is a multiple
 * of SPAN (skipping the call when that part is empty) and passes the
 * remaining w % SPAN bytes to ff_idet_filter_line_c(). The wrapper returns
 * the sum of both partial results.
 *
 * SPAN must be a power of two because the remainder is taken with a mask.
 * It is parenthesized in the expansion so that an expression argument
 * cannot change the meaning of the mask computation.
 */
#define FUNC_MAIN_DECL(KIND, SPAN)                                        \
int ff_idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b,        \
                               const uint8_t *c, int w);                  \
static int idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b,    \
                                   const uint8_t *c, int w) {             \
    int sum = 0;                                                          \
    const int left_over = w & ((SPAN) - 1);                               \
    w -= left_over;                                                       \
    if (w > 0)                                                            \
        sum += ff_idet_filter_line_##KIND(a, b, c, w);                    \
    if (left_over > 0)                                                    \
        sum += ff_idet_filter_line_c(a + w, b + w, c + w, left_over);     \
    return sum;                                                           \
}

#if HAVE_YASM

/* SSE2 works on 16-byte groups; the MMX variants (x86-32 only) on 8. */
FUNC_MAIN_DECL(sse2, 16)
#if ARCH_X86_32
FUNC_MAIN_DECL(mmx, 8)
FUNC_MAIN_DECL(mmxext, 8)
#endif

#endif
/*
 * Select the best available x86 line filter for the running CPU.
 *
 * Preference order: SSE2, then (x86-32 builds only) MMXEXT, then MMX.
 * If none of them is reported, or the build has no external assembly
 * (HAVE_YASM is 0), idet->filter_line is left unchanged.
 */
av_cold void ff_idet_init_x86(IDETContext *idet)
{
#if HAVE_YASM
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(flags)) {
        idet->filter_line = idet_filter_line_sse2;
        return;
    }

#if ARCH_X86_32
    if (EXTERNAL_MMXEXT(flags))
        idet->filter_line = idet_filter_line_mmxext;
    else if (EXTERNAL_MMX(flags))
        idet->filter_line = idet_filter_line_mmx;
#endif /* ARCH_X86_32 */
#endif /* HAVE_YASM */
}