Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
tags/n3.4
@@ -25,9 +25,11 @@ | |||||
#ifndef AVFILTER_INTERLACE_H | #ifndef AVFILTER_INTERLACE_H | ||||
#define AVFILTER_INTERLACE_H | #define AVFILTER_INTERLACE_H | ||||
#include "libavutil/bswap.h" | |||||
#include "libavutil/common.h" | #include "libavutil/common.h" | ||||
#include "libavutil/imgutils.h" | #include "libavutil/imgutils.h" | ||||
#include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
#include "libavutil/pixdesc.h" | |||||
#include "avfilter.h" | #include "avfilter.h" | ||||
#include "formats.h" | #include "formats.h" | ||||
@@ -55,8 +57,9 @@ typedef struct InterlaceContext { | |||||
enum ScanMode scan; // top or bottom field first scanning | enum ScanMode scan; // top or bottom field first scanning | ||||
int lowpass; // enable or disable low pass filtering | int lowpass; // enable or disable low pass filtering | ||||
AVFrame *cur, *next; // the two frames from which the new one is obtained | AVFrame *cur, *next; // the two frames from which the new one is obtained | ||||
const AVPixFmtDescriptor *csp; | |||||
void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, | void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, | ||||
ptrdiff_t mref, ptrdiff_t pref); | |||||
ptrdiff_t mref, ptrdiff_t pref, int clip_max); | |||||
} InterlaceContext; | } InterlaceContext; | ||||
void ff_interlace_init_x86(InterlaceContext *interlace); | void ff_interlace_init_x86(InterlaceContext *interlace); | ||||
@@ -27,7 +27,9 @@ | |||||
#ifndef AVFILTER_TINTERLACE_H | #ifndef AVFILTER_TINTERLACE_H | ||||
#define AVFILTER_TINTERLACE_H | #define AVFILTER_TINTERLACE_H | ||||
#include "libavutil/bswap.h" | |||||
#include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
#include "libavutil/pixdesc.h" | |||||
#include "drawutils.h" | #include "drawutils.h" | ||||
#include "avfilter.h" | #include "avfilter.h" | ||||
@@ -60,8 +62,9 @@ typedef struct TInterlaceContext { | |||||
int black_linesize[4]; | int black_linesize[4]; | ||||
FFDrawContext draw; | FFDrawContext draw; | ||||
FFDrawColor color; | FFDrawColor color; | ||||
const AVPixFmtDescriptor *csp; | |||||
void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp, | void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp, | ||||
ptrdiff_t mref, ptrdiff_t pref); | |||||
ptrdiff_t mref, ptrdiff_t pref, int clip_max); | |||||
} TInterlaceContext; | } TInterlaceContext; | ||||
void ff_tinterlace_init_x86(TInterlaceContext *interlace); | void ff_tinterlace_init_x86(TInterlaceContext *interlace); | ||||
@@ -61,8 +61,8 @@ static const AVOption interlace_options[] = { | |||||
AVFILTER_DEFINE_CLASS(interlace); | AVFILTER_DEFINE_CLASS(interlace); | ||||
static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize, | static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize, | ||||
const uint8_t *srcp, | |||||
ptrdiff_t mref, ptrdiff_t pref) | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max) | |||||
{ | { | ||||
const uint8_t *srcp_above = srcp + mref; | const uint8_t *srcp_above = srcp + mref; | ||||
const uint8_t *srcp_below = srcp + pref; | const uint8_t *srcp_below = srcp + pref; | ||||
@@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize, | |||||
} | } | ||||
} | } | ||||
/**
 * Linear vertical low-pass filter for one line of 16-bit samples.
 *
 * Every output sample is (2 * current + above + below + 1) >> 2, i.e. the
 * rounded integer form of 0.5 * current + 0.25 * above + 0.25 * below.
 * Samples are read and written through av_le2ne16().
 *
 * @param dst8     destination line, accessed as uint16_t samples
 * @param linesize number of samples to filter
 * @param src8     current source line, accessed as uint16_t samples
 * @param mref     offset from src8 to the line above; it is halved before
 *                 being applied to the 16-bit pointer (presumably a byte
 *                 offset, confirm against the caller)
 * @param pref     offset from src8 to the line below, handled like mref
 * @param clip_max unused here; present so the signature matches the
 *                 lowpass_line function pointer
 */
static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
                              const uint8_t *src8, ptrdiff_t mref,
                              ptrdiff_t pref, int clip_max)
{
    uint16_t *out         = (uint16_t *)dst8;
    const uint16_t *cur   = (const uint16_t *)src8;
    const uint16_t *above = cur + mref / 2;
    const uint16_t *below = cur + pref / 2;
    int x;

    for (x = 0; x < linesize; x++) {
        int center     = av_le2ne16(cur[x]);
        int neighbours = av_le2ne16(above[x]) + av_le2ne16(below[x]);

        // weights 2:1:1, "+ 1" rounds before the divide by four
        out[x] = av_le2ne16((1 + 2 * center + neighbours) >> 2);
    }
}
static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, | static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, | ||||
const uint8_t *srcp, | |||||
ptrdiff_t mref, ptrdiff_t pref) | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max) | |||||
{ | { | ||||
const uint8_t *srcp_above = srcp + mref; | const uint8_t *srcp_above = srcp + mref; | ||||
const uint8_t *srcp_below = srcp + pref; | const uint8_t *srcp_below = srcp + pref; | ||||
@@ -103,11 +122,51 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, | |||||
} | } | ||||
} | } | ||||
/**
 * Complex vertical low-pass filter for one line of 16-bit samples.
 *
 * The raw filter value is the integer form of
 *   0.75 * current + 0.25 * above + 0.25 * below
 *                  - 0.125 * above2 - 0.125 * below2
 * ("+ 4" rounds before the divide by eight), clipped to [0, clip_max].
 * An anti-over-sharpening step then keeps the result on the same side of
 * the current sample as the average of the two adjacent lines.
 * Samples are read and written through av_le2ne16().
 *
 * @param dst8     destination line, accessed as uint16_t samples
 * @param linesize number of samples to filter
 * @param src8     current source line, accessed as uint16_t samples
 * @param mref     offset to the line above; mref / 2 samples reach the
 *                 adjacent line, mref samples reach the line beyond it
 *                 (presumably a byte offset, confirm against the caller)
 * @param pref     offset to the line below, handled like mref
 * @param clip_max upper bound applied to the filtered value
 */
static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t linesize,
                                      const uint8_t *src8, ptrdiff_t mref,
                                      ptrdiff_t pref, int clip_max)
{
    uint16_t *out          = (uint16_t *)dst8;
    const uint16_t *cur    = (const uint16_t *)src8;
    const uint16_t *above  = cur + mref / 2;
    const uint16_t *below  = cur + pref / 2;
    const uint16_t *above2 = cur + mref;
    const uint16_t *below2 = cur + pref;
    int x;

    for (x = 0; x < linesize; x++) {
        int center   = av_le2ne16(cur[x]);
        int near_sum = av_le2ne16(above[x]) + av_le2ne16(below[x]);
        int filtered = av_clip((4 + ((3 * center + near_sum) << 1)
                                - av_le2ne16(above2[x])
                                - av_le2ne16(below2[x])) >> 3, 0, clip_max);
        int result;

        // Prevent over-sharpening: when the neighbours' average is above
        // the current sample the output may not fall below it, otherwise
        // the output may not rise above it.
        if (near_sum > 2 * center)
            result = filtered < center ? center : filtered;
        else
            result = filtered > center ? center : filtered;

        out[x] = av_le2ne16(result);
    }
}
static const enum AVPixelFormat formats_supported[] = { | static const enum AVPixelFormat formats_supported[] = { | ||||
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, | |||||
AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P, | |||||
AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, | |||||
AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE | |||||
AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, | |||||
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, | |||||
AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV444P10LE, | |||||
AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV422P12LE, AV_PIX_FMT_YUV444P12LE, | |||||
AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P, | |||||
AV_PIX_FMT_YUVA420P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA444P10LE, | |||||
AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, | |||||
AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE | |||||
}; | }; | ||||
static int query_formats(AVFilterContext *ctx) | static int query_formats(AVFilterContext *ctx) | ||||
@@ -150,12 +209,19 @@ static int config_out_props(AVFilterLink *outlink) | |||||
outlink->time_base.num *= 2; | outlink->time_base.num *= 2; | ||||
outlink->frame_rate.den *= 2; | outlink->frame_rate.den *= 2; | ||||
s->csp = av_pix_fmt_desc_get(outlink->format); | |||||
if (s->lowpass) { | if (s->lowpass) { | ||||
if (s->lowpass == VLPF_LIN) | |||||
s->lowpass_line = lowpass_line_c; | |||||
else if (s->lowpass == VLPF_CMP) | |||||
s->lowpass_line = lowpass_line_complex_c; | |||||
if (s->lowpass == VLPF_LIN) { | |||||
if (s->csp->comp[0].depth > 8) | |||||
s->lowpass_line = lowpass_line_c_16; | |||||
else | |||||
s->lowpass_line = lowpass_line_c; | |||||
} else if (s->lowpass == VLPF_CMP) { | |||||
if (s->csp->comp[0].depth > 8) | |||||
s->lowpass_line = lowpass_line_complex_c_16; | |||||
else | |||||
s->lowpass_line = lowpass_line_complex_c; | |||||
} | |||||
if (ARCH_X86) | if (ARCH_X86) | ||||
ff_interlace_init_x86(s); | ff_interlace_init_x86(s); | ||||
} | } | ||||
@@ -183,6 +249,7 @@ static void copy_picture_field(InterlaceContext *s, | |||||
const uint8_t *srcp = src_frame->data[plane]; | const uint8_t *srcp = src_frame->data[plane]; | ||||
int srcp_linesize = src_frame->linesize[plane] * 2; | int srcp_linesize = src_frame->linesize[plane] * 2; | ||||
int dstp_linesize = dst_frame->linesize[plane] * 2; | int dstp_linesize = dst_frame->linesize[plane] * 2; | ||||
int clip_max = (1 << s->csp->comp[plane].depth) - 1; | |||||
av_assert0(cols >= 0 || lines >= 0); | av_assert0(cols >= 0 || lines >= 0); | ||||
@@ -202,11 +269,13 @@ static void copy_picture_field(InterlaceContext *s, | |||||
mref = 0; | mref = 0; | ||||
else if (j <= (1 + x)) | else if (j <= (1 + x)) | ||||
pref = 0; | pref = 0; | ||||
s->lowpass_line(dstp, cols, srcp, mref, pref); | |||||
s->lowpass_line(dstp, cols, srcp, mref, pref, clip_max); | |||||
dstp += dstp_linesize; | dstp += dstp_linesize; | ||||
srcp += srcp_linesize; | srcp += srcp_linesize; | ||||
} | } | ||||
} else { | } else { | ||||
if (s->csp->comp[plane].depth > 8) | |||||
cols *= 2; | |||||
av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines); | av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines); | ||||
} | } | ||||
} | } | ||||
@@ -78,7 +78,12 @@ static int query_formats(AVFilterContext *ctx) | |||||
AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, | AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, | ||||
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, | AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, | ||||
AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, | AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, | ||||
AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE, | |||||
AV_PIX_FMT_YUV440P10LE, AV_PIX_FMT_YUV444P10LE, | |||||
AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV422P12LE, | |||||
AV_PIX_FMT_YUV440P12LE, AV_PIX_FMT_YUV444P12LE, | |||||
AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P, | AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P, | ||||
AV_PIX_FMT_YUVA420P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA444P10LE, | |||||
AV_PIX_FMT_GRAY8, FULL_SCALE_YUVJ_FORMATS, | AV_PIX_FMT_GRAY8, FULL_SCALE_YUVJ_FORMATS, | ||||
AV_PIX_FMT_NONE | AV_PIX_FMT_NONE | ||||
}; | }; | ||||
@@ -90,7 +95,7 @@ static int query_formats(AVFilterContext *ctx) | |||||
} | } | ||||
static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp, | static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp, | ||||
ptrdiff_t mref, ptrdiff_t pref) | |||||
ptrdiff_t mref, ptrdiff_t pref, int clip_max) | |||||
{ | { | ||||
const uint8_t *srcp_above = srcp + mref; | const uint8_t *srcp_above = srcp + mref; | ||||
const uint8_t *srcp_below = srcp + pref; | const uint8_t *srcp_below = srcp + pref; | ||||
@@ -103,8 +108,26 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp, | |||||
} | } | ||||
} | } | ||||
/**
 * Linear vertical low-pass filter for one line of 16-bit samples.
 *
 * Each output sample is the rounded integer form of
 * 0.5 * current + 0.25 * above + 0.25 * below. Samples are read and
 * written through av_le2ne16().
 *
 * @param dst8     destination line, accessed as uint16_t samples
 * @param width    number of samples to filter
 * @param src8     current source line, accessed as uint16_t samples
 * @param mref     offset from src8 to the line above; halved before being
 *                 applied to the 16-bit pointer (presumably a byte offset,
 *                 confirm against the caller)
 * @param pref     offset from src8 to the line below, handled like mref
 * @param clip_max unused here; present so the signature matches the
 *                 lowpass_line function pointer
 */
static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t width, const uint8_t *src8,
                              ptrdiff_t mref, ptrdiff_t pref, int clip_max)
{
    const uint16_t *line       = (const uint16_t *)src8;
    const uint16_t *line_above = line + mref / 2;
    const uint16_t *line_below = line + pref / 2;
    uint16_t *dest             = (uint16_t *)dst8;
    int col;

    for (col = 0; col < width; col++) {
        // 2 * current + above + below, "+ 1" rounds the divide by four
        int acc = av_le2ne16(line[col]) << 1;

        acc += 1;
        acc += av_le2ne16(line_above[col]);
        acc += av_le2ne16(line_below[col]);
        dest[col] = av_le2ne16(acc >> 2);
    }
}
static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp, | static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp, | ||||
ptrdiff_t mref, ptrdiff_t pref) | |||||
ptrdiff_t mref, ptrdiff_t pref, int clip_max) | |||||
{ | { | ||||
const uint8_t *srcp_above = srcp + mref; | const uint8_t *srcp_above = srcp + mref; | ||||
const uint8_t *srcp_below = srcp + pref; | const uint8_t *srcp_below = srcp + pref; | ||||
@@ -130,6 +153,41 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t | |||||
} | } | ||||
} | } | ||||
/**
 * Complex vertical low-pass filter for one line of 16-bit samples.
 *
 * Computes the integer form of
 *   0.75 * current + 0.25 * above + 0.25 * below
 *                  - 0.125 * above2 - 0.125 * below2
 * with rounding, clips it to [0, clip_max], then limits it so that it does
 * not overshoot the current sample (anti-over-sharpening).
 * Samples are read and written through av_le2ne16().
 *
 * @param dst8     destination line, accessed as uint16_t samples
 * @param width    number of samples to filter
 * @param src8     current source line, accessed as uint16_t samples
 * @param mref     offset to the line above; mref / 2 samples reach the
 *                 adjacent line, mref samples reach the line beyond it
 *                 (presumably a byte offset, confirm against the caller)
 * @param pref     offset to the line below, handled like mref
 * @param clip_max upper bound applied to the filtered value
 */
static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t width, const uint8_t *src8,
                                      ptrdiff_t mref, ptrdiff_t pref, int clip_max)
{
    const uint16_t *line        = (const uint16_t *)src8;
    const uint16_t *line_above  = line + mref / 2;
    const uint16_t *line_below  = line + pref / 2;
    const uint16_t *line_above2 = line + mref;
    const uint16_t *line_below2 = line + pref;
    uint16_t *dest              = (uint16_t *)dst8;
    int col;

    for (col = 0; col < width; col++) {
        int cur      = av_le2ne16(line[col]);
        int cur_x2   = cur << 1;
        int adjacent = av_le2ne16(line_above[col]) + av_le2ne16(line_below[col]);
        // "4 +" rounds the final divide by eight
        int value    = av_clip((4 + ((cur + cur_x2 + adjacent) << 1)
                                - av_le2ne16(line_above2[col])
                                - av_le2ne16(line_below2[col])) >> 3, 0, clip_max);

        // Prevent over-sharpening: the result must stay at or above the
        // current sample when the adjacent lines average higher than it,
        // and at or below it otherwise.
        if (adjacent > cur_x2) {
            if (value < cur)
                value = cur;
        } else if (value > cur) {
            value = cur;
        }
        dest[col] = av_le2ne16(value);
    }
}
static av_cold void uninit(AVFilterContext *ctx) | static av_cold void uninit(AVFilterContext *ctx) | ||||
{ | { | ||||
TInterlaceContext *tinterlace = ctx->priv; | TInterlaceContext *tinterlace = ctx->priv; | ||||
@@ -198,12 +256,19 @@ static int config_out_props(AVFilterLink *outlink) | |||||
(tinterlace->flags & TINTERLACE_FLAG_EXACT_TB)) | (tinterlace->flags & TINTERLACE_FLAG_EXACT_TB)) | ||||
outlink->time_base = tinterlace->preout_time_base; | outlink->time_base = tinterlace->preout_time_base; | ||||
tinterlace->csp = av_pix_fmt_desc_get(outlink->format); | |||||
if (tinterlace->flags & TINTERLACE_FLAG_CVLPF) { | if (tinterlace->flags & TINTERLACE_FLAG_CVLPF) { | ||||
tinterlace->lowpass_line = lowpass_line_complex_c; | |||||
if (tinterlace->csp->comp[0].depth > 8) | |||||
tinterlace->lowpass_line = lowpass_line_complex_c_16; | |||||
else | |||||
tinterlace->lowpass_line = lowpass_line_complex_c; | |||||
if (ARCH_X86) | if (ARCH_X86) | ||||
ff_tinterlace_init_x86(tinterlace); | ff_tinterlace_init_x86(tinterlace); | ||||
} else if (tinterlace->flags & TINTERLACE_FLAG_VLPF) { | } else if (tinterlace->flags & TINTERLACE_FLAG_VLPF) { | ||||
tinterlace->lowpass_line = lowpass_line_c; | |||||
if (tinterlace->csp->comp[0].depth > 8) | |||||
tinterlace->lowpass_line = lowpass_line_c_16; | |||||
else | |||||
tinterlace->lowpass_line = lowpass_line_c; | |||||
if (ARCH_X86) | if (ARCH_X86) | ||||
ff_tinterlace_init_x86(tinterlace); | ff_tinterlace_init_x86(tinterlace); | ||||
} | } | ||||
@@ -250,6 +315,7 @@ void copy_picture_field(TInterlaceContext *tinterlace, | |||||
const uint8_t *srcp = src[plane]; | const uint8_t *srcp = src[plane]; | ||||
int srcp_linesize = src_linesize[plane] * k; | int srcp_linesize = src_linesize[plane] * k; | ||||
int dstp_linesize = dst_linesize[plane] * (interleave ? 2 : 1); | int dstp_linesize = dst_linesize[plane] * (interleave ? 2 : 1); | ||||
int clip_max = (1 << tinterlace->csp->comp[plane].depth) - 1; | |||||
lines = (lines + (src_field == FIELD_UPPER)) / k; | lines = (lines + (src_field == FIELD_UPPER)) / k; | ||||
if (src_field == FIELD_LOWER) | if (src_field == FIELD_LOWER) | ||||
@@ -267,11 +333,13 @@ void copy_picture_field(TInterlaceContext *tinterlace, | |||||
if (h >= (lines - x)) mref = 0; // there is no line above | if (h >= (lines - x)) mref = 0; // there is no line above | ||||
else if (h <= (1 + x)) pref = 0; // there is no line below | else if (h <= (1 + x)) pref = 0; // there is no line below | ||||
tinterlace->lowpass_line(dstp, cols, srcp, mref, pref); | |||||
tinterlace->lowpass_line(dstp, cols, srcp, mref, pref, clip_max); | |||||
dstp += dstp_linesize; | dstp += dstp_linesize; | ||||
srcp += srcp_linesize; | srcp += srcp_linesize; | ||||
} | } | ||||
} else { | } else { | ||||
if (tinterlace->csp->comp[plane].depth > 8) | |||||
cols *= 2; | |||||
av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines); | av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines); | ||||
} | } | ||||
} | } | ||||
@@ -30,27 +30,26 @@ pw_4: times 8 dw 4 | |||||
SECTION .text | SECTION .text | ||||
%macro LOWPASS_LINE 0 | |||||
cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref | |||||
%macro LOWPASS 1 | |||||
add dstq, hq | add dstq, hq | ||||
add srcq, hq | add srcq, hq | ||||
add mrefq, srcq | add mrefq, srcq | ||||
add prefq, srcq | add prefq, srcq | ||||
neg hq | neg hq | ||||
pcmpeqb m6, m6 | |||||
pcmpeq%1 m6, m6 | |||||
.loop: | .loop: | ||||
mova m0, [mrefq+hq] | mova m0, [mrefq+hq] | ||||
mova m1, [mrefq+hq+mmsize] | mova m1, [mrefq+hq+mmsize] | ||||
pavgb m0, [prefq+hq] | |||||
pavgb m1, [prefq+hq+mmsize] | |||||
pavg%1 m0, [prefq+hq] | |||||
pavg%1 m1, [prefq+hq+mmsize] | |||||
pxor m0, m6 | pxor m0, m6 | ||||
pxor m1, m6 | pxor m1, m6 | ||||
pxor m2, m6, [srcq+hq] | pxor m2, m6, [srcq+hq] | ||||
pxor m3, m6, [srcq+hq+mmsize] | pxor m3, m6, [srcq+hq+mmsize] | ||||
pavgb m0, m2 | |||||
pavgb m1, m3 | |||||
pavg%1 m0, m2 | |||||
pavg%1 m1, m3 | |||||
pxor m0, m6 | pxor m0, m6 | ||||
pxor m1, m6 | pxor m1, m6 | ||||
mova [dstq+hq], m0 | mova [dstq+hq], m0 | ||||
@@ -59,7 +58,15 @@ cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref | |||||
add hq, 2*mmsize | add hq, 2*mmsize | ||||
jl .loop | jl .loop | ||||
REP_RET | REP_RET | ||||
%endmacro | |||||
; Emits the two linear low-pass entry points from the shared LOWPASS body:
; lowpass_line instantiates it with the byte ops (LOWPASS b),
; lowpass_line_16 with the word ops (LOWPASS w).
%macro LOWPASS_LINE 0 | |||||
cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref | |||||
LOWPASS b | |||||
cglobal lowpass_line_16, 5, 5, 7, dst, h, src, mref, pref | |||||
; LOWPASS adds h straight onto the dst/src pointers, so h is doubled first
; (presumably h arrives as a sample count and samples are 2 bytes - confirm).
shl hq, 1 | |||||
LOWPASS w | |||||
%endmacro | %endmacro | ||||
%macro LOWPASS_LINE_COMPLEX 0 | %macro LOWPASS_LINE_COMPLEX 0 | ||||
@@ -124,6 +131,65 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref | |||||
jg .loop | jg .loop | ||||
REP_RET | REP_RET | ||||
; Complex vertical low-pass filter on 16-bit words. Each loop pass handles
; 2*mmsize bytes of the current line: it builds
;   (2 * (3*cur + above + below) + 4 - (above2 + below2)) >> 3,
; limits the result to clip_max, then picks max(result, cur) or
; min(result, cur) per lane depending on whether above + below > 2 * cur.
; NOTE(review): the "_12" suffix and the signed word compare/min/max ops
; suggest the samples are expected to fit in 12 bits - confirm with callers.
; NOTE(review): "16" in the cglobal line is presumably the x86inc stack size,
; used below as the [rsp] scratch slot - confirm against x86inc.asm.
cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max | |||||
; Broadcast clip_max into every word lane and park it at [rsp], since m7 is
; overwritten as a working register inside the loop.
movd m7, DWORD clip_maxm | |||||
SPLATW m7, m7, 0 | |||||
mova [rsp], m7 | |||||
.loop: | |||||
; m0/m1 = above + below
mova m0, [srcq+mrefq] | |||||
mova m1, [srcq+mrefq+mmsize] | |||||
mova m2, [srcq+prefq] | |||||
mova m3, [srcq+prefq+mmsize] | |||||
paddw m0, m2 | |||||
paddw m1, m3 | |||||
; keep a copy of above + below in m6/m7 for the comparison further down
mova m6, m0 | |||||
mova m7, m1 | |||||
; m0/m1 = above + below + cur, m2/m3 = 2 * cur
mova m2, [srcq] | |||||
mova m3, [srcq+mmsize] | |||||
paddw m0, m2 | |||||
paddw m1, m3 | |||||
psllw m2, 1 | |||||
psllw m3, 1 | |||||
; m0/m1 = 2 * (above + below + 3 * cur)
paddw m0, m2 | |||||
paddw m1, m3 | |||||
psllw m0, 1 | |||||
psllw m1, 1 | |||||
; m6/m7 = per-lane mask, all ones where above + below > 2 * cur
pcmpgtw m6, m2 | |||||
pcmpgtw m7, m3 | |||||
; m2/m3 = above2 + below2 (the lines at 2*mref and 2*pref)
mova m2, [srcq+2*mrefq] | |||||
mova m3, [srcq+2*mrefq+mmsize] | |||||
mova m4, [srcq+2*prefq] | |||||
mova m5, [srcq+2*prefq+mmsize] | |||||
paddw m2, m4 | |||||
paddw m3, m5 | |||||
; add the rounding constant 4 (pw_4)
paddw m0, [pw_4] | |||||
paddw m1, [pw_4] | |||||
; unsigned saturating subtract: the result bottoms out at 0
psubusw m0, m2 | |||||
psubusw m1, m3 | |||||
; divide by 8
psrlw m0, 3 | |||||
psrlw m1, 3 | |||||
; upper clip against the broadcast clip_max
pminsw m0, [rsp] | |||||
pminsw m1, [rsp] | |||||
; m0/m1 = max(filtered, cur), m2/m3 = min(filtered, cur)
mova m2, m0 | |||||
mova m3, m1 | |||||
pmaxsw m0, [srcq] | |||||
pmaxsw m1, [srcq+mmsize] | |||||
pminsw m2, [srcq] | |||||
pminsw m3, [srcq+mmsize] | |||||
; per-lane select: the max where the mask is set, the min elsewhere
pand m0, m6 | |||||
pand m1, m7 | |||||
pandn m6, m2 | |||||
pandn m7, m3 | |||||
por m0, m6 | |||||
por m1, m7 | |||||
mova [dstq], m0 | |||||
mova [dstq+mmsize], m1 | |||||
; advance both pointers by the 2*mmsize bytes just processed
add dstq, 2*mmsize | |||||
add srcq, 2*mmsize | |||||
; h drops by mmsize per pass; loop while it is still positive
sub hd, mmsize | |||||
jg .loop | |||||
REP_RET | |||||
%endmacro | %endmacro | ||||
INIT_XMM sse2 | INIT_XMM sse2 | ||||
@@ -27,27 +27,50 @@ | |||||
#include "libavfilter/interlace.h" | #include "libavfilter/interlace.h" | ||||
void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, | void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, | ||||
const uint8_t *srcp, | |||||
ptrdiff_t mref, ptrdiff_t pref); | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, | void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, | ||||
const uint8_t *srcp, | |||||
ptrdiff_t mref, ptrdiff_t pref); | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize, | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, | void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, | ||||
const uint8_t *srcp, | |||||
ptrdiff_t mref, ptrdiff_t pref); | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize, | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
av_cold void ff_interlace_init_x86(InterlaceContext *s) | av_cold void ff_interlace_init_x86(InterlaceContext *s) | ||||
{ | { | ||||
int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
if (EXTERNAL_SSE2(cpu_flags)) { | |||||
if (s->lowpass == VLPF_LIN) | |||||
s->lowpass_line = ff_lowpass_line_sse2; | |||||
else if (s->lowpass == VLPF_CMP) | |||||
s->lowpass_line = ff_lowpass_line_complex_sse2; | |||||
if (s->csp->comp[0].depth > 8) { | |||||
if (EXTERNAL_SSE2(cpu_flags)) { | |||||
if (s->lowpass == VLPF_LIN) | |||||
s->lowpass_line = ff_lowpass_line_16_sse2; | |||||
else if (s->lowpass == VLPF_CMP) | |||||
s->lowpass_line = ff_lowpass_line_complex_12_sse2; | |||||
} | |||||
if (EXTERNAL_AVX(cpu_flags)) | |||||
if (s->lowpass == VLPF_LIN) | |||||
s->lowpass_line = ff_lowpass_line_16_avx; | |||||
} else { | |||||
if (EXTERNAL_SSE2(cpu_flags)) { | |||||
if (s->lowpass == VLPF_LIN) | |||||
s->lowpass_line = ff_lowpass_line_sse2; | |||||
else if (s->lowpass == VLPF_CMP) | |||||
s->lowpass_line = ff_lowpass_line_complex_sse2; | |||||
} | |||||
if (EXTERNAL_AVX(cpu_flags)) | |||||
if (s->lowpass == VLPF_LIN) | |||||
s->lowpass_line = ff_lowpass_line_avx; | |||||
} | } | ||||
if (EXTERNAL_AVX(cpu_flags)) | |||||
if (s->lowpass == VLPF_LIN) | |||||
s->lowpass_line = ff_lowpass_line_avx; | |||||
} | } |
@@ -28,27 +28,50 @@ | |||||
#include "libavfilter/tinterlace.h" | #include "libavfilter/tinterlace.h" | ||||
void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, | void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, | ||||
const uint8_t *srcp, | |||||
ptrdiff_t mref, ptrdiff_t pref); | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, | void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, | ||||
const uint8_t *srcp, | |||||
ptrdiff_t mref, ptrdiff_t pref); | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize, | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, | void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, | ||||
const uint8_t *srcp, | |||||
ptrdiff_t mref, ptrdiff_t pref); | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize, | |||||
const uint8_t *srcp, ptrdiff_t mref, | |||||
ptrdiff_t pref, int clip_max); | |||||
av_cold void ff_tinterlace_init_x86(TInterlaceContext *s) | av_cold void ff_tinterlace_init_x86(TInterlaceContext *s) | ||||
{ | { | ||||
int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
if (EXTERNAL_SSE2(cpu_flags)) { | |||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) | |||||
s->lowpass_line = ff_lowpass_line_sse2; | |||||
else | |||||
s->lowpass_line = ff_lowpass_line_complex_sse2; | |||||
if (s->csp->comp[0].depth > 8) { | |||||
if (EXTERNAL_SSE2(cpu_flags)) { | |||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) | |||||
s->lowpass_line = ff_lowpass_line_16_sse2; | |||||
else | |||||
s->lowpass_line = ff_lowpass_line_complex_12_sse2; | |||||
} | |||||
if (EXTERNAL_AVX(cpu_flags)) | |||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) | |||||
s->lowpass_line = ff_lowpass_line_16_avx; | |||||
} else { | |||||
if (EXTERNAL_SSE2(cpu_flags)) { | |||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) | |||||
s->lowpass_line = ff_lowpass_line_sse2; | |||||
else | |||||
s->lowpass_line = ff_lowpass_line_complex_sse2; | |||||
} | |||||
if (EXTERNAL_AVX(cpu_flags)) | |||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) | |||||
s->lowpass_line = ff_lowpass_line_avx; | |||||
} | } | ||||
if (EXTERNAL_AVX(cpu_flags)) | |||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF)) | |||||
s->lowpass_line = ff_lowpass_line_avx; | |||||
} | } |
@@ -2,12 +2,23 @@ gray 9849d71519ae9c584ae8abfa8adb2f8e | |||||
yuv410p 44ee4b74b95c82d6f79ddf53b5e3aa9d | yuv410p 44ee4b74b95c82d6f79ddf53b5e3aa9d | ||||
yuv411p 5fa9d1fba7adfd6f7fa04464332b631a | yuv411p 5fa9d1fba7adfd6f7fa04464332b631a | ||||
yuv420p ee9591ea3ab06c73be902c4b8868c69e | yuv420p ee9591ea3ab06c73be902c4b8868c69e | ||||
yuv420p10le 19b2dcf5e82725b2c9e366d5d3a4b67b | |||||
yuv420p12le e5b76eb58cab2dfa42738f665d8b8059 | |||||
yuv422p b1be7b55567bde86d655adf80fac1257 | yuv422p b1be7b55567bde86d655adf80fac1257 | ||||
yuv422p10le 66f7433c7423d6289ee20c19c4e8dd90 | |||||
yuv422p12le b4a4b5ed81341b36f685b14b2bb8a798 | |||||
yuv440p ddf6ee697f4ff4f90d501e6869392309 | yuv440p ddf6ee697f4ff4f90d501e6869392309 | ||||
yuv440p10le 7e44a92d9e47e15f0728b125725ccd41 | |||||
yuv440p12le c2cac82661e10be4917bd9dacb419fce | |||||
yuv444p 7cb5d0c0997c8c2545a16bfc4cb9fd6d | yuv444p 7cb5d0c0997c8c2545a16bfc4cb9fd6d | ||||
yuv444p10le 08fb13c69b40229014a6ccf0bdd5498f | |||||
yuv444p12le 796c5559b9f426470eb2997c21be5d14 | |||||
yuva420p ee0761e2f76ec441c545feede77103e4 | yuva420p ee0761e2f76ec441c545feede77103e4 | ||||
yuva420p10le 4bb6b3a1cbf2fd1fe5aefcc11ce8029f | |||||
yuva422p a8da2806e21a88449079faa7f4303ffa | yuva422p a8da2806e21a88449079faa7f4303ffa | ||||
yuva422p10le d2965b5b5a43a7165badaff0718a17d8 | |||||
yuva444p a3f57734d6f72bdf37f8f612ea7cce63 | yuva444p a3f57734d6f72bdf37f8f612ea7cce63 | ||||
yuva444p10le e020512901fd9ac7088898a4e3a8c7c1 | |||||
yuvj420p 9f358e311b694bcd01e1a07d1120ade5 | yuvj420p 9f358e311b694bcd01e1a07d1120ade5 | ||||
yuvj422p 9a7628a9f1630d35c7176951ddc1b2f6 | yuvj422p 9a7628a9f1630d35c7176951ddc1b2f6 | ||||
yuvj440p 112fe35292c687746ec0c622a42c611b | yuvj440p 112fe35292c687746ec0c622a42c611b | ||||
@@ -2,12 +2,23 @@ gray fab3a7abc4f076cf926205aeacadbe51 | |||||
yuv410p d4506e49eeb64c7ce714c07597e7dd69 | yuv410p d4506e49eeb64c7ce714c07597e7dd69 | ||||
yuv411p 2e8bb385cb4a53a0f3771815020f7213 | yuv411p 2e8bb385cb4a53a0f3771815020f7213 | ||||
yuv420p c967d3d5f3200d1b2417b0f2356f12fc | yuv420p c967d3d5f3200d1b2417b0f2356f12fc | ||||
yuv420p10le 404d90eca9b93f7c0949be23187804c7 | |||||
yuv420p12le eb2ef81b82f1342618e4c9fe8e086592 | |||||
yuv422p febaa84ea2e3246af742a7ed37c030f6 | yuv422p febaa84ea2e3246af742a7ed37c030f6 | ||||
yuv422p10le 0dc5f3833c0f11a0fcd0422d76508c10 | |||||
yuv422p12le 190eb7e76af5fc481588ded472901b58 | |||||
yuv440p 13a934b42df65f11e153314ebb4f311e | yuv440p 13a934b42df65f11e153314ebb4f311e | ||||
yuv440p10le 7fabea51d6e52b718ef3ab919b24f8d2 | |||||
yuv440p12le c7b138374a18f258bdd1a2d21b4421b0 | |||||
yuv444p 45d4466f5689942a4effb0fd23e44949 | yuv444p 45d4466f5689942a4effb0fd23e44949 | ||||
yuv444p10le fde95627ba0e66be7ec863386b3e5ca6 | |||||
yuv444p12le c1e15c934ce3bc562a869dd78c75db17 | |||||
yuva420p dc1173a07c3f993b277ea0c94d513e1f | yuva420p dc1173a07c3f993b277ea0c94d513e1f | ||||
yuva420p10le d85972762437105a932759c4bb9759c9 | |||||
yuva422p ca200be80e5bfdb159e1aea57129ed3a | yuva422p ca200be80e5bfdb159e1aea57129ed3a | ||||
yuva422p10le 06d4f79ee2ddf31d9fe15af8ca573f46 | |||||
yuva444p 9f39c35d6899dcb8b9a9b07c339ca365 | yuva444p 9f39c35d6899dcb8b9a9b07c339ca365 | ||||
yuva444p10le b0c54fc3efad73f252d86127407aa1fd | |||||
yuvj420p 844359293bb6ff81549f3fc0090cc587 | yuvj420p 844359293bb6ff81549f3fc0090cc587 | ||||
yuvj422p 526af049d43974822baa7b48aa1e1098 | yuvj422p 526af049d43974822baa7b48aa1e1098 | ||||
yuvj440p af9285194da8efbc40d93bf8109f9dc5 | yuvj440p af9285194da8efbc40d93bf8109f9dc5 | ||||
@@ -2,12 +2,23 @@ gray 7ef396fecd8d1c9fe32173e4415ba671 | |||||
yuv410p 35bc11d0d32efc9e9a969be7d720f4e6 | yuv410p 35bc11d0d32efc9e9a969be7d720f4e6 | ||||
yuv411p 17ef3cd22a74f7368b5e02f68779f294 | yuv411p 17ef3cd22a74f7368b5e02f68779f294 | ||||
yuv420p 93d5b6a4c44d67e4d4447e8dd0bf3d33 | yuv420p 93d5b6a4c44d67e4d4447e8dd0bf3d33 | ||||
yuv420p10le 14e754c6e9d41cb048ce3c93512d7d35 | |||||
yuv420p12le ce54a2e38d121a7575dff30542facaad | |||||
yuv422p 3ee40b0b6533b9183764b85c853ec3f9 | yuv422p 3ee40b0b6533b9183764b85c853ec3f9 | ||||
yuv422p10le d4b61a84b93e74b07b7020ceed40e39e | |||||
yuv422p12le 8fd90be12a97307645ecfcd09d576643 | |||||
yuv440p 1d3c1258a51d09e778cd8368b1a4126f | yuv440p 1d3c1258a51d09e778cd8368b1a4126f | ||||
yuv440p10le 29d116cb550a05920e5619ab58284d30 | |||||
yuv440p12le d7518f941a3b2c137f944afe9da816a1 | |||||
yuv444p 1093568ad8f479ec20e738d018dd3f8f | yuv444p 1093568ad8f479ec20e738d018dd3f8f | ||||
yuv444p10le c65233e1d8b01f3369c20738f7386801 | |||||
yuv444p12le 70dc51a857bfb215b3a81fceb114b74c | |||||
yuva420p 4588aef20c0010e514550c9391219724 | yuva420p 4588aef20c0010e514550c9391219724 | ||||
yuva420p10le 3181e84fd7aaed606bb86eecd2e13f20 | |||||
yuva422p 3426ed1ac9429202d8c29fa62a04d4c3 | yuva422p 3426ed1ac9429202d8c29fa62a04d4c3 | ||||
yuva422p10le c00acd7c437d41755dff09c5ca3642cf | |||||
yuva444p 1b9fc791c7d774b4ba8c9dc836f78cf5 | yuva444p 1b9fc791c7d774b4ba8c9dc836f78cf5 | ||||
yuva444p10le 616b42a232c83b8f9e5c5168ec4b5da5 | |||||
yuvj420p 9a872e0c1b3c0b6fe856415696b758bd | yuvj420p 9a872e0c1b3c0b6fe856415696b758bd | ||||
yuvj422p da3c9ef25528a2ee96746ce44e6969f3 | yuvj422p da3c9ef25528a2ee96746ce44e6969f3 | ||||
yuvj440p a9a5495c6b0e2bf6e561998ea1c356a7 | yuvj440p a9a5495c6b0e2bf6e561998ea1c356a7 | ||||
@@ -2,12 +2,23 @@ gray b79791449947c25cd5b36d9d3b9d1831 | |||||
yuv410p 5bc03f4cf6b441b421f0fdaeeff1e9ed | yuv410p 5bc03f4cf6b441b421f0fdaeeff1e9ed | ||||
yuv411p 19046df1876c46ed1ef0458680270bd3 | yuv411p 19046df1876c46ed1ef0458680270bd3 | ||||
yuv420p 69c743b84996be9430b051a55cfbcb29 | yuv420p 69c743b84996be9430b051a55cfbcb29 | ||||
yuv420p10le 85948ad609abded6b50882d459f5a2f8 | |||||
yuv420p12le 7cebe45f51bdadc766f66c68db8d347d | |||||
yuv422p d710ccd1941f6f389c97a09bc977e709 | yuv422p d710ccd1941f6f389c97a09bc977e709 | ||||
yuv422p10le c54873f77dac1d710fb2aa1b0ce2669c | |||||
yuv422p12le 94a527bb787b9d121ffbbcb3a6c545d8 | |||||
yuv440p 1a482a23fe5a9b7d02388c299fd0a423 | yuv440p 1a482a23fe5a9b7d02388c299fd0a423 | ||||
yuv440p10le 506efa287ecce9c951da2039fa1de2ae | |||||
yuv440p12le 631bcf190f409ccbc5c27b9f0f6ba5e2 | |||||
yuv444p c968a92f4b7ab6706ee9b425eb5345b5 | yuv444p c968a92f4b7ab6706ee9b425eb5345b5 | ||||
yuv444p10le 0af437e635d49feccf7dfae201e6dfc5 | |||||
yuv444p12le 2e9e9f7caae1fae3b026810246fc6ac1 | |||||
yuva420p 3f89a166f309c0cda8b91a9e8a0ce937 | yuva420p 3f89a166f309c0cda8b91a9e8a0ce937 | ||||
yuva420p10le 79de1cc549c03d4893cf6f1aca86e057 | |||||
yuva422p ef8fdbe910d68e88e98227b0e99fb5a6 | yuva422p ef8fdbe910d68e88e98227b0e99fb5a6 | ||||
yuva422p10le 257a4aec41f9b5412179272d8a7fb6f7 | |||||
yuva444p 3662eadd5f61a6edbc9d715ea8591415 | yuva444p 3662eadd5f61a6edbc9d715ea8591415 | ||||
yuva444p10le 0905cf5b7f42c11be3f0486a66533c71 | |||||
yuvj420p 14c4390b319c5d679184503309060ac3 | yuvj420p 14c4390b319c5d679184503309060ac3 | ||||
yuvj422p bbe00a26526931b72a024febe1cd6b90 | yuvj422p bbe00a26526931b72a024febe1cd6b90 | ||||
yuvj440p f654cf28b7879c6a6c950c3cb9612580 | yuvj440p f654cf28b7879c6a6c950c3cb9612580 | ||||