|
|
|
@@ -337,13 +337,194 @@ static int config_props(AVFilterLink *inlink) |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
struct thread_data { |
|
|
|
AVFrame *in; |
|
|
|
AVFrame *out; |
|
|
|
|
|
|
|
int w; |
|
|
|
int h; |
|
|
|
}; |
|
|
|
|
|
|
|
/*
 * Shared prologue of the packed slice workers.
 *
 * Expects ctx, arg, jobnr and nb_jobs to be in scope and declares:
 *   s, td                  - filter context and job descriptor
 *   in, out, w, h          - frames and dimensions taken from td
 *   tab, step              - per-component lookup tables and s->step
 *   i, j                   - loop counters
 *   slice_start, slice_end - half-open row range [start, end) of this job
 */
#define LOAD_PACKED_COMMON                                                    \
    LutContext *s = ctx->priv;                                                \
    const struct thread_data *td = arg;                                       \
                                                                              \
    int i, j;                                                                 \
    AVFrame *in  = td->in;                                                    \
    AVFrame *out = td->out;                                                   \
    const int w = td->w;                                                      \
    const int h = td->h;                                                      \
    const int step = s->step;                                                 \
    const uint16_t (*tab)[256*256] = (const uint16_t (*)[256*256])s->lut;     \
                                                                              \
    const int slice_start = (h *  jobnr     ) / nb_jobs;                      \
    const int slice_end   = (h * (jobnr + 1)) / nb_jobs;
|
|
|
|
|
|
|
/* packed, 16-bit */ |
|
|
|
static int lut_packed_16bits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) |
|
|
|
{ |
|
|
|
LOAD_PACKED_COMMON |
|
|
|
|
|
|
|
uint16_t *inrow, *outrow, *inrow0, *outrow0; |
|
|
|
const int in_linesize = in->linesize[0] / 2; |
|
|
|
const int out_linesize = out->linesize[0] / 2; |
|
|
|
inrow0 = (uint16_t *)in ->data[0]; |
|
|
|
outrow0 = (uint16_t *)out->data[0]; |
|
|
|
|
|
|
|
for (i = slice_start; i < slice_end; i++) { |
|
|
|
inrow = inrow0 + i * in_linesize; |
|
|
|
outrow = outrow0 + i * out_linesize; |
|
|
|
for (j = 0; j < w; j++) { |
|
|
|
|
|
|
|
switch (step) { |
|
|
|
#if HAVE_BIGENDIAN |
|
|
|
case 4: outrow[3] = av_bswap16(tab[3][av_bswap16(inrow[3])]); // Fall-through |
|
|
|
case 3: outrow[2] = av_bswap16(tab[2][av_bswap16(inrow[2])]); // Fall-through |
|
|
|
case 2: outrow[1] = av_bswap16(tab[1][av_bswap16(inrow[1])]); // Fall-through |
|
|
|
default: outrow[0] = av_bswap16(tab[0][av_bswap16(inrow[0])]); |
|
|
|
#else |
|
|
|
case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through |
|
|
|
case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through |
|
|
|
case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through |
|
|
|
default: outrow[0] = tab[0][inrow[0]]; |
|
|
|
#endif |
|
|
|
} |
|
|
|
outrow += step; |
|
|
|
inrow += step; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/* packed, 8-bit */ |
|
|
|
static int lut_packed_8bits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) |
|
|
|
{ |
|
|
|
LOAD_PACKED_COMMON |
|
|
|
|
|
|
|
uint8_t *inrow, *outrow, *inrow0, *outrow0; |
|
|
|
const int in_linesize = in->linesize[0]; |
|
|
|
const int out_linesize = out->linesize[0]; |
|
|
|
inrow0 = in ->data[0]; |
|
|
|
outrow0 = out->data[0]; |
|
|
|
|
|
|
|
for (i = slice_start; i < slice_end; i++) { |
|
|
|
inrow = inrow0 + i * in_linesize; |
|
|
|
outrow = outrow0 + i * out_linesize; |
|
|
|
for (j = 0; j < w; j++) { |
|
|
|
switch (step) { |
|
|
|
case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through |
|
|
|
case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through |
|
|
|
case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through |
|
|
|
default: outrow[0] = tab[0][inrow[0]]; |
|
|
|
} |
|
|
|
outrow += step; |
|
|
|
inrow += step; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/*
 * Shared prologue of the planar slice workers.
 *
 * Expects ctx and arg to be in scope and declares s, td, the loop
 * counters i, j, plane, and the in/out frames taken from td.
 */
#define LOAD_PLANAR_COMMON                                                    \
    LutContext *s = ctx->priv;                                                \
    const struct thread_data *td = arg;                                       \
    AVFrame *in  = td->in;                                                    \
    AVFrame *out = td->out;                                                   \
    int i, j, plane;
|
|
|
|
|
|
|
/*
 * Per-plane setup for the planar slice workers.
 *
 * Expects s, td, plane, jobnr and nb_jobs to be in scope. Planes 1 and 2
 * have their dimensions reduced by s->hsub / s->vsub (rounding up); other
 * planes use the full td->w x td->h. Declares vsub, hsub, h, w, the
 * plane's lookup table `tab`, and the half-open row range
 * [slice_start, slice_end) assigned to this job within the plane.
 */
#define PLANAR_COMMON                                                         \
    int vsub = (plane == 1 || plane == 2) ? s->vsub : 0;                      \
    int hsub = (plane == 1 || plane == 2) ? s->hsub : 0;                      \
    int h = AV_CEIL_RSHIFT(td->h, vsub);                                      \
    int w = AV_CEIL_RSHIFT(td->w, hsub);                                      \
    const uint16_t *tab = s->lut[plane];                                      \
                                                                              \
    const int slice_start = (h *  jobnr     ) / nb_jobs;                      \
    const int slice_end   = (h * (jobnr + 1)) / nb_jobs;
|
|
|
|
|
|
|
/* planar >8 bit depth */ |
|
|
|
static int lut_planar_16bits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) |
|
|
|
{ |
|
|
|
LOAD_PLANAR_COMMON |
|
|
|
|
|
|
|
uint16_t *inrow, *outrow; |
|
|
|
|
|
|
|
for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) { |
|
|
|
PLANAR_COMMON |
|
|
|
|
|
|
|
const int in_linesize = in->linesize[plane] / 2; |
|
|
|
const int out_linesize = out->linesize[plane] / 2; |
|
|
|
|
|
|
|
inrow = (uint16_t *)(in ->data[plane] + slice_start * in_linesize); |
|
|
|
outrow = (uint16_t *)(out->data[plane] + slice_start * out_linesize); |
|
|
|
|
|
|
|
for (i = slice_start; i < slice_end; i++) { |
|
|
|
for (j = 0; j < w; j++) { |
|
|
|
#if HAVE_BIGENDIAN |
|
|
|
outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]); |
|
|
|
#else |
|
|
|
outrow[j] = tab[inrow[j]]; |
|
|
|
#endif |
|
|
|
} |
|
|
|
inrow += in_linesize; |
|
|
|
outrow += out_linesize; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/* planar 8bit depth */ |
|
|
|
static int lut_planar_8bits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) |
|
|
|
{ |
|
|
|
LOAD_PLANAR_COMMON |
|
|
|
|
|
|
|
uint8_t *inrow, *outrow; |
|
|
|
|
|
|
|
for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) { |
|
|
|
PLANAR_COMMON |
|
|
|
|
|
|
|
const int in_linesize = in->linesize[plane]; |
|
|
|
const int out_linesize = out->linesize[plane]; |
|
|
|
|
|
|
|
inrow = in ->data[plane] + slice_start * in_linesize; |
|
|
|
outrow = out->data[plane] + slice_start * out_linesize; |
|
|
|
|
|
|
|
for (i = slice_start; i < slice_end; i++) { |
|
|
|
for (j = 0; j < w; j++) |
|
|
|
outrow[j] = tab[inrow[j]]; |
|
|
|
inrow += in_linesize; |
|
|
|
outrow += out_linesize; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/*
 * Declares the job descriptor `td` for the packed workers.
 * Expects in, out and inlink to be in scope; the height comes from the
 * input frame (in->height).
 */
#define PACKED_THREAD_DATA                                                    \
    struct thread_data td = {                                                 \
        .in  = in,                                                            \
        .out = out,                                                           \
        .w   = inlink->w,                                                     \
        .h   = in->height,                                                    \
    };
|
|
|
|
|
|
|
/*
 * Declares the job descriptor `td` for the planar workers.
 * Expects in, out and inlink to be in scope; both dimensions come from
 * the input link (inlink->w, inlink->h).
 */
#define PLANAR_THREAD_DATA                                                    \
    struct thread_data td = {                                                 \
        .in  = in,                                                            \
        .out = out,                                                           \
        .w   = inlink->w,                                                     \
        .h   = inlink->h,                                                     \
    };
|
|
|
|
|
|
|
static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
|
|
|
{ |
|
|
|
AVFilterContext *ctx = inlink->dst; |
|
|
|
LutContext *s = ctx->priv; |
|
|
|
AVFilterLink *outlink = ctx->outputs[0]; |
|
|
|
AVFrame *out; |
|
|
|
int i, j, plane, direct = 0; |
|
|
|
int direct = 0; |
|
|
|
|
|
|
|
if (av_frame_is_writable(in)) { |
|
|
|
direct = 1; |
|
|
|
@@ -359,121 +540,24 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
|
|
|
|
|
|
|
if (s->is_rgb && s->is_16bit && !s->is_planar) { |
|
|
|
/* packed, 16-bit */ |
|
|
|
uint16_t *inrow, *outrow, *inrow0, *outrow0; |
|
|
|
const int w = inlink->w; |
|
|
|
const int h = in->height; |
|
|
|
const uint16_t (*tab)[256*256] = (const uint16_t (*)[256*256])s->lut; |
|
|
|
const int in_linesize = in->linesize[0] / 2; |
|
|
|
const int out_linesize = out->linesize[0] / 2; |
|
|
|
const int step = s->step; |
|
|
|
|
|
|
|
inrow0 = (uint16_t*) in ->data[0]; |
|
|
|
outrow0 = (uint16_t*) out->data[0]; |
|
|
|
|
|
|
|
for (i = 0; i < h; i ++) { |
|
|
|
inrow = inrow0; |
|
|
|
outrow = outrow0; |
|
|
|
for (j = 0; j < w; j++) { |
|
|
|
|
|
|
|
switch (step) { |
|
|
|
#if HAVE_BIGENDIAN |
|
|
|
case 4: outrow[3] = av_bswap16(tab[3][av_bswap16(inrow[3])]); // Fall-through |
|
|
|
case 3: outrow[2] = av_bswap16(tab[2][av_bswap16(inrow[2])]); // Fall-through |
|
|
|
case 2: outrow[1] = av_bswap16(tab[1][av_bswap16(inrow[1])]); // Fall-through |
|
|
|
default: outrow[0] = av_bswap16(tab[0][av_bswap16(inrow[0])]); |
|
|
|
#else |
|
|
|
case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through |
|
|
|
case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through |
|
|
|
case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through |
|
|
|
default: outrow[0] = tab[0][inrow[0]]; |
|
|
|
#endif |
|
|
|
} |
|
|
|
outrow += step; |
|
|
|
inrow += step; |
|
|
|
} |
|
|
|
inrow0 += in_linesize; |
|
|
|
outrow0 += out_linesize; |
|
|
|
} |
|
|
|
PACKED_THREAD_DATA |
|
|
|
ctx->internal->execute(ctx, lut_packed_16bits, &td, NULL, |
|
|
|
FFMIN(in->height, ff_filter_get_nb_threads(ctx))); |
|
|
|
} else if (s->is_rgb && !s->is_planar) { |
|
|
|
/* packed */ |
|
|
|
uint8_t *inrow, *outrow, *inrow0, *outrow0; |
|
|
|
const int w = inlink->w; |
|
|
|
const int h = in->height; |
|
|
|
const uint16_t (*tab)[256*256] = (const uint16_t (*)[256*256])s->lut; |
|
|
|
const int in_linesize = in->linesize[0]; |
|
|
|
const int out_linesize = out->linesize[0]; |
|
|
|
const int step = s->step; |
|
|
|
|
|
|
|
inrow0 = in ->data[0]; |
|
|
|
outrow0 = out->data[0]; |
|
|
|
|
|
|
|
for (i = 0; i < h; i ++) { |
|
|
|
inrow = inrow0; |
|
|
|
outrow = outrow0; |
|
|
|
for (j = 0; j < w; j++) { |
|
|
|
switch (step) { |
|
|
|
case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through |
|
|
|
case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through |
|
|
|
case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through |
|
|
|
default: outrow[0] = tab[0][inrow[0]]; |
|
|
|
} |
|
|
|
outrow += step; |
|
|
|
inrow += step; |
|
|
|
} |
|
|
|
inrow0 += in_linesize; |
|
|
|
outrow0 += out_linesize; |
|
|
|
} |
|
|
|
/* packed 8 bits */ |
|
|
|
PACKED_THREAD_DATA |
|
|
|
ctx->internal->execute(ctx, lut_packed_8bits, &td, NULL, |
|
|
|
FFMIN(in->height, ff_filter_get_nb_threads(ctx))); |
|
|
|
} else if (s->is_16bit) { |
|
|
|
// planar >8 bit depth |
|
|
|
uint16_t *inrow, *outrow; |
|
|
|
|
|
|
|
for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) { |
|
|
|
int vsub = plane == 1 || plane == 2 ? s->vsub : 0; |
|
|
|
int hsub = plane == 1 || plane == 2 ? s->hsub : 0; |
|
|
|
int h = AV_CEIL_RSHIFT(inlink->h, vsub); |
|
|
|
int w = AV_CEIL_RSHIFT(inlink->w, hsub); |
|
|
|
const uint16_t *tab = s->lut[plane]; |
|
|
|
const int in_linesize = in->linesize[plane] / 2; |
|
|
|
const int out_linesize = out->linesize[plane] / 2; |
|
|
|
|
|
|
|
inrow = (uint16_t *)in ->data[plane]; |
|
|
|
outrow = (uint16_t *)out->data[plane]; |
|
|
|
|
|
|
|
for (i = 0; i < h; i++) { |
|
|
|
for (j = 0; j < w; j++) { |
|
|
|
#if HAVE_BIGENDIAN |
|
|
|
outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]); |
|
|
|
#else |
|
|
|
outrow[j] = tab[inrow[j]]; |
|
|
|
#endif |
|
|
|
} |
|
|
|
inrow += in_linesize; |
|
|
|
outrow += out_linesize; |
|
|
|
} |
|
|
|
} |
|
|
|
/* planar >8 bit depth */ |
|
|
|
PLANAR_THREAD_DATA |
|
|
|
ctx->internal->execute(ctx, lut_planar_16bits, &td, NULL, |
|
|
|
FFMIN(in->height, ff_filter_get_nb_threads(ctx))); |
|
|
|
} else { |
|
|
|
/* planar 8bit depth */ |
|
|
|
uint8_t *inrow, *outrow; |
|
|
|
|
|
|
|
for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) { |
|
|
|
int vsub = plane == 1 || plane == 2 ? s->vsub : 0; |
|
|
|
int hsub = plane == 1 || plane == 2 ? s->hsub : 0; |
|
|
|
int h = AV_CEIL_RSHIFT(inlink->h, vsub); |
|
|
|
int w = AV_CEIL_RSHIFT(inlink->w, hsub); |
|
|
|
const uint16_t *tab = s->lut[plane]; |
|
|
|
const int in_linesize = in->linesize[plane]; |
|
|
|
const int out_linesize = out->linesize[plane]; |
|
|
|
|
|
|
|
inrow = in ->data[plane]; |
|
|
|
outrow = out->data[plane]; |
|
|
|
|
|
|
|
for (i = 0; i < h; i++) { |
|
|
|
for (j = 0; j < w; j++) |
|
|
|
outrow[j] = tab[inrow[j]]; |
|
|
|
inrow += in_linesize; |
|
|
|
outrow += out_linesize; |
|
|
|
} |
|
|
|
} |
|
|
|
PLANAR_THREAD_DATA |
|
|
|
ctx->internal->execute(ctx, lut_planar_8bits, &td, NULL, |
|
|
|
FFMIN(in->height, ff_filter_get_nb_threads(ctx))); |
|
|
|
} |
|
|
|
|
|
|
|
if (!direct) |
|
|
|
@@ -508,7 +592,7 @@ static const AVFilterPad outputs[] = { |
|
|
|
.query_formats = query_formats, \ |
|
|
|
.inputs = inputs, \ |
|
|
|
.outputs = outputs, \ |
|
|
|
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, \ |
|
|
|
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, \ |
|
|
|
} |
|
|
|
|
|
|
|
#if CONFIG_LUT_FILTER |
|
|
|
|