avfilter: add normalize filter

8 years ago · 7d4fe0c5cb
--- a/Changelog
+++ b/Changelog
@@ -19,6 +19,7 @@ version <next>:
 - acontrast audio filter
 - OpenCL overlay filter
 - video mix filter
 - video normalize filter


 version 3.4:
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10867,6 +10867,86 @@ Add temporal and uniform noise to input video:
 noise=alls=20:allf=t+u
@end example

@section normalize

 Normalize RGB video (aka histogram stretching, contrast stretching).
 See: https://en.wikipedia.org/wiki/Normalization_(image_processing)

 For each channel of each frame, the filter computes the input range and maps
 it linearly to the user-specified output range. The output range defaults
 to the full dynamic range from pure black to pure white.

 Temporal smoothing can be used on the input range to reduce flickering (rapid
 changes in brightness) caused when small dark or bright objects enter or leave
 the scene. This is similar to the auto-exposure (automatic gain control) on a
 video camera, and, like a video camera, it may cause a period of over- or
 under-exposure of the video.

 The R,G,B channels can be normalized independently, which may cause some
 color shifting, or linked together as a single channel, which prevents
 color shifting. Linked normalization preserves hue. Independent normalization
 does not, so it can be used to remove some color casts. Independent and linked
 normalization can be combined in any ratio.

 The normalize filter accepts the following options:

@table @option
@item blackpt
@item whitept
 Colors which define the output range. The minimum input value is mapped to
 the @var{blackpt}. The maximum input value is mapped to the @var{whitept}.
 The defaults are black and white respectively. Specifying white for
@var{blackpt} and black for @var{whitept} will give color-inverted,
 normalized video. Shades of grey can be used to reduce the dynamic range
 (contrast). Specifying saturated colors here can create some interesting
 effects.

@item smoothing
 The number of previous frames to use for temporal smoothing. The input range
 of each channel is smoothed using a rolling average over the current frame
 and the @var{smoothing} previous frames. The default is 0 (no temporal
 smoothing).

@item independence
 Controls the ratio of independent (color shifting) channel normalization to
 linked (color preserving) normalization. 0.0 is fully linked, 1.0 is fully
 independent. Defaults to 1.0 (fully independent).

@item strength
 Overall strength of the filter. 1.0 is full strength. 0.0 is a rather
 expensive no-op. Defaults to 1.0 (full strength).

@end table

@subsection Examples

 Stretch video contrast to use the full dynamic range, with no temporal
 smoothing; may flicker depending on the source content:
@example
 normalize=blackpt=black:whitept=white:smoothing=0
@end example

 As above, but with 50 frames of temporal smoothing; flicker should be
 reduced, depending on the source content:
@example
 normalize=blackpt=black:whitept=white:smoothing=50
@end example

 As above, but with hue-preserving linked channel normalization:
@example
 normalize=blackpt=black:whitept=white:smoothing=50:independence=0
@end example

 As above, but with half strength:
@example
 normalize=blackpt=black:whitept=white:smoothing=50:independence=0:strength=0.5
@end example

 Map the darkest input color to red, the brightest input color to cyan:
@example
 normalize=blackpt=red:whitept=cyan
@end example

@section null

 Pass the video source unchanged to the output.
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -247,6 +247,7 @@ OBJS-$(CONFIG_NLMEANS_FILTER)                += vf_nlmeans.o
 OBJS-$(CONFIG_NNEDI_FILTER)                  += vf_nnedi.o
 OBJS-$(CONFIG_NOFORMAT_FILTER)               += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += vf_noise.o
 OBJS-$(CONFIG_NORMALIZE_FILTER)              += vf_normalize.o
 OBJS-$(CONFIG_NULL_FILTER)                   += vf_null.o
 OBJS-$(CONFIG_OCR_FILTER)                    += vf_ocr.o
 OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -257,6 +257,7 @@ static void register_all(void)
    REGISTER_FILTER(NNEDI,          nnedi,          vf);
    REGISTER_FILTER(NOFORMAT,       noformat,       vf);
    REGISTER_FILTER(NOISE,          noise,          vf);
    REGISTER_FILTER(NORMALIZE,      normalize,      vf);
    REGISTER_FILTER(NULL,           null,           vf);
    REGISTER_FILTER(OCR,            ocr,            vf);
    REGISTER_FILTER(OCV,            ocv,            vf);
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"

 #define LIBAVFILTER_VERSION_MAJOR   7
 #define LIBAVFILTER_VERSION_MINOR   3
 #define LIBAVFILTER_VERSION_MINOR   4
 #define LIBAVFILTER_VERSION_MICRO 100

 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
--- a/libavfilter/vf_normalize.c
+++ b/libavfilter/vf_normalize.c
@@ -0,0 +1,386 @@
 /*
 * Copyright (c) 2017 Richard Ling
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 /*
 * Normalize RGB video (aka histogram stretching, contrast stretching).
 * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
 *
 * For each channel of each frame, the filter computes the input range and maps
 * it linearly to the user-specified output range. The output range defaults
 * to the full dynamic range from pure black to pure white.
 *
 * Naively maximising the dynamic range of each frame of video in isolation
 * may cause flickering (rapid changes in brightness of static objects in the
 * scene) when small dark or bright objects enter or leave the scene. This
 * filter can apply temporal smoothing to the input range to reduce flickering.
 * Temporal smoothing is similar to the auto-exposure (automatic gain control)
 * on a video camera, which performs the same function; and, like a video
 * camera, it may cause a period of over- or under-exposure of the video.
 *
 * The filter can normalize the R,G,B channels independently, which may cause
 * color shifting, or link them together as a single channel, which prevents
 * color shifting. More precisely, linked normalization preserves hue (as it's
 * defined in HSV/HSL color spaces) while independent normalization does not.
 * Independent normalization can be used to remove color casts, such as the
 * blue cast from underwater video, restoring more natural colors. The filter
 * can also combine independent and linked normalization in any ratio.
 *
 * Finally the overall strength of the filter can be adjusted, from no effect
 * to full normalization.
 *
 * The 5 AVOptions are:
 *   blackpt,   Colors which define the output range. The minimum input value
 *   whitept    is mapped to the blackpt. The maximum input value is mapped to
 *              the whitept. The defaults are black and white respectively.
 *              Specifying white for blackpt and black for whitept will give
 *              color-inverted, normalized video. Shades of grey can be used
 *              to reduce the dynamic range (contrast). Specifying saturated
 *              colors here can create some interesting effects.
 *
 *   smoothing  The amount of temporal smoothing, expressed in frames (>=0).
 *              The minimum and maximum input values of each channel are
 *              smoothed using a rolling average over the current frame and
 *              that many previous frames of video.  Defaults to 0 (no temporal
 *              smoothing).
 *
 *   independence
 *              Controls the ratio of independent (color shifting) channel
 *              normalization to linked (color preserving) normalization. 0.0
 *              is fully linked, 1.0 is fully independent. Defaults to fully
 *              independent.
 *
 *   strength   Overall strength of the filter. 1.0 is full strength. 0.0 is
 *              a rather expensive no-op. Values in between can give a gentle
 *              boost to low-contrast video without creating an artificial
 *              over-processed look. The default is full strength.
 */

 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"

 // Private state of the normalize filter: the user options, the pixel layout
 // of the negotiated format, and the rolling history used for temporal
 // smoothing of the per-channel input range.
 typedef struct NormalizeContext {
    const AVClass *class;

    // Storage for the corresponding AVOptions
    uint8_t blackpt[4];
    uint8_t whitept[4];
    int smoothing;
    float independence;
    float strength;

    int co[4];          // Offsets to R,G,B,A bytes respectively in each pixel
    int num_components; // Number of components in the pixel format
    int history_len;    // Number of frames to average; based on smoothing factor
    int frame_num;      // Increments on each frame, starting from 0.

    // Per-extremum, per-channel history, for temporal smoothing.
    struct {
        uint8_t *history;       // History entries (history_len of them).
        // Sum of the history entries. The smoothing option may be as large
        // as INT_MAX/8, so up to 255 * (INT_MAX/8 + 1) must be representable;
        // that does not fit in 32 bits, hence the 64-bit accumulator.
        uint64_t history_sum;
    } min[3], max[3];           // Min and max for each channel in {R,G,B}.
    uint8_t *history_mem;       // Single allocation for above history entries

 } NormalizeContext;

 #define OFFSET(x) offsetof(NormalizeContext, x)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)

 // Option table. Each entry names the NormalizeContext field that receives
 // the parsed value (via OFFSET), its type, default and permitted range.
 static const AVOption normalize_options[] = {
    // End points of the output range.
    { "blackpt",
      "output color to which darkest input color is mapped",
      OFFSET(blackpt), AV_OPT_TYPE_COLOR, { .str = "black" },
      CHAR_MIN, CHAR_MAX, FLAGS },
    { "whitept",
      "output color to which brightest input color is mapped",
      OFFSET(whitept), AV_OPT_TYPE_COLOR, { .str = "white" },
      CHAR_MIN, CHAR_MAX, FLAGS },
    // Number of previous frames averaged into the input range. The upper
    // bound of INT_MAX/8 keeps the history allocation size computed in
    // config_input() from overflowing an int.
    { "smoothing",
      "amount of temporal smoothing of the input range, to reduce flicker",
      OFFSET(smoothing), AV_OPT_TYPE_INT, { .i64 = 0 },
      0, INT_MAX/8, FLAGS },
    // Blend factors, both restricted to [0.0, 1.0] and defaulting to 1.0.
    { "independence",
      "proportion of independent to linked channel normalization",
      OFFSET(independence), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 },
      0.0, 1.0, FLAGS },
    { "strength",
      "strength of filter, from no effect to full normalization",
      OFFSET(strength), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 },
      0.0, 1.0, FLAGS },
    { NULL }
 };

 AVFILTER_DEFINE_CLASS(normalize);

 // This function is the main guts of the filter. It normalizes the input frame
 // into the output frame. The frames are known to have the same dimensions
 // and pixel format, and may be the very same frame (in-place filtering).
 //
 //   s    filter state: options, component offsets and the rolling min/max
 //        history. The history is updated as a side effect and s->frame_num
 //        is incremented once per call.
 //   in   frame to read; only data[0]/linesize[0] are accessed.
 //   out  frame to write; only data[0]/linesize[0] are accessed.
 static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
 {
    // Per-extremum, per-channel local variables.
    struct {
        uint8_t in;     // Original input byte value for this frame.
        float smoothed; // Smoothed input value [0,255].
        float out;      // Output value [0,255].
    } min[3], max[3];   // Min and max for each channel in {R,G,B}.

    float rgb_min_smoothed; // Min input range for linked normalization
    float rgb_max_smoothed; // Max input range for linked normalization
    uint8_t lut[3][256];    // Lookup table
    int x, y, c;

    // First, scan the input frame to find, for each channel, the minimum
    // (min.in) and maximum (max.in) values present in the channel. This pass
    // only reads the input; the output frame is not touched yet.
    for (c = 0; c < 3; c++)
        min[c].in = max[c].in = in->data[0][s->co[c]];
    for (y = 0; y < in->height; y++) {
        const uint8_t *inp = in->data[0] + y * in->linesize[0];
        for (x = 0; x < in->width; x++) {
            for (c = 0; c < 3; c++) {
                min[c].in = FFMIN(min[c].in, inp[s->co[c]]);
                max[c].in = FFMAX(max[c].in, inp[s->co[c]]);
            }
            inp += s->num_components;
        }
    }

    // Next, for each channel, push min.in and max.in into their respective
    // histories, to determine the min.smoothed and max.smoothed for this frame.
    {
        // The history is used as a ring buffer indexed by frame number.
        int history_idx = s->frame_num % s->history_len;
        // Assume the history is not yet full; num_history_vals is the number
        // of frames received so far including the current frame.
        int num_history_vals = s->frame_num + 1;
        if (s->frame_num >= s->history_len) {
            // The history is full; drop oldest value and cap num_history_vals.
            for (c = 0; c < 3; c++) {
                s->min[c].history_sum -= s->min[c].history[history_idx];
                s->max[c].history_sum -= s->max[c].history[history_idx];
            }
            num_history_vals = s->history_len;
        }
        // For each extremum, update history_sum and calculate smoothed value
        // as the rolling average of the history entries.
        for (c = 0; c < 3; c++) {
            s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
            min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
            s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
            max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
        }
    }

    // Determine the input range for linked normalization. This is simply the
    // minimum of the per-channel minimums, and the maximum of the per-channel
    // maximums.
    rgb_min_smoothed = FFMIN3(min[0].smoothed, min[1].smoothed, min[2].smoothed);
    rgb_max_smoothed = FFMAX3(max[0].smoothed, max[1].smoothed, max[2].smoothed);

    // Now, process each channel to determine the input and output range and
    // build the lookup tables.
    for (c = 0; c < 3; c++) {
        int in_val;
        // Adjust the input range for this channel [min.smoothed,max.smoothed]
        // by mixing in the correct proportion of the linked normalization
        // input range [rgb_min_smoothed,rgb_max_smoothed].
        min[c].smoothed = (min[c].smoothed  *         s->independence)
                        + (rgb_min_smoothed * (1.0f - s->independence));
        max[c].smoothed = (max[c].smoothed  *         s->independence)
                        + (rgb_max_smoothed * (1.0f - s->independence));

        // Calculate the output range [min.out,max.out] as a ratio of the full-
        // strength output range [blackpt,whitept] and the original input range
        // [min.in,max.in], based on the user-specified filter strength.
        min[c].out = (s->blackpt[c] *         s->strength)
                   + (min[c].in     * (1.0f - s->strength));
        max[c].out = (s->whitept[c] *         s->strength)
                   + (max[c].in     * (1.0f - s->strength));

        // Now, build a lookup table which linearly maps the adjusted input range
        // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
        // Perform the linear interpolation for each x:
        //     lut[x] = (int)(float(x - min.smoothed) * scale + min.out + 0.5)
        // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
        // Only the entries in [min.in,max.in] are filled in, because those
        // are the only values the pixel pass below can look up.
        if (min[c].smoothed == max[c].smoothed) {
            // There is no dynamic range to expand (and scale would divide by
            // zero). Map every input value of this channel to min.out.
            for (in_val = min[c].in; in_val <= max[c].in; in_val++)
                lut[c][in_val] = min[c].out;
        } else {
            // We must set lookup values for all values in the original input
            // range [min.in,max.in]. Since the original input range may be
            // larger than [min.smoothed,max.smoothed], some output values may
            // fall outside the [0,255] dynamic range. We need to clamp them.
            float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
            for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
                // Clamp while still in floating point: with a long history
                // the smoothed range can become tiny, making the scaled value
                // too large for an int, and converting such a float to an
                // integer type is undefined behaviour.
                float out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5f;
                out_val = FFMAX(out_val, 0.0f);
                out_val = FFMIN(out_val, 255.0f);
                lut[c][in_val] = (uint8_t)out_val;
            }
        }
    }

    // Finally, process the pixels of the input frame using the lookup tables.
    for (y = 0; y < in->height; y++) {
        uint8_t *inp = in->data[0] + y * in->linesize[0];
        uint8_t *outp = out->data[0] + y * out->linesize[0];
        for (x = 0; x < in->width; x++) {
            for (c = 0; c < 3; c++)
                outp[s->co[c]] = lut[c][inp[s->co[c]]];
            if (s->num_components == 4)
                // Copy alpha as-is.
                outp[s->co[3]] = inp[s->co[3]];
            inp += s->num_components;
            outp += s->num_components;
        }
    }

    s->frame_num++;
 }

 // Now we define all the functions accessible from the ff_vf_normalize class,
 // which is ffmpeg's interface to our filter.  See doc/filter_design.txt and
 // doc/writing_filters.txt for descriptions of what these interface functions
 // are expected to do.

 // Declare the pixel formats this filter can process: the 8-bit RGB formats
 // listed below, with or without a fourth (alpha or padding) byte. 16-bit
 // support might be useful one day.
 //
 // Returns the result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
 // format list could not be created.
 static int query_formats(AVFilterContext *ctx)
 {
    static const enum AVPixelFormat supported_fmts[] = {
        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
        AV_PIX_FMT_ARGB,  AV_PIX_FMT_RGBA,
        AV_PIX_FMT_ABGR,  AV_PIX_FMT_BGRA,
        AV_PIX_FMT_0RGB,  AV_PIX_FMT_RGB0,
        AV_PIX_FMT_0BGR,  AV_PIX_FMT_BGR0,
        AV_PIX_FMT_NONE
    };
    AVFilterFormats *fmt_list = ff_make_format_list(supported_fmts);

    // According to filter_design.txt, handing one common list to
    // ff_set_common_formats() makes input and output use the same pixel
    // format, so no format conversion has to be handled in this filter.
    return fmt_list ? ff_set_common_formats(ctx, fmt_list)
                    : AVERROR(ENOMEM);
 }

 // Called once the pixel format used for both input and output is known.
 // Records the pixel layout and (re)allocates the smoothing history.
 //
 // Returns 0 on success, AVERROR(ENOMEM) if the history cannot be allocated.
 static int config_input(AVFilterLink *inlink)
 {
    NormalizeContext *s = inlink->dst->priv;
    // Store offsets to R,G,B,A bytes respectively in each pixel
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    int c;

    for (c = 0; c < 4; ++c)
        s->co[c] = desc->comp[c].offset;
    s->num_components = desc->nb_components;
    // Convert smoothing value to history_len (a count of frames to average,
    // must be at least 1).  Currently this is a direct assignment, but the
    // smoothing value was originally envisaged as a number of seconds.  In
    // future it would be nice to set history_len using a number of seconds,
    // but VFR video is currently an obstacle to doing so.
    s->history_len = s->smoothing + 1;

    // Discard any state left over from an earlier configuration. Without
    // this, a second call would leak the previous buffer, and the stale
    // frame counter and sums would make normalize() subtract uninitialised
    // history entries.
    av_freep(&s->history_mem);
    s->frame_num = 0;
    for (c = 0; c < 3; c++) {
        s->min[c].history_sum = 0;
        s->max[c].history_sum = 0;
    }

    // Allocate the history buffers -- there are 6 -- one for each extremum
    // (min and max) of each of the 3 channels.
    // s->smoothing is limited to INT_MAX/8, so that (s->history_len * 6)
    // can't overflow on 32bit causing a too-small allocation.
    s->history_mem = av_malloc(s->history_len * 6);
    if (s->history_mem == NULL)
        return AVERROR(ENOMEM);

    // Carve the single allocation into 6 consecutive runs of history_len
    // bytes, ordered min[0], max[0], min[1], max[1], min[2], max[2].
    for (c = 0; c < 3; c++) {
        s->min[c].history = s->history_mem + (c*2)   * s->history_len;
        s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
    }
    return 0;
 }

 // Release the history storage that config_input() allocated.
 static av_cold void uninit(AVFilterContext *ctx)
 {
    NormalizeContext *priv = ctx->priv;

    // One allocation backs all six history arrays, so a single free suffices.
    av_freep(&priv->history_mem);
 }

 // This function is pretty much standard from doc/writing_filters.txt.  It
 // tries to do in-place filtering where possible, only allocating a new output
 // frame when absolutely necessary.
 //
 // Takes ownership of 'in'. Returns the result of ff_filter_frame(), or a
 // negative AVERROR code if an output frame could not be obtained or the
 // frame properties could not be copied.
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    NormalizeContext *s = ctx->priv;
    AVFrame *out;
    // Set 'direct' if we can modify the input frame in-place.  Otherwise we
    // need to retrieve a new frame from the output link. A disabled filter
    // must not modify its input, so it never works in-place.
    int direct = av_frame_is_writable(in) && !ctx->is_disabled;

    if (direct) {
        out = in;
    } else {
        int ret;

        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }
        // Propagate a failure to copy the frame properties instead of
        // silently sending a frame without them downstream.
        ret = av_frame_copy_props(out, in);
        if (ret < 0) {
            av_frame_free(&out);
            av_frame_free(&in);
            return ret;
        }
    }

    // Now we've got the input and output frames (which may be the same frame)
    // perform the filtering with our custom function.
    normalize(s, in, out);

    if (ctx->is_disabled) {
        // normalize() was still run so that the smoothing history keeps
        // tracking the input, but its output is thrown away and the
        // untouched input frame is passed on.
        av_frame_free(&out);
        return ff_filter_frame(outlink, in);
    }

    if (!direct)
        av_frame_free(&in);

    return ff_filter_frame(outlink, out);
 }

 // The single video input pad, terminated by an empty entry.
 static const AVFilterPad inputs[] = {
    {
        .type         = AVMEDIA_TYPE_VIDEO,
        .name         = "default",
        // Sets up pixel layout and history once the link is configured.
        .config_props = config_input,
        // Receives each incoming frame.
        .filter_frame = filter_frame,
    },
    { NULL }
 };

 // The single video output pad, terminated by an empty entry.
 static const AVFilterPad outputs[] = {
    {
        .type = AVMEDIA_TYPE_VIDEO,
        .name = "default",
    },
    { NULL }
 };

 AVFilter ff_vf_normalize = {
    .name          = "normalize",
    .description   = NULL_IF_CONFIG_SMALL("Normalize RGB video."),
    .priv_size     = sizeof(NormalizeContext),
    .priv_class    = &normalize_class,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = inputs,
    .outputs       = outputs,
 };