This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping. An example command to use this filter with vaapi codecs: FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \ opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \ vaapi -i INPUT -filter_hw_device ocl -filter_complex \ '[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \ [x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT Signed-off-by: Ruiling Song <ruiling.song@intel.com>tags/n4.1
| @@ -3412,6 +3412,7 @@ tinterlace_filter_deps="gpl" | |||
| tinterlace_merge_test_deps="tinterlace_filter" | |||
| tinterlace_pad_test_deps="tinterlace_filter" | |||
| tonemap_filter_deps="const_nan" | |||
| tonemap_opencl_filter_deps="opencl const_nan" | |||
| unsharp_opencl_filter_deps="opencl" | |||
| uspp_filter_deps="gpl avcodec" | |||
| vaguedenoiser_filter_deps="gpl" | |||
| @@ -358,6 +358,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o | |||
| OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o | |||
| OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o framesync.o | |||
| OBJS-$(CONFIG_TONEMAP_FILTER) += vf_tonemap.o | |||
| OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o opencl.o \ | |||
| opencl/tonemap.o opencl/colorspace_common.o | |||
| OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o | |||
| OBJS-$(CONFIG_TRIM_FILTER) += trim.o | |||
| OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o | |||
| @@ -346,6 +346,7 @@ extern AVFilter ff_vf_tinterlace; | |||
| extern AVFilter ff_vf_tlut2; | |||
| extern AVFilter ff_vf_tmix; | |||
| extern AVFilter ff_vf_tonemap; | |||
| extern AVFilter ff_vf_tonemap_opencl; | |||
| extern AVFilter ff_vf_transpose; | |||
| extern AVFilter ff_vf_trim; | |||
| extern AVFilter ff_vf_unpremultiply; | |||
| @@ -0,0 +1,90 @@ | |||
| /* | |||
| * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com> | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "colorspace.h" | |||
| void invert_matrix3x3(const double in[3][3], double out[3][3]) | |||
| { | |||
| double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2], | |||
| m10 = in[1][0], m11 = in[1][1], m12 = in[1][2], | |||
| m20 = in[2][0], m21 = in[2][1], m22 = in[2][2]; | |||
| int i, j; | |||
| double det; | |||
| out[0][0] = (m11 * m22 - m21 * m12); | |||
| out[0][1] = -(m01 * m22 - m21 * m02); | |||
| out[0][2] = (m01 * m12 - m11 * m02); | |||
| out[1][0] = -(m10 * m22 - m20 * m12); | |||
| out[1][1] = (m00 * m22 - m20 * m02); | |||
| out[1][2] = -(m00 * m12 - m10 * m02); | |||
| out[2][0] = (m10 * m21 - m20 * m11); | |||
| out[2][1] = -(m00 * m21 - m20 * m01); | |||
| out[2][2] = (m00 * m11 - m10 * m01); | |||
| det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2]; | |||
| det = 1.0 / det; | |||
| for (i = 0; i < 3; i++) { | |||
| for (j = 0; j < 3; j++) | |||
| out[i][j] *= det; | |||
| } | |||
| } | |||
| void mul3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]) | |||
| { | |||
| int m, n; | |||
| for (m = 0; m < 3; m++) | |||
| for (n = 0; n < 3; n++) | |||
| dst[m][n] = src2[m][0] * src1[0][n] + | |||
| src2[m][1] * src1[1][n] + | |||
| src2[m][2] * src1[2][n]; | |||
| } | |||
| /* | |||
| * see e.g. http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html | |||
| */ | |||
| void fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs, | |||
| const struct WhitepointCoefficients *wp, | |||
| double rgb2xyz[3][3]) | |||
| { | |||
| double i[3][3], sr, sg, sb, zw; | |||
| rgb2xyz[0][0] = coeffs->xr / coeffs->yr; | |||
| rgb2xyz[0][1] = coeffs->xg / coeffs->yg; | |||
| rgb2xyz[0][2] = coeffs->xb / coeffs->yb; | |||
| rgb2xyz[1][0] = rgb2xyz[1][1] = rgb2xyz[1][2] = 1.0; | |||
| rgb2xyz[2][0] = (1.0 - coeffs->xr - coeffs->yr) / coeffs->yr; | |||
| rgb2xyz[2][1] = (1.0 - coeffs->xg - coeffs->yg) / coeffs->yg; | |||
| rgb2xyz[2][2] = (1.0 - coeffs->xb - coeffs->yb) / coeffs->yb; | |||
| invert_matrix3x3(rgb2xyz, i); | |||
| zw = 1.0 - wp->xw - wp->yw; | |||
| sr = i[0][0] * wp->xw + i[0][1] * wp->yw + i[0][2] * zw; | |||
| sg = i[1][0] * wp->xw + i[1][1] * wp->yw + i[1][2] * zw; | |||
| sb = i[2][0] * wp->xw + i[2][1] * wp->yw + i[2][2] * zw; | |||
| rgb2xyz[0][0] *= sr; | |||
| rgb2xyz[0][1] *= sg; | |||
| rgb2xyz[0][2] *= sb; | |||
| rgb2xyz[1][0] *= sr; | |||
| rgb2xyz[1][1] *= sg; | |||
| rgb2xyz[1][2] *= sb; | |||
| rgb2xyz[2][0] *= sr; | |||
| rgb2xyz[2][1] *= sg; | |||
| rgb2xyz[2][2] *= sb; | |||
| } | |||
| @@ -0,0 +1,41 @@ | |||
| /* | |||
| * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com> | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVFILTER_COLORSPACE_H | |||
| #define AVFILTER_COLORSPACE_H | |||
| #include "libavutil/common.h" | |||
| struct LumaCoefficients { | |||
| double cr, cg, cb; | |||
| }; | |||
| struct PrimaryCoefficients { | |||
| double xr, yr, xg, yg, xb, yb; | |||
| }; | |||
| struct WhitepointCoefficients { | |||
| double xw, yw; | |||
| }; | |||
| void invert_matrix3x3(const double in[3][3], double out[3][3]); | |||
| void mul3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]); | |||
| void fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs, | |||
| const struct WhitepointCoefficients *wp, double rgb2xyz[3][3]); | |||
| #endif | |||
| @@ -0,0 +1,220 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #define ST2084_MAX_LUMINANCE 10000.0f | |||
| #define REFERENCE_WHITE 100.0f | |||
| #if chroma_loc == 1 | |||
| #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f) | |||
| #elif chroma_loc == 3 | |||
| #define chroma_sample(a,b,c,d) (a) | |||
| #elif chroma_loc == 4 | |||
| #define chroma_sample(a,b,c,d) (((a) + (b)) * 0.5f) | |||
| #elif chroma_loc == 5 | |||
| #define chroma_sample(a,b,c,d) (c) | |||
| #elif chroma_loc == 6 | |||
| #define chroma_sample(a,b,c,d) (((c) + (d)) * 0.5f) | |||
| #else | |||
| #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f) | |||
| #endif | |||
| constant const float ST2084_M1 = 0.1593017578125f; | |||
| constant const float ST2084_M2 = 78.84375f; | |||
| constant const float ST2084_C1 = 0.8359375f; | |||
| constant const float ST2084_C2 = 18.8515625f; | |||
| constant const float ST2084_C3 = 18.6875f; | |||
| __constant float yuv2rgb_bt2020[] = { | |||
| 1.0f, 0.0f, 1.4746f, | |||
| 1.0f, -0.16455f, -0.57135f, | |||
| 1.0f, 1.8814f, 0.0f | |||
| }; | |||
| __constant float yuv2rgb_bt709[] = { | |||
| 1.0f, 0.0f, 1.5748f, | |||
| 1.0f, -0.18732f, -0.46812f, | |||
| 1.0f, 1.8556f, 0.0f | |||
| }; | |||
| __constant float rgb2yuv_bt709[] = { | |||
| 0.2126f, 0.7152f, 0.0722f, | |||
| -0.11457f, -0.38543f, 0.5f, | |||
| 0.5f, -0.45415f, -0.04585f | |||
| }; | |||
| __constant float rgb2yuv_bt2020[] ={ | |||
| 0.2627f, 0.678f, 0.0593f, | |||
| -0.1396f, -0.36037f, 0.5f, | |||
| 0.5f, -0.4598f, -0.0402f, | |||
| }; | |||
| float get_luma_dst(float3 c) { | |||
| return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; | |||
| } | |||
| float get_luma_src(float3 c) { | |||
| return luma_src.x * c.x + luma_src.y * c.y + luma_src.z * c.z; | |||
| } | |||
| float3 get_chroma_sample(float3 a, float3 b, float3 c, float3 d) { | |||
| return chroma_sample(a, b, c, d); | |||
| } | |||
| float eotf_st2084(float x) { | |||
| float p = powr(x, 1.0f / ST2084_M2); | |||
| float a = max(p -ST2084_C1, 0.0f); | |||
| float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f); | |||
| float c = powr(a / b, 1.0f / ST2084_M1); | |||
| return x > 0.0f ? c * ST2084_MAX_LUMINANCE / REFERENCE_WHITE : 0.0f; | |||
| } | |||
| __constant const float HLG_A = 0.17883277f; | |||
| __constant const float HLG_B = 0.28466892f; | |||
| __constant const float HLG_C = 0.55991073f; | |||
| // linearizer for HLG | |||
| float inverse_oetf_hlg(float x) { | |||
| float a = 4.0f * x * x; | |||
| float b = exp((x - HLG_C) / HLG_A) + HLG_B; | |||
| return x < 0.5f ? a : b; | |||
| } | |||
| // delinearizer for HLG | |||
| float oetf_hlg(float x) { | |||
| float a = 0.5f * sqrt(x); | |||
| float b = HLG_A * log(x - HLG_B) + HLG_C; | |||
| return x <= 1.0f ? a : b; | |||
| } | |||
| float3 ootf_hlg(float3 c, float peak) { | |||
| float luma = get_luma_src(c); | |||
| float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f); | |||
| gamma = max(1.0f, gamma); | |||
| float factor = peak * powr(luma, gamma - 1.0f) / powr(12.0f, gamma); | |||
| return c * factor; | |||
| } | |||
| float3 inverse_ootf_hlg(float3 c, float peak) { | |||
| float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f); | |||
| c *= powr(12.0f, gamma) / peak; | |||
| c /= powr(get_luma_dst(c), (gamma - 1.0f) / gamma); | |||
| return c; | |||
| } | |||
| float inverse_eotf_bt1886(float c) { | |||
| return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f); | |||
| } | |||
| float oetf_bt709(float c) { | |||
| c = c < 0.0f ? 0.0f : c; | |||
| float r1 = 4.5f * c; | |||
| float r2 = 1.099f * powr(c, 0.45f) - 0.099f; | |||
| return c < 0.018f ? r1 : r2; | |||
| } | |||
| float inverse_oetf_bt709(float c) { | |||
| float r1 = c / 4.5f; | |||
| float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f); | |||
| return c < 0.081f ? r1 : r2; | |||
| } | |||
| float3 yuv2rgb(float y, float u, float v) { | |||
| #ifdef FULL_RANGE_IN | |||
| u -= 0.5f; v -= 0.5f; | |||
| #else | |||
| y = (y * 255.0f - 16.0f) / 219.0f; | |||
| u = (u * 255.0f - 128.0f) / 224.0f; | |||
| v = (v * 255.0f - 128.0f) / 224.0f; | |||
| #endif | |||
| float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; | |||
| float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; | |||
| float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; | |||
| return (float3)(r, g, b); | |||
| } | |||
| float3 yuv2lrgb(float3 yuv) { | |||
| float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z); | |||
| float r = linearize(rgb.x); | |||
| float g = linearize(rgb.y); | |||
| float b = linearize(rgb.z); | |||
| return (float3)(r, g, b); | |||
| } | |||
| float3 rgb2yuv(float r, float g, float b) { | |||
| float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; | |||
| float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; | |||
| float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; | |||
| #ifdef FULL_RANGE_OUT | |||
| u += 0.5f; v += 0.5f; | |||
| #else | |||
| y = (219.0f * y + 16.0f) / 255.0f; | |||
| u = (224.0f * u + 128.0f) / 255.0f; | |||
| v = (224.0f * v + 128.0f) / 255.0f; | |||
| #endif | |||
| return (float3)(y, u, v); | |||
| } | |||
| float rgb2y(float r, float g, float b) { | |||
| float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; | |||
| y = (219.0f * y + 16.0f) / 255.0f; | |||
| return y; | |||
| } | |||
| float3 lrgb2yuv(float3 c) { | |||
| float r = delinearize(c.x); | |||
| float g = delinearize(c.y); | |||
| float b = delinearize(c.z); | |||
| return rgb2yuv(r, g, b); | |||
| } | |||
| float lrgb2y(float3 c) { | |||
| float r = delinearize(c.x); | |||
| float g = delinearize(c.y); | |||
| float b = delinearize(c.z); | |||
| return rgb2y(r, g, b); | |||
| } | |||
| float3 lrgb2lrgb(float3 c) { | |||
| #ifdef RGB2RGB_PASSTHROUGH | |||
| return c; | |||
| #else | |||
| float r = c.x, g = c.y, b = c.z; | |||
| float rr = rgb2rgb[0] * r + rgb2rgb[1] * g + rgb2rgb[2] * b; | |||
| float gg = rgb2rgb[3] * r + rgb2rgb[4] * g + rgb2rgb[5] * b; | |||
| float bb = rgb2rgb[6] * r + rgb2rgb[7] * g + rgb2rgb[8] * b; | |||
| return (float3)(rr, gg, bb); | |||
| #endif | |||
| } | |||
| float3 ootf(float3 c, float peak) { | |||
| #ifdef ootf_impl | |||
| return ootf_impl(c, peak); | |||
| #else | |||
| return c; | |||
| #endif | |||
| } | |||
| float3 inverse_ootf(float3 c, float peak) { | |||
| #ifdef inverse_ootf_impl | |||
| return inverse_ootf_impl(c, peak); | |||
| #else | |||
| return c; | |||
| #endif | |||
| } | |||
| @@ -0,0 +1,272 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #define REFERENCE_WHITE 100.0f | |||
| extern float3 lrgb2yuv(float3); | |||
| extern float lrgb2y(float3); | |||
| extern float3 yuv2lrgb(float3); | |||
| extern float3 lrgb2lrgb(float3); | |||
| extern float get_luma_src(float3); | |||
| extern float get_luma_dst(float3); | |||
| extern float3 ootf(float3 c, float peak); | |||
| extern float3 inverse_ootf(float3 c, float peak); | |||
| extern float3 get_chroma_sample(float3, float3, float3, float3); | |||
| struct detection_result { | |||
| float peak; | |||
| float average; | |||
| }; | |||
| float hable_f(float in) { | |||
| float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; | |||
| return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; | |||
| } | |||
| float direct(float s, float peak) { | |||
| return s; | |||
| } | |||
| float linear(float s, float peak) { | |||
| return s * tone_param / peak; | |||
| } | |||
| float gamma(float s, float peak) { | |||
| float p = s > 0.05f ? s /peak : 0.05f / peak; | |||
| float v = powr(p, 1.0f / tone_param); | |||
| return s > 0.05f ? v : (s * v /0.05f); | |||
| } | |||
| float clip(float s, float peak) { | |||
| return clamp(s * tone_param, 0.0f, 1.0f); | |||
| } | |||
| float reinhard(float s, float peak) { | |||
| return s / (s + tone_param) * (peak + tone_param) / peak; | |||
| } | |||
| float hable(float s, float peak) { | |||
| return hable_f(s)/hable_f(peak); | |||
| } | |||
| float mobius(float s, float peak) { | |||
| float j = tone_param; | |||
| float a, b; | |||
| if (s <= j) | |||
| return s; | |||
| a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); | |||
| b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f); | |||
| return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); | |||
| } | |||
| // detect peak/average signal of a frame, the algorithm was ported from: | |||
| // libplacebo (https://github.com/haasn/libplacebo) | |||
| struct detection_result | |||
| detect_peak_avg(global uint *util_buf, __local uint *sum_wg, | |||
| float signal, float peak) { | |||
| // layout of the util buffer | |||
| // | |||
| // Name: : Size (units of 4-bytes) | |||
| // average buffer : detection_frames + 1 | |||
| // peak buffer : detection_frames + 1 | |||
| // workgroup counter : 1 | |||
| // total of peak : 1 | |||
| // total of average : 1 | |||
| // frame index : 1 | |||
| // frame number : 1 | |||
| global uint *avg_buf = util_buf; | |||
| global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1; | |||
| global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1; | |||
| global uint *max_total_p = counter_wg_p + 1; | |||
| global uint *avg_total_p = max_total_p + 1; | |||
| global uint *frame_idx_p = avg_total_p + 1; | |||
| global uint *scene_frame_num_p = frame_idx_p + 1; | |||
| uint frame_idx = *frame_idx_p; | |||
| uint scene_frame_num = *scene_frame_num_p; | |||
| size_t lidx = get_local_id(0); | |||
| size_t lidy = get_local_id(1); | |||
| size_t lsizex = get_local_size(0); | |||
| size_t lsizey = get_local_size(1); | |||
| uint num_wg = get_num_groups(0) * get_num_groups(1); | |||
| size_t group_idx = get_group_id(0); | |||
| size_t group_idy = get_group_id(1); | |||
| struct detection_result r = {peak, sdr_avg}; | |||
| if (lidx == 0 && lidy == 0) | |||
| *sum_wg = 0; | |||
| barrier(CLK_LOCAL_MEM_FENCE); | |||
| // update workgroup sum | |||
| atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE)); | |||
| barrier(CLK_LOCAL_MEM_FENCE); | |||
| // update frame peak/avg using work-group-average. | |||
| if (lidx == 0 && lidy == 0) { | |||
| uint avg_wg = *sum_wg / (lsizex * lsizey); | |||
| atomic_max(&peak_buf[frame_idx], avg_wg); | |||
| atomic_add(&avg_buf[frame_idx], avg_wg); | |||
| } | |||
| if (scene_frame_num > 0) { | |||
| float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num); | |||
| float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num); | |||
| r.peak = max(1.0f, peak); | |||
| r.average = max(0.25f, avg); | |||
| } | |||
| if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) { | |||
| *counter_wg_p = 0; | |||
| avg_buf[frame_idx] /= num_wg; | |||
| if (scene_threshold > 0.0f) { | |||
| uint cur_max = peak_buf[frame_idx]; | |||
| uint cur_avg = avg_buf[frame_idx]; | |||
| int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p; | |||
| if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) { | |||
| for (uint i = 0; i < DETECTION_FRAMES + 1; i++) | |||
| avg_buf[i] = 0; | |||
| for (uint i = 0; i < DETECTION_FRAMES + 1; i++) | |||
| peak_buf[i] = 0; | |||
| *avg_total_p = *max_total_p = 0; | |||
| *scene_frame_num_p = 0; | |||
| avg_buf[frame_idx] = cur_avg; | |||
| peak_buf[frame_idx] = cur_max; | |||
| } | |||
| } | |||
| uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1); | |||
| // add current frame, subtract next frame | |||
| *max_total_p += peak_buf[frame_idx] - peak_buf[next]; | |||
| *avg_total_p += avg_buf[frame_idx] - avg_buf[next]; | |||
| // reset next frame | |||
| peak_buf[next] = avg_buf[next] = 0; | |||
| *frame_idx_p = next; | |||
| *scene_frame_num_p = min(*scene_frame_num_p + 1, | |||
| (uint)DETECTION_FRAMES); | |||
| } | |||
| return r; | |||
| } | |||
| float3 map_one_pixel_rgb(float3 rgb, float peak, float average) { | |||
| float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f); | |||
| // Rescale the variables in order to bring it into a representation where | |||
| // 1.0 represents the dst_peak. This is because all of the tone mapping | |||
| // algorithms are defined in such a way that they map to the range [0.0, 1.0]. | |||
| if (target_peak > 1.0f) { | |||
| sig *= 1.0f / target_peak; | |||
| peak *= 1.0f / target_peak; | |||
| } | |||
| float sig_old = sig; | |||
| // Scale the signal to compensate for differences in the average brightness | |||
| float slope = min(1.0f, sdr_avg / average); | |||
| sig *= slope; | |||
| peak *= slope; | |||
| // Desaturate the color using a coefficient dependent on the signal level | |||
| if (desat_param > 0.0f) { | |||
| float luma = get_luma_dst(rgb); | |||
| float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f); | |||
| coeff = native_powr(coeff, 10.0f / desat_param); | |||
| rgb = mix(rgb, (float3)luma, (float3)coeff); | |||
| sig = mix(sig, luma * slope, coeff); | |||
| } | |||
| sig = TONE_FUNC(sig, peak); | |||
| sig = min(sig, 1.0f); | |||
| rgb *= (sig/sig_old); | |||
| return rgb; | |||
| } | |||
| // map from source space YUV to destination space RGB | |||
| float3 map_to_dst_space_from_yuv(float3 yuv, float peak) { | |||
| float3 c = yuv2lrgb(yuv); | |||
| c = ootf(c, peak); | |||
| c = lrgb2lrgb(c); | |||
| return c; | |||
| } | |||
| __kernel void tonemap(__write_only image2d_t dst1, | |||
| __read_only image2d_t src1, | |||
| __write_only image2d_t dst2, | |||
| __read_only image2d_t src2, | |||
| global uint *util_buf, | |||
| float peak | |||
| ) | |||
| { | |||
| __local uint sum_wg; | |||
| const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | | |||
| CLK_ADDRESS_CLAMP_TO_EDGE | | |||
| CLK_FILTER_NEAREST); | |||
| int xi = get_global_id(0); | |||
| int yi = get_global_id(1); | |||
| // each work item process four pixels | |||
| int x = 2 * xi; | |||
| int y = 2 * yi; | |||
| float y0 = read_imagef(src1, sampler, (int2)(x, y)).x; | |||
| float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x; | |||
| float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x; | |||
| float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x; | |||
| float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy; | |||
| float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak); | |||
| float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak); | |||
| float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak); | |||
| float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak); | |||
| float sig0 = max(c0.x, max(c0.y, c0.z)); | |||
| float sig1 = max(c1.x, max(c1.y, c1.z)); | |||
| float sig2 = max(c2.x, max(c2.y, c2.z)); | |||
| float sig3 = max(c3.x, max(c3.y, c3.z)); | |||
| float sig = max(sig0, max(sig1, max(sig2, sig3))); | |||
| struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak); | |||
| float3 c0_old = c0, c1_old = c1, c2_old = c2; | |||
| c0 = map_one_pixel_rgb(c0, r.peak, r.average); | |||
| c1 = map_one_pixel_rgb(c1, r.peak, r.average); | |||
| c2 = map_one_pixel_rgb(c2, r.peak, r.average); | |||
| c3 = map_one_pixel_rgb(c3, r.peak, r.average); | |||
| c0 = inverse_ootf(c0, target_peak); | |||
| c1 = inverse_ootf(c1, target_peak); | |||
| c2 = inverse_ootf(c2, target_peak); | |||
| c3 = inverse_ootf(c3, target_peak); | |||
| y0 = lrgb2y(c0); | |||
| y1 = lrgb2y(c1); | |||
| y2 = lrgb2y(c2); | |||
| y3 = lrgb2y(c3); | |||
| float3 chroma_c = get_chroma_sample(c0, c1, c2, c3); | |||
| float3 chroma = lrgb2yuv(chroma_c); | |||
| if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) { | |||
| write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f)); | |||
| write_imagef(dst1, (int2)(x+1, y), (float4)(y1, 0.0f, 0.0f, 1.0f)); | |||
| write_imagef(dst1, (int2)(x, y+1), (float4)(y2, 0.0f, 0.0f, 1.0f)); | |||
| write_imagef(dst1, (int2)(x+1, y+1), (float4)(y3, 0.0f, 0.0f, 1.0f)); | |||
| write_imagef(dst2, (int2)(xi, yi), | |||
| (float4)(chroma.y, chroma.z, 0.0f, 1.0f)); | |||
| } | |||
| } | |||
| @@ -20,8 +20,10 @@ | |||
| #define AVFILTER_OPENCL_SOURCE_H | |||
| extern const char *ff_opencl_source_avgblur; | |||
| extern const char *ff_opencl_source_colorspace_common; | |||
| extern const char *ff_opencl_source_convolution; | |||
| extern const char *ff_opencl_source_overlay; | |||
| extern const char *ff_opencl_source_tonemap; | |||
| extern const char *ff_opencl_source_unsharp; | |||
| #endif /* AVFILTER_OPENCL_SOURCE_H */ | |||
| @@ -0,0 +1,624 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <float.h> | |||
| #include "libavutil/avassert.h" | |||
| #include "libavutil/bprint.h" | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/imgutils.h" | |||
| #include "libavutil/mastering_display_metadata.h" | |||
| #include "libavutil/mem.h" | |||
| #include "libavutil/opt.h" | |||
| #include "libavutil/pixdesc.h" | |||
| #include "avfilter.h" | |||
| #include "internal.h" | |||
| #include "opencl.h" | |||
| #include "opencl_source.h" | |||
| #include "video.h" | |||
| #include "colorspace.h" | |||
| // TODO: | |||
| // - seperate peak-detection from tone-mapping kernel to solve | |||
| // one-frame-delay issue. | |||
| // - import colorspace matrix generation from vf_colorspace.c | |||
| // - more format support | |||
| #define DETECTION_FRAMES 63 | |||
| #define REFERENCE_WHITE 100.0f | |||
| enum TonemapAlgorithm { | |||
| TONEMAP_NONE, | |||
| TONEMAP_LINEAR, | |||
| TONEMAP_GAMMA, | |||
| TONEMAP_CLIP, | |||
| TONEMAP_REINHARD, | |||
| TONEMAP_HABLE, | |||
| TONEMAP_MOBIUS, | |||
| TONEMAP_MAX, | |||
| }; | |||
| typedef struct TonemapOpenCLContext { | |||
| OpenCLFilterContext ocf; | |||
| enum AVColorSpace colorspace, colorspace_in, colorspace_out; | |||
| enum AVColorTransferCharacteristic trc, trc_in, trc_out; | |||
| enum AVColorPrimaries primaries, primaries_in, primaries_out; | |||
| enum AVColorRange range, range_in, range_out; | |||
| enum AVChromaLocation chroma_loc; | |||
| enum TonemapAlgorithm tonemap; | |||
| enum AVPixelFormat format; | |||
| double peak; | |||
| double param; | |||
| double desat_param; | |||
| double target_peak; | |||
| double scene_threshold; | |||
| int initialised; | |||
| cl_kernel kernel; | |||
| cl_command_queue command_queue; | |||
| cl_mem util_mem; | |||
| } TonemapOpenCLContext; | |||
| const char *yuv_coff[AVCOL_SPC_NB] = { | |||
| [AVCOL_SPC_BT709] = "rgb2yuv_bt709", | |||
| [AVCOL_SPC_BT2020_NCL] = "rgb2yuv_bt2020", | |||
| }; | |||
| const char *rgb_coff[AVCOL_SPC_NB] = { | |||
| [AVCOL_SPC_BT709] = "yuv2rgb_bt709", | |||
| [AVCOL_SPC_BT2020_NCL] = "yuv2rgb_bt2020", | |||
| }; | |||
| const char *linearize_funcs[AVCOL_TRC_NB] = { | |||
| [AVCOL_TRC_SMPTE2084] = "eotf_st2084", | |||
| [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg", | |||
| }; | |||
| const char *delinearize_funcs[AVCOL_TRC_NB] = { | |||
| [AVCOL_TRC_BT709] = "inverse_eotf_bt1886", | |||
| [AVCOL_TRC_BT2020_10] = "inverse_eotf_bt1886", | |||
| }; | |||
| static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = { | |||
| [AVCOL_SPC_BT709] = { 0.2126, 0.7152, 0.0722 }, | |||
| [AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 }, | |||
| }; | |||
| struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = { | |||
| [AVCOL_PRI_BT709] = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 }, | |||
| [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 }, | |||
| }; | |||
| struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = { | |||
| [AVCOL_PRI_BT709] = { 0.3127, 0.3290 }, | |||
| [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 }, | |||
| }; | |||
| const char *tonemap_func[TONEMAP_MAX] = { | |||
| [TONEMAP_NONE] = "direct", | |||
| [TONEMAP_LINEAR] = "linear", | |||
| [TONEMAP_GAMMA] = "gamma", | |||
| [TONEMAP_CLIP] = "clip", | |||
| [TONEMAP_REINHARD] = "reinhard", | |||
| [TONEMAP_HABLE] = "hable", | |||
| [TONEMAP_MOBIUS] = "mobius", | |||
| }; | |||
| static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, | |||
| double rgb2rgb[3][3]) { | |||
| double rgb2xyz[3][3], xyz2rgb[3][3]; | |||
| fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz); | |||
| invert_matrix3x3(rgb2xyz, xyz2rgb); | |||
| fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz); | |||
| mul3x3(rgb2rgb, rgb2xyz, xyz2rgb); | |||
| } | |||
| #define OPENCL_SOURCE_NB 3 | |||
| // Average light level for SDR signals. This is equal to a signal level of 0.5 | |||
| // under a typical presentation gamma of about 2.0. | |||
| static const float sdr_avg = 0.25f; | |||
| static int tonemap_opencl_init(AVFilterContext *avctx) | |||
| { | |||
| TonemapOpenCLContext *ctx = avctx->priv; | |||
| int rgb2rgb_passthrough = 1; | |||
| double rgb2rgb[3][3]; | |||
| struct LumaCoefficients luma_src, luma_dst; | |||
| cl_int cle; | |||
| int err; | |||
| AVBPrint header; | |||
| const char *opencl_sources[OPENCL_SOURCE_NB]; | |||
| av_bprint_init(&header, 1024, AV_BPRINT_SIZE_AUTOMATIC); | |||
| switch(ctx->tonemap) { | |||
| case TONEMAP_GAMMA: | |||
| if (isnan(ctx->param)) | |||
| ctx->param = 1.8f; | |||
| break; | |||
| case TONEMAP_REINHARD: | |||
| if (!isnan(ctx->param)) | |||
| ctx->param = (1.0f - ctx->param) / ctx->param; | |||
| break; | |||
| case TONEMAP_MOBIUS: | |||
| if (isnan(ctx->param)) | |||
| ctx->param = 0.3f; | |||
| break; | |||
| } | |||
| if (isnan(ctx->param)) | |||
| ctx->param = 1.0f; | |||
| // SDR peak is 1.0f | |||
| ctx->target_peak = 1.0f; | |||
| av_log(ctx, AV_LOG_DEBUG, "tone mapping transfer from %s to %s\n", | |||
| av_color_transfer_name(ctx->trc_in), | |||
| av_color_transfer_name(ctx->trc_out)); | |||
| av_log(ctx, AV_LOG_DEBUG, "mapping colorspace from %s to %s\n", | |||
| av_color_space_name(ctx->colorspace_in), | |||
| av_color_space_name(ctx->colorspace_out)); | |||
| av_log(ctx, AV_LOG_DEBUG, "mapping primaries from %s to %s\n", | |||
| av_color_primaries_name(ctx->primaries_in), | |||
| av_color_primaries_name(ctx->primaries_out)); | |||
| av_log(ctx, AV_LOG_DEBUG, "mapping range from %s to %s\n", | |||
| av_color_range_name(ctx->range_in), | |||
| av_color_range_name(ctx->range_out)); | |||
| // checking valid value just because of limited implementaion | |||
| // please remove when more functionalities are implemented | |||
| av_assert0(ctx->trc_out == AVCOL_TRC_BT709 || | |||
| ctx->trc_out == AVCOL_TRC_BT2020_10); | |||
| av_assert0(ctx->trc_in == AVCOL_TRC_SMPTE2084|| | |||
| ctx->trc_in == AVCOL_TRC_ARIB_STD_B67); | |||
| av_assert0(ctx->colorspace_in == AVCOL_SPC_BT2020_NCL || | |||
| ctx->colorspace_in == AVCOL_SPC_BT709); | |||
| av_assert0(ctx->primaries_in == AVCOL_PRI_BT2020 || | |||
| ctx->primaries_in == AVCOL_PRI_BT709); | |||
| av_bprintf(&header, "__constant const float tone_param = %.4ff;\n", | |||
| ctx->param); | |||
| av_bprintf(&header, "__constant const float desat_param = %.4ff;\n", | |||
| ctx->desat_param); | |||
| av_bprintf(&header, "__constant const float target_peak = %.4ff;\n", | |||
| ctx->target_peak); | |||
| av_bprintf(&header, "__constant const float sdr_avg = %.4ff;\n", sdr_avg); | |||
| av_bprintf(&header, "__constant const float scene_threshold = %.4ff;\n", | |||
| ctx->scene_threshold); | |||
| av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]); | |||
| av_bprintf(&header, "#define DETECTION_FRAMES %d\n", DETECTION_FRAMES); | |||
| if (ctx->primaries_out != ctx->primaries_in) { | |||
| get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb); | |||
| rgb2rgb_passthrough = 0; | |||
| } | |||
| if (ctx->range_in == AVCOL_RANGE_JPEG) | |||
| av_bprintf(&header, "#define FULL_RANGE_IN\n"); | |||
| if (ctx->range_out == AVCOL_RANGE_JPEG) | |||
| av_bprintf(&header, "#define FULL_RANGE_OUT\n"); | |||
| av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc); | |||
| if (rgb2rgb_passthrough) | |||
| av_bprintf(&header, "#define RGB2RGB_PASSTHROUGH\n"); | |||
| else { | |||
| av_bprintf(&header, "__constant float rgb2rgb[9] = {\n"); | |||
| av_bprintf(&header, " %.4ff, %.4ff, %.4ff,\n", | |||
| rgb2rgb[0][0], rgb2rgb[0][1], rgb2rgb[0][2]); | |||
| av_bprintf(&header, " %.4ff, %.4ff, %.4ff,\n", | |||
| rgb2rgb[1][0], rgb2rgb[1][1], rgb2rgb[1][2]); | |||
| av_bprintf(&header, " %.4ff, %.4ff, %.4ff};\n", | |||
| rgb2rgb[2][0], rgb2rgb[2][1], rgb2rgb[2][2]); | |||
| } | |||
| av_bprintf(&header, "#define rgb_matrix %s\n", | |||
| rgb_coff[ctx->colorspace_in]); | |||
| av_bprintf(&header, "#define yuv_matrix %s\n", | |||
| yuv_coff[ctx->colorspace_out]); | |||
| luma_src = luma_coefficients[ctx->colorspace_in]; | |||
| luma_dst = luma_coefficients[ctx->colorspace_out]; | |||
| av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n", | |||
| luma_src.cr, luma_src.cg, luma_src.cb); | |||
| av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n", | |||
| luma_dst.cr, luma_dst.cg, luma_dst.cb); | |||
| av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); | |||
| av_bprintf(&header, "#define delinearize %s\n", | |||
| delinearize_funcs[ctx->trc_out]); | |||
| if (ctx->trc_in == AVCOL_TRC_ARIB_STD_B67) | |||
| av_bprintf(&header, "#define ootf_impl ootf_hlg\n"); | |||
| if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67) | |||
| av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n"); | |||
| av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); | |||
| opencl_sources[0] = header.str; | |||
| opencl_sources[1] = ff_opencl_source_tonemap; | |||
| opencl_sources[2] = ff_opencl_source_colorspace_common; | |||
| err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB); | |||
| av_bprint_finalize(&header, NULL); | |||
| if (err < 0) | |||
| goto fail; | |||
| ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, | |||
| ctx->ocf.hwctx->device_id, | |||
| 0, &cle); | |||
| if (!ctx->command_queue) { | |||
| av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL " | |||
| "command queue: %d.\n", cle); | |||
| err = AVERROR(EIO); | |||
| goto fail; | |||
| } | |||
| ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle); | |||
| if (!ctx->kernel) { | |||
| av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); | |||
| err = AVERROR(EIO); | |||
| goto fail; | |||
| } | |||
| ctx->util_mem = | |||
| clCreateBuffer(ctx->ocf.hwctx->context, 0, | |||
| (2 * DETECTION_FRAMES + 7) * sizeof(unsigned), | |||
| NULL, &cle); | |||
| if (cle != CL_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, "Failed to create util buffer: %d.\n", cle); | |||
| err = AVERROR(EIO); | |||
| goto fail; | |||
| } | |||
| ctx->initialised = 1; | |||
| return 0; | |||
| fail: | |||
| if (ctx->util_mem) | |||
| clReleaseMemObject(ctx->util_mem); | |||
| if (ctx->command_queue) | |||
| clReleaseCommandQueue(ctx->command_queue); | |||
| if (ctx->kernel) | |||
| clReleaseKernel(ctx->kernel); | |||
| return err; | |||
| } | |||
| static int tonemap_opencl_config_output(AVFilterLink *outlink) | |||
| { | |||
| AVFilterContext *avctx = outlink->src; | |||
| TonemapOpenCLContext *s = avctx->priv; | |||
| int ret; | |||
| if (s->format == AV_PIX_FMT_NONE) | |||
| av_log(avctx, AV_LOG_WARNING, "format not set, use default format NV12\n"); | |||
| else { | |||
| if (s->format != AV_PIX_FMT_P010 && | |||
| s->format != AV_PIX_FMT_NV12) { | |||
| av_log(avctx, AV_LOG_ERROR, "unsupported output format," | |||
| "only p010/nv12 supported now\n"); | |||
| return AVERROR(EINVAL); | |||
| } | |||
| } | |||
| s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? AV_PIX_FMT_NV12 : s->format; | |||
| ret = ff_opencl_filter_config_output(outlink); | |||
| if (ret < 0) | |||
| return ret; | |||
| return 0; | |||
| } | |||
| static int launch_kernel(AVFilterContext *avctx, cl_kernel kernel, | |||
| AVFrame *output, AVFrame *input, float peak) { | |||
| TonemapOpenCLContext *ctx = avctx->priv; | |||
| int err = AVERROR(ENOSYS); | |||
| size_t global_work[2]; | |||
| size_t local_work[2]; | |||
| cl_int cle; | |||
| CL_SET_KERNEL_ARG(kernel, 0, cl_mem, &output->data[0]); | |||
| CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]); | |||
| CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]); | |||
| CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]); | |||
| CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem); | |||
| CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak); | |||
| local_work[0] = 16; | |||
| local_work[1] = 16; | |||
| // Note the work size based on uv plane, as we process a 2x2 quad in one workitem | |||
| err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, | |||
| 1, 16); | |||
| if (err < 0) | |||
| return err; | |||
| cle = clEnqueueNDRangeKernel(ctx->command_queue, kernel, 2, NULL, | |||
| global_work, local_work, | |||
| 0, NULL, NULL); | |||
| if (cle != CL_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", | |||
| cle); | |||
| return AVERROR(EIO); | |||
| } | |||
| return 0; | |||
| fail: | |||
| return err; | |||
| } | |||
| static double determine_signal_peak(AVFrame *in) | |||
| { | |||
| AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); | |||
| double peak = 0; | |||
| if (sd) { | |||
| AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data; | |||
| peak = clm->MaxCLL / REFERENCE_WHITE; | |||
| } | |||
| sd = av_frame_get_side_data(in, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); | |||
| if (!peak && sd) { | |||
| AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data; | |||
| if (metadata->has_luminance) | |||
| peak = av_q2d(metadata->max_luminance) / REFERENCE_WHITE; | |||
| } | |||
| // For untagged source, use peak of 10000 if SMPTE ST.2084 | |||
| // otherwise assume HLG with reference display peak 1000. | |||
| if (!peak) | |||
| peak = in->color_trc == AVCOL_TRC_SMPTE2084 ? 100.0f : 10.0f; | |||
| return peak; | |||
| } | |||
| static void update_metadata(AVFrame *in, double peak) { | |||
| AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); | |||
| if (sd) { | |||
| AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data; | |||
| clm->MaxCLL = (unsigned)(peak * REFERENCE_WHITE); | |||
| } | |||
| sd = av_frame_get_side_data(in, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); | |||
| if (sd) { | |||
| AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data; | |||
| if (metadata->has_luminance) | |||
| metadata->max_luminance =av_d2q(peak * REFERENCE_WHITE, 10000); | |||
| } | |||
| } | |||
| static int tonemap_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) | |||
| { | |||
| AVFilterContext *avctx = inlink->dst; | |||
| AVFilterLink *outlink = avctx->outputs[0]; | |||
| TonemapOpenCLContext *ctx = avctx->priv; | |||
| AVFrame *output = NULL; | |||
| cl_int cle; | |||
| int err; | |||
| double peak = ctx->peak; | |||
| AVHWFramesContext *input_frames_ctx = | |||
| (AVHWFramesContext*)input->hw_frames_ctx->data; | |||
| av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", | |||
| av_get_pix_fmt_name(input->format), | |||
| input->width, input->height, input->pts); | |||
| if (!input->hw_frames_ctx) | |||
| return AVERROR(EINVAL); | |||
| output = ff_get_video_buffer(outlink, outlink->w, outlink->h); | |||
| if (!output) { | |||
| err = AVERROR(ENOMEM); | |||
| goto fail; | |||
| } | |||
| err = av_frame_copy_props(output, input); | |||
| if (err < 0) | |||
| goto fail; | |||
| if (!peak) | |||
| peak = determine_signal_peak(input); | |||
| if (ctx->trc != -1) | |||
| output->color_trc = ctx->trc; | |||
| if (ctx->primaries != -1) | |||
| output->color_primaries = ctx->primaries; | |||
| if (ctx->colorspace != -1) | |||
| output->colorspace = ctx->colorspace; | |||
| if (ctx->range != -1) | |||
| output->color_range = ctx->range; | |||
| ctx->trc_in = input->color_trc; | |||
| ctx->trc_out = output->color_trc; | |||
| ctx->colorspace_in = input->colorspace; | |||
| ctx->colorspace_out = output->colorspace; | |||
| ctx->primaries_in = input->color_primaries; | |||
| ctx->primaries_out = output->color_primaries; | |||
| ctx->range_in = input->color_range; | |||
| ctx->range_out = output->color_range; | |||
| ctx->chroma_loc = output->chroma_location; | |||
| if (!ctx->initialised) { | |||
| if (!(input->color_trc == AVCOL_TRC_SMPTE2084 || | |||
| input->color_trc == AVCOL_TRC_ARIB_STD_B67)) { | |||
| av_log(ctx, AV_LOG_ERROR, "unsupported transfer function characteristic.\n"); | |||
| err = AVERROR(ENOSYS); | |||
| goto fail; | |||
| } | |||
| if (input_frames_ctx->sw_format != AV_PIX_FMT_P010) { | |||
| av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n"); | |||
| err = AVERROR(ENOSYS); | |||
| goto fail; | |||
| } | |||
| err = tonemap_opencl_init(avctx); | |||
| if (err < 0) | |||
| goto fail; | |||
| } | |||
| switch(input_frames_ctx->sw_format) { | |||
| case AV_PIX_FMT_P010: | |||
| err = launch_kernel(avctx, ctx->kernel, output, input, peak); | |||
| if (err < 0) goto fail; | |||
| break; | |||
| default: | |||
| err = AVERROR(ENOSYS); | |||
| goto fail; | |||
| } | |||
| cle = clFinish(ctx->command_queue); | |||
| if (cle != CL_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n", | |||
| cle); | |||
| err = AVERROR(EIO); | |||
| goto fail; | |||
| } | |||
| av_frame_free(&input); | |||
| update_metadata(output, ctx->target_peak); | |||
| av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n", | |||
| av_get_pix_fmt_name(output->format), | |||
| output->width, output->height, output->pts); | |||
| #ifndef NDEBUG | |||
| { | |||
| uint32_t *ptr, *max_total_p, *avg_total_p, *frame_number_p; | |||
| float peak_detected, avg_detected; | |||
| unsigned map_size = (2 * DETECTION_FRAMES + 7) * sizeof(unsigned); | |||
| ptr = (void *)clEnqueueMapBuffer(ctx->command_queue, ctx->util_mem, | |||
| CL_TRUE, CL_MAP_READ, 0, map_size, | |||
| 0, NULL, NULL, &cle); | |||
| // For the layout of the util buffer, refer tonemap.cl | |||
| if (ptr) { | |||
| max_total_p = ptr + 2 * (DETECTION_FRAMES + 1) + 1; | |||
| avg_total_p = max_total_p + 1; | |||
| frame_number_p = avg_total_p + 2; | |||
| peak_detected = (float)*max_total_p / (REFERENCE_WHITE * (*frame_number_p)); | |||
| avg_detected = (float)*avg_total_p / (REFERENCE_WHITE * (*frame_number_p)); | |||
| av_log(ctx, AV_LOG_DEBUG, "peak %f, avg %f will be used for next frame\n", | |||
| peak_detected, avg_detected); | |||
| clEnqueueUnmapMemObject(ctx->command_queue, ctx->util_mem, ptr, 0, | |||
| NULL, NULL); | |||
| } | |||
| } | |||
| #endif | |||
| return ff_filter_frame(outlink, output); | |||
| fail: | |||
| clFinish(ctx->command_queue); | |||
| av_frame_free(&input); | |||
| av_frame_free(&output); | |||
| return err; | |||
| } | |||
| static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) | |||
| { | |||
| TonemapOpenCLContext *ctx = avctx->priv; | |||
| cl_int cle; | |||
| if (ctx->util_mem) | |||
| clReleaseMemObject(ctx->util_mem); | |||
| if (ctx->kernel) { | |||
| cle = clReleaseKernel(ctx->kernel); | |||
| if (cle != CL_SUCCESS) | |||
| av_log(avctx, AV_LOG_ERROR, "Failed to release " | |||
| "kernel: %d.\n", cle); | |||
| } | |||
| if (ctx->command_queue) { | |||
| cle = clReleaseCommandQueue(ctx->command_queue); | |||
| if (cle != CL_SUCCESS) | |||
| av_log(avctx, AV_LOG_ERROR, "Failed to release " | |||
| "command queue: %d.\n", cle); | |||
| } | |||
| ff_opencl_filter_uninit(avctx); | |||
| } | |||
| #define OFFSET(x) offsetof(TonemapOpenCLContext, x) | |||
| #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) | |||
| static const AVOption tonemap_opencl_options[] = { | |||
| { "tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" }, | |||
| { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, "tonemap" }, | |||
| { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, "tonemap" }, | |||
| { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, "tonemap" }, | |||
| { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, "tonemap" }, | |||
| { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" }, | |||
| { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, "tonemap" }, | |||
| { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, "tonemap" }, | |||
| { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, | |||
| { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, | |||
| { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, "transfer" }, | |||
| { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, "transfer" }, | |||
| { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" }, | |||
| { "m", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" }, | |||
| { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, "matrix" }, | |||
| { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, "matrix" }, | |||
| { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" }, | |||
| { "p", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" }, | |||
| { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, "primaries" }, | |||
| { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, "primaries" }, | |||
| { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" }, | |||
| { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" }, | |||
| { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, | |||
| { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, | |||
| { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, | |||
| { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, | |||
| { "format", "output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, | |||
| { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, | |||
| { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, | |||
| { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, | |||
| { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, | |||
| { NULL } | |||
| }; | |||
| AVFILTER_DEFINE_CLASS(tonemap_opencl); | |||
| static const AVFilterPad tonemap_opencl_inputs[] = { | |||
| { | |||
| .name = "default", | |||
| .type = AVMEDIA_TYPE_VIDEO, | |||
| .filter_frame = &tonemap_opencl_filter_frame, | |||
| .config_props = &ff_opencl_filter_config_input, | |||
| }, | |||
| { NULL } | |||
| }; | |||
| static const AVFilterPad tonemap_opencl_outputs[] = { | |||
| { | |||
| .name = "default", | |||
| .type = AVMEDIA_TYPE_VIDEO, | |||
| .config_props = &tonemap_opencl_config_output, | |||
| }, | |||
| { NULL } | |||
| }; | |||
| AVFilter ff_vf_tonemap_opencl = { | |||
| .name = "tonemap_opencl", | |||
| .description = NULL_IF_CONFIG_SMALL("perform HDR to SDR conversion with tonemapping"), | |||
| .priv_size = sizeof(TonemapOpenCLContext), | |||
| .priv_class = &tonemap_opencl_class, | |||
| .init = &ff_opencl_filter_init, | |||
| .uninit = &tonemap_opencl_uninit, | |||
| .query_formats = &ff_opencl_filter_query_formats, | |||
| .inputs = tonemap_opencl_inputs, | |||
| .outputs = tonemap_opencl_outputs, | |||
| .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, | |||
| }; | |||