|
|
@@ -0,0 +1,360 @@ |
|
|
|
/* |
|
|
|
* This file is part of FFmpeg. |
|
|
|
* |
|
|
|
* FFmpeg is free software; you can redistribute it and/or |
|
|
|
* modify it under the terms of the GNU Lesser General Public |
|
|
|
* License as published by the Free Software Foundation; either |
|
|
|
* version 2.1 of the License, or (at your option) any later version. |
|
|
|
* |
|
|
|
* FFmpeg is distributed in the hope that it will be useful, |
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
|
|
* Lesser General Public License for more details. |
|
|
|
* |
|
|
|
* You should have received a copy of the GNU Lesser General Public |
|
|
|
* License along with FFmpeg; if not, write to the Free Software |
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "libavutil/avassert.h" |
|
|
|
#include "libavutil/buffer.h" |
|
|
|
#include "libavutil/common.h" |
|
|
|
#include "libavutil/hwcontext.h" |
|
|
|
#include "libavutil/hwcontext_opencl.h" |
|
|
|
#include "libavutil/log.h" |
|
|
|
#include "libavutil/mathematics.h" |
|
|
|
#include "libavutil/mem.h" |
|
|
|
#include "libavutil/pixdesc.h" |
|
|
|
#include "libavutil/opt.h" |
|
|
|
|
|
|
|
#include "avfilter.h" |
|
|
|
#include "framesync.h" |
|
|
|
#include "internal.h" |
|
|
|
#include "opencl.h" |
|
|
|
#include "opencl_source.h" |
|
|
|
#include "video.h" |
|
|
|
|
|
|
|
typedef struct OverlayOpenCLContext { |
|
|
|
OpenCLFilterContext ocf; |
|
|
|
|
|
|
|
int initialised; |
|
|
|
cl_kernel kernel; |
|
|
|
cl_command_queue command_queue; |
|
|
|
|
|
|
|
FFFrameSync fs; |
|
|
|
|
|
|
|
int nb_planes; |
|
|
|
int x_subsample; |
|
|
|
int y_subsample; |
|
|
|
int alpha_separate; |
|
|
|
|
|
|
|
int x_position; |
|
|
|
int y_position; |
|
|
|
} OverlayOpenCLContext; |
|
|
|
|
|
|
|
static int overlay_opencl_load(AVFilterContext *avctx, |
|
|
|
enum AVPixelFormat main_format, |
|
|
|
enum AVPixelFormat overlay_format) |
|
|
|
{ |
|
|
|
OverlayOpenCLContext *ctx = avctx->priv; |
|
|
|
cl_int cle; |
|
|
|
const char *source = ff_opencl_source_overlay; |
|
|
|
const char *kernel; |
|
|
|
const AVPixFmtDescriptor *main_desc, *overlay_desc; |
|
|
|
int err, i, main_planes, overlay_planes; |
|
|
|
|
|
|
|
main_desc = av_pix_fmt_desc_get(main_format); |
|
|
|
overlay_desc = av_pix_fmt_desc_get(overlay_format); |
|
|
|
|
|
|
|
main_planes = overlay_planes = 0; |
|
|
|
for (i = 0; i < main_desc->nb_components; i++) |
|
|
|
main_planes = FFMAX(main_planes, |
|
|
|
main_desc->comp[i].plane + 1); |
|
|
|
for (i = 0; i < overlay_desc->nb_components; i++) |
|
|
|
overlay_planes = FFMAX(overlay_planes, |
|
|
|
overlay_desc->comp[i].plane + 1); |
|
|
|
|
|
|
|
ctx->nb_planes = main_planes; |
|
|
|
ctx->x_subsample = 1 << main_desc->log2_chroma_w; |
|
|
|
ctx->y_subsample = 1 << main_desc->log2_chroma_h; |
|
|
|
|
|
|
|
if (ctx->x_position % ctx->x_subsample || |
|
|
|
ctx->y_position % ctx->y_subsample) { |
|
|
|
av_log(avctx, AV_LOG_WARNING, "Warning: overlay position (%d, %d) " |
|
|
|
"does not match subsampling (%d, %d).\n", |
|
|
|
ctx->x_position, ctx->y_position, |
|
|
|
ctx->x_subsample, ctx->y_subsample); |
|
|
|
} |
|
|
|
|
|
|
|
if (main_planes == overlay_planes) { |
|
|
|
if (main_desc->nb_components == overlay_desc->nb_components) |
|
|
|
kernel = "overlay_no_alpha"; |
|
|
|
else |
|
|
|
kernel = "overlay_internal_alpha"; |
|
|
|
ctx->alpha_separate = 0; |
|
|
|
} else { |
|
|
|
kernel = "overlay_external_alpha"; |
|
|
|
ctx->alpha_separate = 1; |
|
|
|
} |
|
|
|
|
|
|
|
av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel); |
|
|
|
|
|
|
|
err = ff_opencl_filter_load_program(avctx, &source, 1); |
|
|
|
if (err < 0) |
|
|
|
goto fail; |
|
|
|
|
|
|
|
ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, |
|
|
|
ctx->ocf.hwctx->device_id, |
|
|
|
0, &cle); |
|
|
|
if (!ctx->command_queue) { |
|
|
|
av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL " |
|
|
|
"command queue: %d.\n", cle); |
|
|
|
err = AVERROR(EIO); |
|
|
|
goto fail; |
|
|
|
} |
|
|
|
|
|
|
|
ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle); |
|
|
|
if (!ctx->kernel) { |
|
|
|
av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); |
|
|
|
err = AVERROR(EIO); |
|
|
|
goto fail; |
|
|
|
} |
|
|
|
|
|
|
|
ctx->initialised = 1; |
|
|
|
return 0; |
|
|
|
|
|
|
|
fail: |
|
|
|
if (ctx->command_queue) |
|
|
|
clReleaseCommandQueue(ctx->command_queue); |
|
|
|
if (ctx->kernel) |
|
|
|
clReleaseKernel(ctx->kernel); |
|
|
|
return err; |
|
|
|
} |
|
|
|
|
|
|
|
static int overlay_opencl_blend(FFFrameSync *fs) |
|
|
|
{ |
|
|
|
AVFilterContext *avctx = fs->parent; |
|
|
|
AVFilterLink *outlink = avctx->outputs[0]; |
|
|
|
OverlayOpenCLContext *ctx = avctx->priv; |
|
|
|
AVFrame *input_main, *input_overlay; |
|
|
|
AVFrame *output; |
|
|
|
cl_mem mem; |
|
|
|
cl_int cle, x, y; |
|
|
|
size_t global_work[2]; |
|
|
|
int kernel_arg = 0; |
|
|
|
int err, plane; |
|
|
|
|
|
|
|
err = ff_framesync_get_frame(fs, 0, &input_main, 0); |
|
|
|
if (err < 0) |
|
|
|
return err; |
|
|
|
err = ff_framesync_get_frame(fs, 1, &input_overlay, 0); |
|
|
|
if (err < 0) |
|
|
|
return err; |
|
|
|
|
|
|
|
if (!ctx->initialised) { |
|
|
|
AVHWFramesContext *main_fc = |
|
|
|
(AVHWFramesContext*)input_main->hw_frames_ctx->data; |
|
|
|
AVHWFramesContext *overlay_fc = |
|
|
|
(AVHWFramesContext*)input_overlay->hw_frames_ctx->data; |
|
|
|
|
|
|
|
err = overlay_opencl_load(avctx, main_fc->sw_format, |
|
|
|
overlay_fc->sw_format); |
|
|
|
if (err < 0) |
|
|
|
return err; |
|
|
|
} |
|
|
|
|
|
|
|
output = ff_get_video_buffer(outlink, outlink->w, outlink->h); |
|
|
|
if (!output) { |
|
|
|
err = AVERROR(ENOMEM); |
|
|
|
goto fail; |
|
|
|
} |
|
|
|
|
|
|
|
for (plane = 0; plane < ctx->nb_planes; plane++) { |
|
|
|
kernel_arg = 0; |
|
|
|
|
|
|
|
mem = (cl_mem)output->data[plane]; |
|
|
|
cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
goto fail_kernel_arg; |
|
|
|
|
|
|
|
mem = (cl_mem)input_main->data[plane]; |
|
|
|
cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
goto fail_kernel_arg; |
|
|
|
|
|
|
|
mem = (cl_mem)input_overlay->data[plane]; |
|
|
|
cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
goto fail_kernel_arg; |
|
|
|
|
|
|
|
if (ctx->alpha_separate) { |
|
|
|
mem = (cl_mem)input_overlay->data[ctx->nb_planes]; |
|
|
|
cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
goto fail_kernel_arg; |
|
|
|
} |
|
|
|
|
|
|
|
x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample); |
|
|
|
y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample); |
|
|
|
|
|
|
|
cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &x); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
goto fail_kernel_arg; |
|
|
|
cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &y); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
goto fail_kernel_arg; |
|
|
|
|
|
|
|
if (ctx->alpha_separate) { |
|
|
|
cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample; |
|
|
|
cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample; |
|
|
|
|
|
|
|
cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &alpha_adj_x); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
goto fail_kernel_arg; |
|
|
|
cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &alpha_adj_y); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
goto fail_kernel_arg; |
|
|
|
} |
|
|
|
|
|
|
|
global_work[0] = output->width; |
|
|
|
global_work[1] = output->height; |
|
|
|
|
|
|
|
cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, |
|
|
|
global_work, NULL, 0, NULL, NULL); |
|
|
|
if (cle != CL_SUCCESS) { |
|
|
|
av_log(avctx, AV_LOG_ERROR, "Failed to enqueue " |
|
|
|
"overlay kernel for plane %d: %d.\n", cle, plane); |
|
|
|
err = AVERROR(EIO); |
|
|
|
goto fail; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
cle = clFinish(ctx->command_queue); |
|
|
|
if (cle != CL_SUCCESS) { |
|
|
|
av_log(avctx, AV_LOG_ERROR, "Failed to finish " |
|
|
|
"command queue: %d.\n", cle); |
|
|
|
err = AVERROR(EIO); |
|
|
|
goto fail; |
|
|
|
} |
|
|
|
|
|
|
|
err = av_frame_copy_props(output, input_main); |
|
|
|
|
|
|
|
av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n", |
|
|
|
av_get_pix_fmt_name(output->format), |
|
|
|
output->width, output->height, output->pts); |
|
|
|
|
|
|
|
return ff_filter_frame(outlink, output); |
|
|
|
|
|
|
|
fail_kernel_arg: |
|
|
|
av_log(avctx, AV_LOG_ERROR, "Failed to set kernel arg %d: %d.\n", |
|
|
|
kernel_arg, cle); |
|
|
|
err = AVERROR(EIO); |
|
|
|
fail: |
|
|
|
return err; |
|
|
|
} |
|
|
|
|
|
|
|
static int overlay_opencl_config_output(AVFilterLink *outlink) |
|
|
|
{ |
|
|
|
AVFilterContext *avctx = outlink->src; |
|
|
|
OverlayOpenCLContext *ctx = avctx->priv; |
|
|
|
int err; |
|
|
|
|
|
|
|
err = ff_opencl_filter_config_output(outlink); |
|
|
|
if (err < 0) |
|
|
|
return err; |
|
|
|
|
|
|
|
err = ff_framesync_init_dualinput(&ctx->fs, avctx); |
|
|
|
if (err < 0) |
|
|
|
return err; |
|
|
|
|
|
|
|
return ff_framesync_configure(&ctx->fs); |
|
|
|
} |
|
|
|
|
|
|
|
static av_cold int overlay_opencl_init(AVFilterContext *avctx) |
|
|
|
{ |
|
|
|
OverlayOpenCLContext *ctx = avctx->priv; |
|
|
|
|
|
|
|
ctx->fs.on_event = &overlay_opencl_blend; |
|
|
|
|
|
|
|
return ff_opencl_filter_init(avctx); |
|
|
|
} |
|
|
|
|
|
|
|
static int overlay_opencl_activate(AVFilterContext *avctx) |
|
|
|
{ |
|
|
|
OverlayOpenCLContext *ctx = avctx->priv; |
|
|
|
|
|
|
|
return ff_framesync_activate(&ctx->fs); |
|
|
|
} |
|
|
|
|
|
|
|
static av_cold void overlay_opencl_uninit(AVFilterContext *avctx) |
|
|
|
{ |
|
|
|
OverlayOpenCLContext *ctx = avctx->priv; |
|
|
|
cl_int cle; |
|
|
|
|
|
|
|
if (ctx->kernel) { |
|
|
|
cle = clReleaseKernel(ctx->kernel); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
av_log(avctx, AV_LOG_ERROR, "Failed to release " |
|
|
|
"kernel: %d.\n", cle); |
|
|
|
} |
|
|
|
|
|
|
|
if (ctx->command_queue) { |
|
|
|
cle = clReleaseCommandQueue(ctx->command_queue); |
|
|
|
if (cle != CL_SUCCESS) |
|
|
|
av_log(avctx, AV_LOG_ERROR, "Failed to release " |
|
|
|
"command queue: %d.\n", cle); |
|
|
|
} |
|
|
|
|
|
|
|
ff_opencl_filter_uninit(avctx); |
|
|
|
|
|
|
|
ff_framesync_uninit(&ctx->fs); |
|
|
|
} |
|
|
|
|
|
|
|
#define OFFSET(x) offsetof(OverlayOpenCLContext, x) |
|
|
|
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) |
|
|
|
static const AVOption overlay_opencl_options[] = { |
|
|
|
{ "x", "Overlay x position", |
|
|
|
OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, |
|
|
|
{ "y", "Overlay y position", |
|
|
|
OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, |
|
|
|
{ NULL }, |
|
|
|
}; |
|
|
|
|
|
|
|
AVFILTER_DEFINE_CLASS(overlay_opencl); |
|
|
|
|
|
|
|
static const AVFilterPad overlay_opencl_inputs[] = { |
|
|
|
{ |
|
|
|
.name = "main", |
|
|
|
.type = AVMEDIA_TYPE_VIDEO, |
|
|
|
.config_props = &ff_opencl_filter_config_input, |
|
|
|
}, |
|
|
|
{ |
|
|
|
.name = "overlay", |
|
|
|
.type = AVMEDIA_TYPE_VIDEO, |
|
|
|
.config_props = &ff_opencl_filter_config_input, |
|
|
|
}, |
|
|
|
{ NULL } |
|
|
|
}; |
|
|
|
|
|
|
|
static const AVFilterPad overlay_opencl_outputs[] = { |
|
|
|
{ |
|
|
|
.name = "default", |
|
|
|
.type = AVMEDIA_TYPE_VIDEO, |
|
|
|
.config_props = &overlay_opencl_config_output, |
|
|
|
}, |
|
|
|
{ NULL } |
|
|
|
}; |
|
|
|
|
|
|
|
AVFilter ff_vf_overlay_opencl = { |
|
|
|
.name = "overlay_opencl", |
|
|
|
.description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"), |
|
|
|
.priv_size = sizeof(OverlayOpenCLContext), |
|
|
|
.priv_class = &overlay_opencl_class, |
|
|
|
.init = &overlay_opencl_init, |
|
|
|
.uninit = &overlay_opencl_uninit, |
|
|
|
.query_formats = &ff_opencl_filter_query_formats, |
|
|
|
.activate = &overlay_opencl_activate, |
|
|
|
.inputs = overlay_opencl_inputs, |
|
|
|
.outputs = overlay_opencl_outputs, |
|
|
|
.flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, |
|
|
|
}; |