Add a new function to find the global work size given the output image and the required block alignment, then use it in the overlay, program and unsharp filters. Fixes the overlay and unsharp filters applying the kernel to locations outside the frame when subsampled planes are present.
tags/n4.0
@@ -22,6 +22,7 @@ | |||||
#include "libavutil/hwcontext.h" | #include "libavutil/hwcontext.h" | ||||
#include "libavutil/hwcontext_opencl.h" | #include "libavutil/hwcontext_opencl.h" | ||||
#include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
#include "libavutil/pixdesc.h" | |||||
#include "avfilter.h" | #include "avfilter.h" | ||||
#include "formats.h" | #include "formats.h" | ||||
@@ -276,3 +277,66 @@ fail: | |||||
av_freep(&src); | av_freep(&src); | ||||
return err; | return err; | ||||
} | } | ||||
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, | |||||
size_t *work_size, | |||||
AVFrame *frame, int plane, | |||||
int block_alignment) | |||||
{ | |||||
cl_mem image; | |||||
cl_mem_object_type type; | |||||
size_t width, height; | |||||
cl_int cle; | |||||
if (frame->format != AV_PIX_FMT_OPENCL) { | |||||
av_log(avctx, AV_LOG_ERROR, "Invalid frame format %s, " | |||||
"opencl required.\n", av_get_pix_fmt_name(frame->format)); | |||||
return AVERROR(EINVAL); | |||||
} | |||||
image = (cl_mem)frame->data[plane]; | |||||
if (!image) { | |||||
av_log(avctx, AV_LOG_ERROR, "Plane %d required but not set.\n", | |||||
plane); | |||||
return AVERROR(EINVAL); | |||||
} | |||||
cle = clGetMemObjectInfo(image, CL_MEM_TYPE, sizeof(type), | |||||
&type, NULL); | |||||
if (cle != CL_SUCCESS) { | |||||
av_log(avctx, AV_LOG_ERROR, "Failed to query object type of " | |||||
"plane %d: %d.\n", plane, cle); | |||||
return AVERROR_UNKNOWN; | |||||
} | |||||
if (type != CL_MEM_OBJECT_IMAGE2D) { | |||||
av_log(avctx, AV_LOG_ERROR, "Plane %d is not a 2D image.\n", | |||||
plane); | |||||
return AVERROR(EINVAL); | |||||
} | |||||
cle = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(size_t), | |||||
&width, NULL); | |||||
if (cle != CL_SUCCESS) { | |||||
av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d width: %d.\n", | |||||
plane, cle); | |||||
return AVERROR_UNKNOWN; | |||||
} | |||||
cle = clGetImageInfo(image, CL_IMAGE_HEIGHT, sizeof(size_t), | |||||
&height, NULL); | |||||
if (cle != CL_SUCCESS) { | |||||
av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d height: %d.\n", | |||||
plane, cle); | |||||
return AVERROR_UNKNOWN; | |||||
} | |||||
if (block_alignment) { | |||||
width = FFALIGN(width, block_alignment); | |||||
height = FFALIGN(height, block_alignment); | |||||
} | |||||
work_size[0] = width; | |||||
work_size[1] = height; | |||||
return 0; | |||||
} |
@@ -84,4 +84,12 @@ int ff_opencl_filter_load_program(AVFilterContext *avctx, | |||||
int ff_opencl_filter_load_program_from_file(AVFilterContext *avctx, | int ff_opencl_filter_load_program_from_file(AVFilterContext *avctx, | ||||
const char *filename); | const char *filename); | ||||
/** | |||||
* Find the work size needed for a given plane of an image. | |||||
*/ | |||||
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, | |||||
size_t *work_size, | |||||
AVFrame *frame, int plane, | |||||
int block_alignment); | |||||
#endif /* AVFILTER_OPENCL_H */ | #endif /* AVFILTER_OPENCL_H */ |
@@ -216,8 +216,10 @@ static int overlay_opencl_blend(FFFrameSync *fs) | |||||
goto fail_kernel_arg; | goto fail_kernel_arg; | ||||
} | } | ||||
global_work[0] = output->width; | |||||
global_work[1] = output->height; | |||||
err = ff_opencl_filter_work_size_from_image(avctx, global_work, | |||||
output, plane, 0); | |||||
if (err < 0) | |||||
goto fail; | |||||
cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, | cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, | ||||
global_work, NULL, 0, NULL, NULL); | global_work, NULL, 0, NULL, NULL); | ||||
@@ -142,10 +142,10 @@ static int program_opencl_run(AVFilterContext *avctx) | |||||
} | } | ||||
} | } | ||||
cle = clGetImageInfo(dst, CL_IMAGE_WIDTH, sizeof(size_t), | |||||
&global_work[0], NULL); | |||||
cle = clGetImageInfo(dst, CL_IMAGE_HEIGHT, sizeof(size_t), | |||||
&global_work[1], NULL); | |||||
err = ff_opencl_filter_work_size_from_image(avctx, global_work, | |||||
output, plane, 0); | |||||
if (err < 0) | |||||
goto fail; | |||||
av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " | av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " | ||||
"(%zux%zu).\n", plane, global_work[0], global_work[1]); | "(%zux%zu).\n", plane, global_work[0], global_work[1]); | ||||
@@ -320,15 +320,13 @@ static int unsharp_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) | |||||
} | } | ||||
} | } | ||||
if (ctx->global) { | |||||
global_work[0] = output->width; | |||||
global_work[1] = output->height; | |||||
} else { | |||||
global_work[0] = FFALIGN(output->width, 16); | |||||
global_work[1] = FFALIGN(output->height, 16); | |||||
local_work[0] = 16; | |||||
local_work[1] = 16; | |||||
} | |||||
err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, | |||||
ctx->global ? 0 : 16); | |||||
if (err < 0) | |||||
goto fail; | |||||
local_work[0] = 16; | |||||
local_work[1] = 16; | |||||
av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " | av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " | ||||
"(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", | "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", | ||||