| @@ -24,6 +24,7 @@ | |||||
| #include "mem.h" | #include "mem.h" | ||||
| #include "pixdesc.h" | #include "pixdesc.h" | ||||
| #include "pixfmt.h" | #include "pixfmt.h" | ||||
| #include "imgutils.h" | |||||
| #define CUDA_FRAME_ALIGNMENT 256 | #define CUDA_FRAME_ALIGNMENT 256 | ||||
| @@ -117,7 +118,6 @@ fail: | |||||
| static int cuda_frames_init(AVHWFramesContext *ctx) | static int cuda_frames_init(AVHWFramesContext *ctx) | ||||
| { | { | ||||
| CUDAFramesContext *priv = ctx->internal->priv; | CUDAFramesContext *priv = ctx->internal->priv; | ||||
| int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT); | |||||
| int i; | int i; | ||||
| for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { | for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { | ||||
| @@ -133,29 +133,9 @@ static int cuda_frames_init(AVHWFramesContext *ctx) | |||||
| av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height); | av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height); | ||||
| if (!ctx->pool) { | if (!ctx->pool) { | ||||
| int size; | |||||
| switch (ctx->sw_format) { | |||||
| case AV_PIX_FMT_NV12: | |||||
| case AV_PIX_FMT_YUV420P: | |||||
| size = aligned_width * ctx->height * 3 / 2; | |||||
| break; | |||||
| case AV_PIX_FMT_YUV444P: | |||||
| case AV_PIX_FMT_P010: | |||||
| case AV_PIX_FMT_P016: | |||||
| size = aligned_width * ctx->height * 3; | |||||
| break; | |||||
| case AV_PIX_FMT_YUV444P16: | |||||
| size = aligned_width * ctx->height * 6; | |||||
| break; | |||||
| case AV_PIX_FMT_0RGB32: | |||||
| case AV_PIX_FMT_0BGR32: | |||||
| size = aligned_width * ctx->height * 4; | |||||
| break; | |||||
| default: | |||||
| av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation."); | |||||
| return AVERROR_BUG; | |||||
| } | |||||
| int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT); | |||||
| if (size < 0) | |||||
| return size; | |||||
| ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL); | ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL); | ||||
| if (!ctx->internal->pool_internal) | if (!ctx->internal->pool_internal) | ||||
| @@ -167,54 +147,22 @@ static int cuda_frames_init(AVHWFramesContext *ctx) | |||||
| static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) | static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) | ||||
| { | { | ||||
| int aligned_width; | |||||
| int width_in_bytes = ctx->width; | |||||
| if (ctx->sw_format == AV_PIX_FMT_P010 || | |||||
| ctx->sw_format == AV_PIX_FMT_P016 || | |||||
| ctx->sw_format == AV_PIX_FMT_YUV444P16) { | |||||
| width_in_bytes *= 2; | |||||
| } | |||||
| aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT); | |||||
| int res; | |||||
| frame->buf[0] = av_buffer_pool_get(ctx->pool); | frame->buf[0] = av_buffer_pool_get(ctx->pool); | ||||
| if (!frame->buf[0]) | if (!frame->buf[0]) | ||||
| return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
| switch (ctx->sw_format) { | |||||
| case AV_PIX_FMT_NV12: | |||||
| case AV_PIX_FMT_P010: | |||||
| case AV_PIX_FMT_P016: | |||||
| frame->data[0] = frame->buf[0]->data; | |||||
| frame->data[1] = frame->data[0] + aligned_width * ctx->height; | |||||
| frame->linesize[0] = aligned_width; | |||||
| frame->linesize[1] = aligned_width; | |||||
| break; | |||||
| case AV_PIX_FMT_YUV420P: | |||||
| frame->data[0] = frame->buf[0]->data; | |||||
| frame->data[2] = frame->data[0] + aligned_width * ctx->height; | |||||
| frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4; | |||||
| frame->linesize[0] = aligned_width; | |||||
| frame->linesize[1] = aligned_width / 2; | |||||
| frame->linesize[2] = aligned_width / 2; | |||||
| break; | |||||
| case AV_PIX_FMT_YUV444P: | |||||
| case AV_PIX_FMT_YUV444P16: | |||||
| frame->data[0] = frame->buf[0]->data; | |||||
| frame->data[1] = frame->data[0] + aligned_width * ctx->height; | |||||
| frame->data[2] = frame->data[1] + aligned_width * ctx->height; | |||||
| frame->linesize[0] = aligned_width; | |||||
| frame->linesize[1] = aligned_width; | |||||
| frame->linesize[2] = aligned_width; | |||||
| break; | |||||
| case AV_PIX_FMT_0BGR32: | |||||
| case AV_PIX_FMT_0RGB32: | |||||
| frame->data[0] = frame->buf[0]->data; | |||||
| frame->linesize[0] = aligned_width * 4; | |||||
| break; | |||||
| default: | |||||
| av_frame_unref(frame); | |||||
| return AVERROR_BUG; | |||||
| res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data, | |||||
| ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT); | |||||
| if (res < 0) | |||||
| return res; | |||||
| // YUV420P is a special case. | |||||
| // Nvenc expects the U/V planes in swapped order from how ffmpeg expects them. | |||||
| if (ctx->sw_format == AV_PIX_FMT_YUV420P) { | |||||
| FFSWAP(uint8_t*, frame->data[1], frame->data[2]); | |||||
| FFSWAP(int, frame->linesize[1], frame->linesize[2]); | |||||
| } | } | ||||
| frame->format = AV_PIX_FMT_CUDA; | frame->format = AV_PIX_FMT_CUDA; | ||||