@@ -34,8 +34,14 @@
 #include "avcodec.h"
 #include "decode.h"
 #include "hwaccel.h"
 #include "nvdec.h"
 #include "internal.h"
 
+#if !NVDECAPI_CHECK_VERSION(9, 0)
+#define cudaVideoSurfaceFormat_YUV444 2
+#define cudaVideoSurfaceFormat_YUV444_16Bit 3
+#endif
+
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -106,6 +112,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     CUVIDDECODECAPS *caps = NULL;
     CUVIDDECODECREATEINFO cuinfo;
     int surface_fmt;
+    int chroma_444;
 
     int old_width = avctx->width;
     int old_height = avctx->height;
@@ -148,17 +155,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     cuinfo.target_rect.right = cuinfo.ulTargetWidth;
     cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
 
+    chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
+
     switch (format->bit_depth_luma_minus8) {
     case 0: // 8-bit
-        pix_fmts[1] = AV_PIX_FMT_NV12;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         caps = &ctx->caps8;
         break;
     case 2: // 10-bit
-        pix_fmts[1] = AV_PIX_FMT_P010;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
         caps = &ctx->caps10;
         break;
     case 4: // 12-bit
-        pix_fmts[1] = AV_PIX_FMT_P016;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
         caps = &ctx->caps12;
         break;
     default:
@@ -261,12 +270,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
         return 0;
     }
 
-    if (format->chroma_format != cudaVideoChromaFormat_420) {
-        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
-        ctx->internal_error = AVERROR(EINVAL);
-        return 0;
-    }
-
     ctx->chroma_format = format->chroma_format;
 
     cuinfo.CodecType = ctx->codec_type = format->codec;
@@ -280,8 +283,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     case AV_PIX_FMT_P016:
         cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
         break;
+    case AV_PIX_FMT_YUV444P:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
+        break;
+    case AV_PIX_FMT_YUV444P16:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
+        break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
+        av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
+               av_get_pix_fmt_name(avctx->sw_pix_fmt));
         ctx->internal_error = AVERROR(EINVAL);
         return 0;
     }
@@ -490,6 +500,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
         return ret;
 
     if (av_fifo_size(ctx->frame_queue)) {
+        const AVPixFmtDescriptor *pixdesc;
         CuvidParsedFrame parsed_frame;
         CUVIDPROCPARAMS params;
         unsigned int pitch = 0;
@@ -520,7 +531,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
-            for (i = 0; i < 2; i++) {
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
                 CUDA_MEMCPY2D cpy = {
                     .srcMemoryType = CU_MEMORYTYPE_DEVICE,
                     .dstMemoryType = CU_MEMORYTYPE_DEVICE,
@@ -530,22 +544,25 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                     .dstPitch      = frame->linesize[i],
                     .srcY          = offset,
                     .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
-                    .Height        = avctx->height >> (i ? 1 : 0),
+                    .Height        = height,
                 };
 
                 ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
                 if (ret < 0)
                     goto error;
 
-                offset += avctx->height;
+                offset += height;
             }
 
             ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
             if (ret < 0)
                 goto error;
-        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P010 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P016) {
+        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12      ||
+                   avctx->pix_fmt == AV_PIX_FMT_P010      ||
+                   avctx->pix_fmt == AV_PIX_FMT_P016      ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P   ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
             unsigned int offset = 0;
             AVFrame *tmp_frame = av_frame_alloc();
             if (!tmp_frame) {
                 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
@@ -553,15 +570,24 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
             tmp_frame->format        = AV_PIX_FMT_CUDA;
             tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
-            tmp_frame->data[0]       = (uint8_t*)mapped_frame;
-            tmp_frame->linesize[0]   = pitch;
-            tmp_frame->data[1]       = (uint8_t*)(mapped_frame + avctx->height * pitch);
-            tmp_frame->linesize[1]   = pitch;
             tmp_frame->width         = avctx->width;
             tmp_frame->height        = avctx->height;
 
+            /*
+             * Note that the following logic would not work for three plane
+             * YUV420 because the pitch value is different for the chroma
+             * planes.
+             */
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                tmp_frame->data[i]     = (uint8_t*)mapped_frame + offset;
+                tmp_frame->linesize[i] = pitch;
+                offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
+            }
+
             ret = ff_get_buffer(avctx, frame, 0);
             if (ret < 0) {
                 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");