We have a pattern of wrapping CUDA calls to print errors and normalise return values that is used in a couple of places. To avoid duplication and increase consistency, let's put the wrapper implementation in a shared place and use it everywhere.

Affects:

* avcodec/cuviddec
* avcodec/nvdec
* avcodec/nvenc
* avfilter/vf_scale_cuda
* avfilter/vf_scale_npp
* avfilter/vf_thumbnail_cuda
* avfilter/vf_transpose_npp
* avfilter/vf_yadif_cuda

tags/n4.2
| @@ -124,7 +124,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ | |||
| motion_est.o ratecontrol.o \ | |||
| mpegvideoencdsp.o | |||
| OBJS-$(CONFIG_MSS34DSP) += mss34dsp.o | |||
| OBJS-$(CONFIG_NVENC) += nvenc.o | |||
| OBJS-$(CONFIG_NVENC) += nvenc.o cuda_check.o | |||
| OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o | |||
| OBJS-$(CONFIG_QPELDSP) += qpeldsp.o | |||
| OBJS-$(CONFIG_QSV) += qsv.o | |||
| @@ -346,7 +346,7 @@ OBJS-$(CONFIG_H264_DECODER) += h264dec.o h264_cabac.o h264_cavlc.o \ | |||
| h264_refs.o h264_sei.o \ | |||
| h264_slice.o h264data.o | |||
| OBJS-$(CONFIG_H264_AMF_ENCODER) += amfenc_h264.o | |||
| OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o | |||
| OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o cuda_check.o | |||
| OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o | |||
| OBJS-$(CONFIG_H264_MMAL_DECODER) += mmaldec.o | |||
| OBJS-$(CONFIG_H264_NVENC_ENCODER) += nvenc_h264.o | |||
| @@ -852,7 +852,7 @@ OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER) += adpcmenc.o adpcm_data.o | |||
| # hardware accelerators | |||
| OBJS-$(CONFIG_D3D11VA) += dxva2.o | |||
| OBJS-$(CONFIG_DXVA2) += dxva2.o | |||
| OBJS-$(CONFIG_NVDEC) += nvdec.o | |||
| OBJS-$(CONFIG_NVDEC) += nvdec.o cuda_check.o | |||
| OBJS-$(CONFIG_VAAPI) += vaapi_decode.o | |||
| OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o | |||
| OBJS-$(CONFIG_VDPAU) += vdpau.o | |||
| @@ -0,0 +1 @@ | |||
| #include "libavutil/cuda_check.c" | |||
| @@ -25,6 +25,7 @@ | |||
| #include "libavutil/mathematics.h" | |||
| #include "libavutil/hwcontext.h" | |||
| #include "libavutil/hwcontext_cuda_internal.h" | |||
| #include "libavutil/cuda_check.h" | |||
| #include "libavutil/fifo.h" | |||
| #include "libavutil/log.h" | |||
| #include "libavutil/opt.h" | |||
| @@ -95,29 +96,7 @@ typedef struct CuvidParsedFrame | |||
| int is_deinterlacing; | |||
| } CuvidParsedFrame; | |||
| static int check_cu(AVCodecContext *avctx, CUresult err, const char *func) | |||
| { | |||
| CuvidContext *ctx = avctx->priv_data; | |||
| const char *err_name; | |||
| const char *err_string; | |||
| av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); | |||
| if (err == CUDA_SUCCESS) | |||
| return 0; | |||
| ctx->cudl->cuGetErrorName(err, &err_name); | |||
| ctx->cudl->cuGetErrorString(err, &err_string); | |||
| av_log(avctx, AV_LOG_ERROR, "%s failed", func); | |||
| if (err_name && err_string) | |||
| av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); | |||
| av_log(avctx, AV_LOG_ERROR, "\n"); | |||
| return AVERROR_EXTERNAL; | |||
| } | |||
| #define CHECK_CU(x) check_cu(avctx, (x), #x) | |||
| #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x) | |||
| static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format) | |||
| { | |||
| @@ -26,6 +26,7 @@ | |||
| #include "libavutil/error.h" | |||
| #include "libavutil/hwcontext.h" | |||
| #include "libavutil/hwcontext_cuda_internal.h" | |||
| #include "libavutil/cuda_check.h" | |||
| #include "libavutil/pixdesc.h" | |||
| #include "libavutil/pixfmt.h" | |||
| @@ -50,6 +51,8 @@ typedef struct NVDECFramePool { | |||
| unsigned int nb_allocated; | |||
| } NVDECFramePool; | |||
| #define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x) | |||
| static int map_avcodec_id(enum AVCodecID id) | |||
| { | |||
| switch (id) { | |||
| @@ -86,7 +89,7 @@ static int map_chroma_format(enum AVPixelFormat pix_fmt) | |||
| static int nvdec_test_capabilities(NVDECDecoder *decoder, | |||
| CUVIDDECODECREATEINFO *params, void *logctx) | |||
| { | |||
| CUresult err; | |||
| int ret; | |||
| CUVIDDECODECAPS caps = { 0 }; | |||
| caps.eCodecType = params->CodecType; | |||
| @@ -105,11 +108,9 @@ static int nvdec_test_capabilities(NVDECDecoder *decoder, | |||
| return 0; | |||
| } | |||
| err = decoder->cvdl->cuvidGetDecoderCaps(&caps); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(logctx, AV_LOG_ERROR, "Failed querying decoder capabilities\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| ret = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps)); | |||
| if (ret < 0) | |||
| return ret; | |||
| av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n"); | |||
| av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n", | |||
| @@ -150,10 +151,11 @@ static void nvdec_decoder_free(void *opaque, uint8_t *data) | |||
| NVDECDecoder *decoder = (NVDECDecoder*)data; | |||
| if (decoder->decoder) { | |||
| void *logctx = decoder->hw_device_ref->data; | |||
| CUcontext dummy; | |||
| decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); | |||
| decoder->cvdl->cuvidDestroyDecoder(decoder->decoder); | |||
| decoder->cudl->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); | |||
| CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder)); | |||
| CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); | |||
| } | |||
| av_buffer_unref(&decoder->hw_device_ref); | |||
| @@ -173,7 +175,6 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref, | |||
| NVDECDecoder *decoder; | |||
| CUcontext dummy; | |||
| CUresult err; | |||
| int ret; | |||
| decoder = av_mallocz(sizeof(*decoder)); | |||
| @@ -202,25 +203,21 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref, | |||
| goto fail; | |||
| } | |||
| err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| } | |||
| ret = nvdec_test_capabilities(decoder, params, logctx); | |||
| if (ret < 0) { | |||
| decoder->cudl->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); | |||
| goto fail; | |||
| } | |||
| err = decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params); | |||
| ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params)); | |||
| decoder->cudl->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(logctx, AV_LOG_ERROR, "Error creating a NVDEC decoder: %d\n", err); | |||
| ret = AVERROR_UNKNOWN; | |||
| if (ret < 0) { | |||
| goto fail; | |||
| } | |||
| @@ -364,21 +361,18 @@ static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data) | |||
| { | |||
| NVDECFrame *unmap_data = (NVDECFrame*)data; | |||
| NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data; | |||
| void *logctx = decoder->hw_device_ref->data; | |||
| CUdeviceptr devptr = (CUdeviceptr)opaque; | |||
| CUresult err; | |||
| int ret; | |||
| CUcontext dummy; | |||
| err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n"); | |||
| ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); | |||
| if (ret < 0) | |||
| goto finish; | |||
| } | |||
| err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr); | |||
| if (err != CUDA_SUCCESS) | |||
| av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n"); | |||
| CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr)); | |||
| decoder->cudl->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); | |||
| finish: | |||
| av_buffer_unref(&unmap_data->idx_ref); | |||
| @@ -395,7 +389,6 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) | |||
| CUVIDPROCPARAMS vpp = { 0 }; | |||
| NVDECFrame *unmap_data = NULL; | |||
| CUresult err; | |||
| CUcontext dummy; | |||
| CUdeviceptr devptr; | |||
| @@ -406,18 +399,15 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) | |||
| vpp.progressive_frame = 1; | |||
| vpp.output_stream = decoder->stream; | |||
| err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) | |||
| return AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); | |||
| if (ret < 0) | |||
| return ret; | |||
| err = decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, cf->idx, &devptr, | |||
| &pitch, &vpp); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(logctx, AV_LOG_ERROR, "Error mapping a picture with CUVID: %d\n", | |||
| err); | |||
| ret = AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, | |||
| cf->idx, &devptr, | |||
| &pitch, &vpp)); | |||
| if (ret < 0) | |||
| goto finish; | |||
| } | |||
| unmap_data = av_mallocz(sizeof(*unmap_data)); | |||
| if (!unmap_data) { | |||
| @@ -447,14 +437,14 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) | |||
| copy_fail: | |||
| if (!frame->buf[1]) { | |||
| decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr); | |||
| CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr)); | |||
| av_freep(&unmap_data); | |||
| } else { | |||
| av_buffer_unref(&frame->buf[1]); | |||
| } | |||
| finish: | |||
| decoder->cudl->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); | |||
| return ret; | |||
| } | |||
| @@ -504,9 +494,9 @@ int ff_nvdec_end_frame(AVCodecContext *avctx) | |||
| { | |||
| NVDECContext *ctx = avctx->internal->hwaccel_priv_data; | |||
| NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data; | |||
| void *logctx = avctx; | |||
| CUVIDPICPARAMS *pp = &ctx->pic_params; | |||
| CUresult err; | |||
| CUcontext dummy; | |||
| int ret = 0; | |||
| @@ -516,20 +506,16 @@ int ff_nvdec_end_frame(AVCodecContext *avctx) | |||
| pp->nNumSlices = ctx->nb_slices; | |||
| pp->pSliceDataOffsets = ctx->slice_offsets; | |||
| err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) | |||
| return AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); | |||
| if (ret < 0) | |||
| return ret; | |||
| err = decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, "Error decoding a picture with NVDEC: %d\n", | |||
| err); | |||
| ret = AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params)); | |||
| if (ret < 0) | |||
| goto finish; | |||
| } | |||
| finish: | |||
| decoder->cudl->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); | |||
| return ret; | |||
| } | |||
| @@ -25,12 +25,15 @@ | |||
| #include "libavutil/hwcontext_cuda.h" | |||
| #include "libavutil/hwcontext.h" | |||
| #include "libavutil/cuda_check.h" | |||
| #include "libavutil/imgutils.h" | |||
| #include "libavutil/avassert.h" | |||
| #include "libavutil/mem.h" | |||
| #include "libavutil/pixdesc.h" | |||
| #include "internal.h" | |||
| #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x) | |||
| #define NVENC_CAP 0x30 | |||
| #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \ | |||
| rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \ | |||
| @@ -183,37 +186,23 @@ static int nvenc_push_context(AVCodecContext *avctx) | |||
| { | |||
| NvencContext *ctx = avctx->priv_data; | |||
| NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; | |||
| CUresult cu_res; | |||
| if (ctx->d3d11_device) | |||
| return 0; | |||
| cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context); | |||
| if (cu_res != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n"); | |||
| return AVERROR_EXTERNAL; | |||
| } | |||
| return 0; | |||
| return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context)); | |||
| } | |||
| static int nvenc_pop_context(AVCodecContext *avctx) | |||
| { | |||
| NvencContext *ctx = avctx->priv_data; | |||
| NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; | |||
| CUresult cu_res; | |||
| CUcontext dummy; | |||
| if (ctx->d3d11_device) | |||
| return 0; | |||
| cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy); | |||
| if (cu_res != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n"); | |||
| return AVERROR_EXTERNAL; | |||
| } | |||
| return 0; | |||
| return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy)); | |||
| } | |||
| static av_cold int nvenc_open_session(AVCodecContext *avctx) | |||
| @@ -406,32 +395,23 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) | |||
| NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; | |||
| char name[128] = { 0}; | |||
| int major, minor, ret; | |||
| CUresult cu_res; | |||
| CUdevice cu_device; | |||
| int loglevel = AV_LOG_VERBOSE; | |||
| if (ctx->device == LIST_DEVICES) | |||
| loglevel = AV_LOG_INFO; | |||
| cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx); | |||
| if (cu_res != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, | |||
| "Cannot access the CUDA device %d\n", | |||
| idx); | |||
| return -1; | |||
| } | |||
| ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx)); | |||
| if (ret < 0) | |||
| return ret; | |||
| cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device); | |||
| if (cu_res != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx); | |||
| return -1; | |||
| } | |||
| ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device)); | |||
| if (ret < 0) | |||
| return ret; | |||
| cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device); | |||
| if (cu_res != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx); | |||
| return -1; | |||
| } | |||
| ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device)); | |||
| if (ret < 0) | |||
| return ret; | |||
| av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor); | |||
| if (((major << 4) | minor) < NVENC_CAP) { | |||
| @@ -442,11 +422,9 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) | |||
| if (ctx->device != idx && ctx->device != ANY_DEVICE) | |||
| return -1; | |||
| cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device); | |||
| if (cu_res != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res); | |||
| ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| } | |||
| ctx->cu_context = ctx->cu_context_internal; | |||
| @@ -477,7 +455,7 @@ fail3: | |||
| return ret; | |||
| fail2: | |||
| dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); | |||
| CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal)); | |||
| ctx->cu_context_internal = NULL; | |||
| fail: | |||
| @@ -555,17 +533,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) | |||
| } else { | |||
| int i, nb_devices = 0; | |||
| if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, | |||
| "Cannot init CUDA\n"); | |||
| if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0) | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) { | |||
| av_log(avctx, AV_LOG_ERROR, | |||
| "Cannot enumerate the CUDA devices\n"); | |||
| if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0) | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| if (!nb_devices) { | |||
| av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n"); | |||
| @@ -1460,7 +1432,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) | |||
| ctx->nvencoder = NULL; | |||
| if (ctx->cu_context_internal) | |||
| dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); | |||
| CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal)); | |||
| ctx->cu_context = ctx->cu_context_internal = NULL; | |||
| #if CONFIG_D3D11VA | |||
| @@ -334,8 +334,9 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o | |||
| OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o | |||
| OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o | |||
| OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o | |||
| OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o | |||
| OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o | |||
| OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o \ | |||
| cuda_check.o | |||
| OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o cuda_check.o | |||
| OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o | |||
| OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o vaapi_vpp.o | |||
| OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o | |||
| @@ -376,7 +377,8 @@ OBJS-$(CONFIG_TBLEND_FILTER) += vf_blend.o framesync.o | |||
| OBJS-$(CONFIG_TELECINE_FILTER) += vf_telecine.o | |||
| OBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o framesync.o | |||
| OBJS-$(CONFIG_THUMBNAIL_FILTER) += vf_thumbnail.o | |||
| OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o | |||
| OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o \ | |||
| cuda_check.o | |||
| OBJS-$(CONFIG_TILE_FILTER) += vf_tile.o | |||
| OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o | |||
| OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o | |||
| @@ -386,7 +388,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o | |||
| opencl/tonemap.o opencl/colorspace_common.o | |||
| OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o | |||
| OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o | |||
| OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o | |||
| OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o cuda_check.o | |||
| OBJS-$(CONFIG_TRIM_FILTER) += trim.o | |||
| OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o | |||
| OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o | |||
| @@ -410,7 +412,8 @@ OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o | |||
| OBJS-$(CONFIG_XBR_FILTER) += vf_xbr.o | |||
| OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o | |||
| OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o | |||
| OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o | |||
| OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \ | |||
| yadif_common.o cuda_check.o | |||
| OBJS-$(CONFIG_ZMQ_FILTER) += f_zmq.o | |||
| OBJS-$(CONFIG_ZOOMPAN_FILTER) += vf_zoompan.o | |||
| OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o | |||
| @@ -0,0 +1 @@ | |||
| #include "libavutil/cuda_check.c" | |||
| @@ -28,6 +28,7 @@ | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/hwcontext.h" | |||
| #include "libavutil/hwcontext_cuda.h" | |||
| #include "libavutil/cuda_check.h" | |||
| #include "libavutil/internal.h" | |||
| #include "libavutil/opt.h" | |||
| #include "libavutil/pixdesc.h" | |||
| @@ -52,6 +53,8 @@ static const enum AVPixelFormat supported_formats[] = { | |||
| #define BLOCKX 32 | |||
| #define BLOCKY 16 | |||
| #define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) | |||
| typedef struct CUDAScaleContext { | |||
| const AVClass *class; | |||
| enum AVPixelFormat in_fmt; | |||
| @@ -255,55 +258,48 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink) | |||
| AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; | |||
| AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; | |||
| CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; | |||
| CUresult err; | |||
| int w, h; | |||
| int ret; | |||
| extern char vf_scale_cuda_ptx[]; | |||
| err = cuCtxPushCurrent(cuda_ctx); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n"); | |||
| ret = AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| } | |||
| err = cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error loading module data\n"); | |||
| ret = AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| } | |||
| cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar"); | |||
| cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"); | |||
| cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"); | |||
| cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"); | |||
| cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"); | |||
| cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"); | |||
| cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"); | |||
| cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"); | |||
| cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex"); | |||
| cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"); | |||
| cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"); | |||
| cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex"); | |||
| cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR); | |||
| cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR); | |||
| cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR); | |||
| cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR); | |||
| cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR); | |||
| cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR); | |||
| cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar")); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2")); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4")); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort")); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2")); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex")); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuCtxPopCurrent(&dummy)); | |||
| if ((ret = ff_scale_eval_dimensions(s, | |||
| s->w_expr, s->h_expr, | |||
| @@ -339,7 +335,7 @@ fail: | |||
| return ret; | |||
| } | |||
| static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex, int channels, | |||
| static int call_resize_kernel(CUDAScaleContext *ctx, CUfunction func, CUtexref tex, int channels, | |||
| uint8_t *src_dptr, int src_width, int src_height, int src_pitch, | |||
| uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, | |||
| int pixel_size) | |||
| @@ -358,8 +354,9 @@ static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex | |||
| desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; | |||
| } | |||
| cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size); | |||
| cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL); | |||
| CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size)); | |||
| CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, | |||
| BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL)); | |||
| return 0; | |||
| } | |||
| @@ -470,7 +467,6 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) | |||
| AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; | |||
| AVFrame *out = NULL; | |||
| CUresult err; | |||
| CUcontext dummy; | |||
| int ret = 0; | |||
| @@ -480,15 +476,13 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) | |||
| goto fail; | |||
| } | |||
| err = cuCtxPushCurrent(device_hwctx->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| } | |||
| ret = cudascale_scale(ctx, out, in); | |||
| cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(cuCtxPopCurrent(&dummy)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| @@ -29,6 +29,7 @@ | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/hwcontext.h" | |||
| #include "libavutil/hwcontext_cuda_internal.h" | |||
| #include "libavutil/cuda_check.h" | |||
| #include "libavutil/internal.h" | |||
| #include "libavutil/opt.h" | |||
| #include "libavutil/pixdesc.h" | |||
| @@ -39,6 +40,8 @@ | |||
| #include "scale.h" | |||
| #include "video.h" | |||
| #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x) | |||
| static const enum AVPixelFormat supported_formats[] = { | |||
| AV_PIX_FMT_YUV420P, | |||
| AV_PIX_FMT_NV12, | |||
| @@ -498,7 +501,6 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) | |||
| AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; | |||
| AVFrame *out = NULL; | |||
| CUresult err; | |||
| CUcontext dummy; | |||
| int ret = 0; | |||
| @@ -511,15 +513,13 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) | |||
| goto fail; | |||
| } | |||
| err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| } | |||
| ret = nppscale_scale(ctx, out, in); | |||
| device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| @@ -24,12 +24,15 @@ | |||
| #include "libavutil/hwcontext.h" | |||
| #include "libavutil/hwcontext_cuda.h" | |||
| #include "libavutil/cuda_check.h" | |||
| #include "libavutil/opt.h" | |||
| #include "libavutil/pixdesc.h" | |||
| #include "avfilter.h" | |||
| #include "internal.h" | |||
| #define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) | |||
| #define HIST_SIZE (3*256) | |||
| #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) | |||
| #define BLOCKX 32 | |||
| @@ -154,7 +157,7 @@ static AVFrame *get_best_frame(AVFilterContext *ctx) | |||
| return picref; | |||
| } | |||
| static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref tex, int channels, | |||
| static int thumbnail_kernel(ThumbnailCudaContext *ctx, CUfunction func, CUtexref tex, int channels, | |||
| int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size) | |||
| { | |||
| CUdeviceptr src_devptr = (CUdeviceptr)src_dptr; | |||
| @@ -171,8 +174,10 @@ static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref t | |||
| desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; | |||
| } | |||
| cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch); | |||
| cuLaunchKernel(func, DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args, NULL); | |||
| CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch)); | |||
| CHECK_CU(cuLaunchKernel(func, | |||
| DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, | |||
| BLOCKX, BLOCKY, 1, 0, 0, args, NULL)); | |||
| return 0; | |||
| } | |||
| @@ -235,7 +240,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) | |||
| int *hist = s->frames[s->n].histogram; | |||
| AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data; | |||
| AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; | |||
| CUresult err; | |||
| CUcontext dummy; | |||
| CUDA_MEMCPY2D cpy = { 0 }; | |||
| int ret = 0; | |||
| @@ -243,11 +247,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) | |||
| // keep a reference of each frame | |||
| s->frames[s->n].buf = frame; | |||
| err = cuCtxPushCurrent(device_hwctx->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) | |||
| return AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx)); | |||
| if (ret < 0) | |||
| return ret; | |||
| cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int)); | |||
| CHECK_CU(cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int))); | |||
| thumbnail(ctx, (int*)s->data, frame); | |||
| @@ -260,11 +264,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) | |||
| cpy.WidthInBytes = HIST_SIZE * sizeof(int); | |||
| cpy.Height = 1; | |||
| err = cuMemcpy2D(&cpy); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| ret = CHECK_CU(cuMemcpy2D(&cpy)); | |||
| if (ret < 0) | |||
| return ret; | |||
| if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P || | |||
| hw_frames_ctx->sw_format == AV_PIX_FMT_P010LE || hw_frames_ctx->sw_format == AV_PIX_FMT_P016LE) | |||
| @@ -274,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) | |||
| hist[i] = 4 * hist[i]; | |||
| } | |||
| cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(cuCtxPopCurrent(&dummy)); | |||
| if (ret < 0) | |||
| return ret; | |||
| @@ -292,12 +294,12 @@ static av_cold void uninit(AVFilterContext *ctx) | |||
| ThumbnailCudaContext *s = ctx->priv; | |||
| if (s->data) { | |||
| cuMemFree(s->data); | |||
| CHECK_CU(cuMemFree(s->data)); | |||
| s->data = 0; | |||
| } | |||
| if (s->cu_module) { | |||
| cuModuleUnload(s->cu_module); | |||
| CHECK_CU(cuModuleUnload(s->cu_module)); | |||
| s->cu_module = NULL; | |||
| } | |||
| @@ -340,49 +342,43 @@ static int config_props(AVFilterLink *inlink) | |||
| AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; | |||
| AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; | |||
| CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; | |||
| CUresult err; | |||
| int ret; | |||
| extern char vf_thumbnail_cuda_ptx[]; | |||
| err = cuCtxPushCurrent(cuda_ctx); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); | |||
| if (ret < 0) | |||
| return ret; | |||
| err = cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error loading module data\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx)); | |||
| if (ret < 0) | |||
| return ret; | |||
| cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"); | |||
| cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"); | |||
| cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"); | |||
| cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"); | |||
| cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"); | |||
| cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"); | |||
| cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"); | |||
| cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"); | |||
| cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER); | |||
| cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR); | |||
| cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR); | |||
| cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR); | |||
| cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR); | |||
| err = cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error allocating cuda memory\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar")); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2")); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort")); | |||
| CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex")); | |||
| CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex")); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR)); | |||
| CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR)); | |||
| ret = CHECK_CU(cuMemAlloc(&s->data, HIST_SIZE * sizeof(int))); | |||
| if (ret < 0) | |||
| return ret; | |||
| cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(cuCtxPopCurrent(&dummy)); | |||
| s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx; | |||
| @@ -23,6 +23,7 @@ | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/hwcontext.h" | |||
| #include "libavutil/hwcontext_cuda_internal.h" | |||
| #include "libavutil/cuda_check.h" | |||
| #include "libavutil/internal.h" | |||
| #include "libavutil/opt.h" | |||
| #include "libavutil/pixdesc.h" | |||
| @@ -32,6 +33,8 @@ | |||
| #include "internal.h" | |||
| #include "video.h" | |||
| #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x) | |||
| static const enum AVPixelFormat supported_formats[] = { | |||
| AV_PIX_FMT_YUV420P, | |||
| AV_PIX_FMT_YUV444P | |||
| @@ -397,7 +400,6 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in) | |||
| AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data; | |||
| AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; | |||
| AVFrame *out = NULL; | |||
| CUresult err; | |||
| CUcontext dummy; | |||
| int ret = 0; | |||
| @@ -410,15 +412,13 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in) | |||
| goto fail; | |||
| } | |||
| err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| } | |||
| ret = npptranspose_filter(ctx, out, in); | |||
| device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy)); | |||
| if (ret < 0) | |||
| goto fail; | |||
| @@ -21,6 +21,7 @@ | |||
| #include <cuda.h> | |||
| #include "libavutil/avassert.h" | |||
| #include "libavutil/hwcontext_cuda.h" | |||
| #include "libavutil/cuda_check.h" | |||
| #include "internal.h" | |||
| #include "yadif.h" | |||
| @@ -48,28 +49,7 @@ typedef struct DeintCUDAContext { | |||
| #define BLOCKX 32 | |||
| #define BLOCKY 16 | |||
| static int check_cu(AVFilterContext *avctx, CUresult err, const char *func) | |||
| { | |||
| const char *err_name; | |||
| const char *err_string; | |||
| av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); | |||
| if (err == CUDA_SUCCESS) | |||
| return 0; | |||
| cuGetErrorName(err, &err_name); | |||
| cuGetErrorString(err, &err_string); | |||
| av_log(avctx, AV_LOG_ERROR, "%s failed", func); | |||
| if (err_name && err_string) | |||
| av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); | |||
| av_log(avctx, AV_LOG_ERROR, "\n"); | |||
| return AVERROR_EXTERNAL; | |||
| } | |||
| #define CHECK_CU(x) check_cu(ctx, (x), #x) | |||
| #define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) | |||
| static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, | |||
| CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next, | |||
| @@ -85,7 +65,7 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, | |||
| { | |||
| DeintCUDAContext *s = ctx->priv; | |||
| CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0; | |||
| CUresult err; | |||
| int ret; | |||
| int skip_spatial_check = s->yadif.mode&2; | |||
| void *args[] = { &dst, &tex_prev, &tex_cur, &tex_next, | |||
| @@ -108,24 +88,21 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, | |||
| }; | |||
| res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev; | |||
| err = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur; | |||
| err = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| res_desc.res.pitch2D.devPtr = (CUdeviceptr)next; | |||
| err = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| err = CHECK_CU(cuLaunchKernel(func, | |||
| ret = CHECK_CU(cuLaunchKernel(func, | |||
| DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, | |||
| BLOCKX, BLOCKY, 1, | |||
| 0, s->stream, args, NULL)); | |||
| @@ -138,7 +115,7 @@ exit: | |||
| if (tex_next) | |||
| CHECK_CU(cuTexObjectDestroy(tex_next)); | |||
| return err; | |||
| return ret; | |||
| } | |||
| static void filter(AVFilterContext *ctx, AVFrame *dst, | |||
| @@ -147,13 +124,11 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, | |||
| DeintCUDAContext *s = ctx->priv; | |||
| YADIFContext *y = &s->yadif; | |||
| CUcontext dummy; | |||
| CUresult err; | |||
| int i; | |||
| int i, ret; | |||
| err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); | |||
| if (err != CUDA_SUCCESS) { | |||
| goto exit; | |||
| } | |||
| ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); | |||
| if (ret < 0) | |||
| return; | |||
| for (i = 0; i < y->csp->nb_components; i++) { | |||
| CUfunction func; | |||
| @@ -204,10 +179,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, | |||
| parity, tff); | |||
| } | |||
| err = CHECK_CU(cuStreamSynchronize(s->stream)); | |||
| if (err != CUDA_SUCCESS) { | |||
| goto exit; | |||
| } | |||
| CHECK_CU(cuStreamSynchronize(s->stream)); | |||
| exit: | |||
| CHECK_CU(cuCtxPopCurrent(&dummy)); | |||
| @@ -283,7 +255,6 @@ static int config_output(AVFilterLink *link) | |||
| YADIFContext *y = &s->yadif; | |||
| int ret = 0; | |||
| CUcontext dummy; | |||
| CUresult err; | |||
| av_assert0(s->input_frames); | |||
| s->device_ref = av_buffer_ref(s->input_frames->device_ref); | |||
| @@ -342,41 +313,29 @@ static int config_output(AVFilterLink *link) | |||
| y->csp = av_pix_fmt_desc_get(output_frames->sw_format); | |||
| y->filter = filter; | |||
| err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_EXTERNAL; | |||
| ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| err = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_INVALIDDATA; | |||
| ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar")); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_INVALIDDATA; | |||
| ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar")); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_INVALIDDATA; | |||
| ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| err= CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_INVALIDDATA; | |||
| ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| err = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); | |||
| if (err != CUDA_SUCCESS) { | |||
| ret = AVERROR_INVALIDDATA; | |||
| ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| exit: | |||
| CHECK_CU(cuCtxPopCurrent(&dummy)); | |||
| @@ -157,7 +157,7 @@ OBJS = adler32.o \ | |||
| xtea.o \ | |||
| tea.o \ | |||
| OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o | |||
| OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o cuda_check.o | |||
| OBJS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.o | |||
| OBJS-$(CONFIG_DXVA2) += hwcontext_dxva2.o | |||
| OBJS-$(CONFIG_LIBDRM) += hwcontext_drm.o | |||
| @@ -175,7 +175,8 @@ OBJS += $(COMPAT_OBJS:%=../compat/%) | |||
| SLIBOBJS-$(HAVE_GNU_WINDRES) += avutilres.o | |||
| SKIPHEADERS-$(HAVE_CUDA_H) += hwcontext_cuda.h | |||
| SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda_internal.h | |||
| SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda_internal.h \ | |||
| cuda_check.h | |||
| SKIPHEADERS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.h | |||
| SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h | |||
| SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h | |||
| @@ -0,0 +1,45 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "compat/cuda/dynlink_loader.h" | |||
| #include "libavutil/cuda_check.h" | |||
| /** | |||
| * Log the outcome of a CUDA call and translate it into an int status. | |||
| * | |||
| * A "Calling <func>" line is always emitted at AV_LOG_TRACE. When err is | |||
| * not CUDA_SUCCESS, the two supplied entry points are asked for the error | |||
| * name and description, and a "<func> failed" message is logged at | |||
| * AV_LOG_ERROR (with " -> name: description" appended when both lookups | |||
| * produced a string). | |||
| * | |||
| * @param avctx               logging context passed to av_log() | |||
| * @param cuGetErrorName_fn   function pointer, called as tcuGetErrorName | |||
| * @param cuGetErrorString_fn function pointer, called as tcuGetErrorString | |||
| * @param err                 result code of the wrapped CUDA call | |||
| * @param func                text identifying the wrapped call in the log | |||
| * @return 0 if err is CUDA_SUCCESS, AVERROR_EXTERNAL otherwise | |||
| */ | |||
| int ff_cuda_check(void *avctx, | |||
| void *cuGetErrorName_fn, | |||
| void *cuGetErrorString_fn, | |||
| CUresult err, const char *func) | |||
| { | |||
| /* Start from NULL: if a lookup returns without writing its output, the | |||
| * check below must see "no string" rather than an indeterminate pointer. */ | |||
| const char *err_name = NULL; | |||
| const char *err_string = NULL; | |||
| av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); | |||
| if (err == CUDA_SUCCESS) | |||
| return 0; | |||
| ((tcuGetErrorName *)cuGetErrorName_fn)(err, &err_name); | |||
| ((tcuGetErrorString *)cuGetErrorString_fn)(err, &err_string); | |||
| av_log(avctx, AV_LOG_ERROR, "%s failed", func); | |||
| if (err_name && err_string) | |||
| av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); | |||
| av_log(avctx, AV_LOG_ERROR, "\n"); | |||
| return AVERROR_EXTERNAL; | |||
| } | |||
| @@ -0,0 +1,43 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef FF_CUDA_CHECK_H | |||
| #define FF_CUDA_CHECK_H | |||
| /** | |||
| * Wrap a CUDA function call and print error information if it fails. | |||
| * | |||
| * @param avctx               logging context passed to av_log() | |||
| * @param cuGetErrorName_fn   pointer to the cuGetErrorName entry point | |||
| * @param cuGetErrorString_fn pointer to the cuGetErrorString entry point | |||
| * @param err                 result code of the wrapped CUDA call | |||
| * @param func                text identifying the wrapped call in the log | |||
| * @return 0 if err is CUDA_SUCCESS, AVERROR_EXTERNAL otherwise | |||
| */ | |||
| int ff_cuda_check(void *avctx, | |||
| void *cuGetErrorName_fn, void *cuGetErrorString_fn, | |||
| CUresult err, const char *func); | |||
| /** | |||
| * Convenience wrapper for ff_cuda_check when directly linking libcuda. | |||
| * The call expression x is evaluated once and also stringified for the log. | |||
| */ | |||
| #define FF_CUDA_CHECK(avclass, x) ff_cuda_check(avclass, cuGetErrorName, cuGetErrorString, (x), #x) | |||
| /** | |||
| * Convenience wrapper for ff_cuda_check when dynamically loading cuda symbols. | |||
| * cudl is a pointer to the loaded function table; it is parenthesised so that | |||
| * any pointer-valued expression can be passed, not only a plain identifier. | |||
| */ | |||
| #define FF_CUDA_CHECK_DL(avclass, cudl, x) ff_cuda_check(avclass, (cudl)->cuGetErrorName, (cudl)->cuGetErrorString, (x), #x) | |||
| #endif /* FF_CUDA_CHECK_H */ | |||
| @@ -21,6 +21,7 @@ | |||
| #include "hwcontext.h" | |||
| #include "hwcontext_internal.h" | |||
| #include "hwcontext_cuda_internal.h" | |||
| #include "cuda_check.h" | |||
| #include "mem.h" | |||
| #include "pixdesc.h" | |||
| #include "pixfmt.h" | |||
| @@ -43,6 +44,8 @@ static const enum AVPixelFormat supported_formats[] = { | |||
| AV_PIX_FMT_0BGR32, | |||
| }; | |||
| #define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x) | |||
| static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, | |||
| const void *hwconfig, | |||
| AVHWFramesConstraints *constraints) | |||
| @@ -70,48 +73,48 @@ static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, | |||
| static void cuda_buffer_free(void *opaque, uint8_t *data) | |||
| { | |||
| AVHWFramesContext *ctx = opaque; | |||
| AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; | |||
| CudaFunctions *cu = hwctx->internal->cuda_dl; | |||
| AVHWFramesContext *ctx = opaque; | |||
| AVHWDeviceContext *device_ctx = ctx->device_ctx; | |||
| AVCUDADeviceContext *hwctx = device_ctx->hwctx; | |||
| CudaFunctions *cu = hwctx->internal->cuda_dl; | |||
| CUcontext dummy; | |||
| cu->cuCtxPushCurrent(hwctx->cuda_ctx); | |||
| CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); | |||
| cu->cuMemFree((CUdeviceptr)data); | |||
| CHECK_CU(cu->cuMemFree((CUdeviceptr)data)); | |||
| cu->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | |||
| } | |||
| static AVBufferRef *cuda_pool_alloc(void *opaque, int size) | |||
| { | |||
| AVHWFramesContext *ctx = opaque; | |||
| AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; | |||
| CudaFunctions *cu = hwctx->internal->cuda_dl; | |||
| AVHWFramesContext *ctx = opaque; | |||
| AVHWDeviceContext *device_ctx = ctx->device_ctx; | |||
| AVCUDADeviceContext *hwctx = device_ctx->hwctx; | |||
| CudaFunctions *cu = hwctx->internal->cuda_dl; | |||
| AVBufferRef *ret = NULL; | |||
| CUcontext dummy = NULL; | |||
| CUdeviceptr data; | |||
| CUresult err; | |||
| int err; | |||
| err = cu->cuCtxPushCurrent(hwctx->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n"); | |||
| err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); | |||
| if (err < 0) | |||
| return NULL; | |||
| } | |||
| err = cu->cuMemAlloc(&data, size); | |||
| if (err != CUDA_SUCCESS) | |||
| err = CHECK_CU(cu->cuMemAlloc(&data, size)); | |||
| if (err < 0) | |||
| goto fail; | |||
| ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); | |||
| if (!ret) { | |||
| cu->cuMemFree(data); | |||
| CHECK_CU(cu->cuMemFree(data)); | |||
| goto fail; | |||
| } | |||
| fail: | |||
| cu->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | |||
| return ret; | |||
| } | |||
| @@ -194,17 +197,17 @@ static int cuda_transfer_get_formats(AVHWFramesContext *ctx, | |||
| static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, | |||
| const AVFrame *src) | |||
| { | |||
| CUDAFramesContext *priv = ctx->internal->priv; | |||
| AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; | |||
| CudaFunctions *cu = device_hwctx->internal->cuda_dl; | |||
| CUDAFramesContext *priv = ctx->internal->priv; | |||
| AVHWDeviceContext *device_ctx = ctx->device_ctx; | |||
| AVCUDADeviceContext *hwctx = device_ctx->hwctx; | |||
| CudaFunctions *cu = hwctx->internal->cuda_dl; | |||
| CUcontext dummy; | |||
| CUresult err; | |||
| int i; | |||
| int i, ret; | |||
| err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) | |||
| return AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); | |||
| if (ret < 0) | |||
| return ret; | |||
| for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { | |||
| CUDA_MEMCPY2D cpy = { | |||
| @@ -218,20 +221,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, | |||
| .Height = src->height >> (i ? priv->shift_height : 0), | |||
| }; | |||
| err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| err = cu->cuStreamSynchronize(device_hwctx->stream); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| cu->cuCtxPopCurrent(&dummy); | |||
| exit: | |||
| CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | |||
| return 0; | |||
| } | |||
| @@ -239,17 +239,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, | |||
| static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, | |||
| const AVFrame *src) | |||
| { | |||
| CUDAFramesContext *priv = ctx->internal->priv; | |||
| AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; | |||
| CudaFunctions *cu = device_hwctx->internal->cuda_dl; | |||
| CUDAFramesContext *priv = ctx->internal->priv; | |||
| AVHWDeviceContext *device_ctx = ctx->device_ctx; | |||
| AVCUDADeviceContext *hwctx = device_ctx->hwctx; | |||
| CudaFunctions *cu = hwctx->internal->cuda_dl; | |||
| CUcontext dummy; | |||
| CUresult err; | |||
| int i; | |||
| int i, ret; | |||
| err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); | |||
| if (err != CUDA_SUCCESS) | |||
| return AVERROR_UNKNOWN; | |||
| ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); | |||
| if (ret < 0) | |||
| return ret; | |||
| for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { | |||
| CUDA_MEMCPY2D cpy = { | |||
| @@ -263,31 +263,29 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, | |||
| .Height = src->height >> (i ? priv->shift_height : 0), | |||
| }; | |||
| err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| } | |||
| err = cu->cuStreamSynchronize(device_hwctx->stream); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n"); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||
| ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream)); | |||
| if (ret < 0) | |||
| goto exit; | |||
| cu->cuCtxPopCurrent(&dummy); | |||
| exit: | |||
| CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | |||
| return 0; | |||
| } | |||
| static void cuda_device_uninit(AVHWDeviceContext *ctx) | |||
| static void cuda_device_uninit(AVHWDeviceContext *device_ctx) | |||
| { | |||
| AVCUDADeviceContext *hwctx = ctx->hwctx; | |||
| AVCUDADeviceContext *hwctx = device_ctx->hwctx; | |||
| if (hwctx->internal) { | |||
| CudaFunctions *cu = hwctx->internal->cuda_dl; | |||
| if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { | |||
| hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx); | |||
| CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); | |||
| hwctx->cuda_ctx = NULL; | |||
| } | |||
| cuda_free_functions(&hwctx->internal->cuda_dl); | |||
| @@ -322,53 +320,47 @@ error: | |||
| return ret; | |||
| } | |||
| static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, | |||
| static int cuda_device_create(AVHWDeviceContext *device_ctx, | |||
| const char *device, | |||
| AVDictionary *opts, int flags) | |||
| { | |||
| AVCUDADeviceContext *hwctx = ctx->hwctx; | |||
| AVCUDADeviceContext *hwctx = device_ctx->hwctx; | |||
| CudaFunctions *cu; | |||
| CUdevice cu_device; | |||
| CUcontext dummy; | |||
| CUresult err; | |||
| int device_idx = 0; | |||
| int ret, device_idx = 0; | |||
| if (device) | |||
| device_idx = strtol(device, NULL, 0); | |||
| if (cuda_device_init(ctx) < 0) | |||
| if (cuda_device_init(device_ctx) < 0) | |||
| goto error; | |||
| cu = hwctx->internal->cuda_dl; | |||
| err = cu->cuInit(0); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n"); | |||
| ret = CHECK_CU(cu->cuInit(0)); | |||
| if (ret < 0) | |||
| goto error; | |||
| } | |||
| err = cu->cuDeviceGet(&cu_device, device_idx); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx); | |||
| ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx)); | |||
| if (ret < 0) | |||
| goto error; | |||
| } | |||
| err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device); | |||
| if (err != CUDA_SUCCESS) { | |||
| av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n"); | |||
| ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device)); | |||
| if (ret < 0) | |||
| goto error; | |||
| } | |||
| // Setting stream to NULL will make functions automatically use the default CUstream | |||
| hwctx->stream = NULL; | |||
| cu->cuCtxPopCurrent(&dummy); | |||
| CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | |||
| hwctx->internal->is_allocated = 1; | |||
| return 0; | |||
| error: | |||
| cuda_device_uninit(ctx); | |||
| cuda_device_uninit(device_ctx); | |||
| return AVERROR_UNKNOWN; | |||
| } | |||