You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

315 lines
9.1KB

  1. /*
  2. * This file is part of Libav.
  3. *
  4. * Libav is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * Libav is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with Libav; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "buffer.h"
  19. #include "common.h"
  20. #include "hwcontext.h"
  21. #include "hwcontext_internal.h"
  22. #include "hwcontext_cuda.h"
  23. #include "mem.h"
  24. #include "pixdesc.h"
  25. #include "pixfmt.h"
  /**
   * Private per-frames-context state of the CUDA hwcontext backend.
   *
   * Both fields are filled in by cuda_frames_init() through
   * av_pix_fmt_get_chroma_sub_sample() for the frames context's sw_format.
   */
  typedef struct CUDAFramesContext {
      /* Chroma shift reported for the horizontal direction. */
      int shift_width;
      /* Chroma shift reported for the vertical direction; the transfer
       * functions right-shift the frame height by it for planes after the
       * first one. */
      int shift_height;
  } CUDAFramesContext;
  29. static const enum AVPixelFormat supported_formats[] = {
  30. AV_PIX_FMT_NV12,
  31. AV_PIX_FMT_YUV420P,
  32. AV_PIX_FMT_YUV444P,
  33. };
  34. static void cuda_buffer_free(void *opaque, uint8_t *data)
  35. {
  36. AVHWFramesContext *ctx = opaque;
  37. AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
  38. CUcontext dummy;
  39. cuCtxPushCurrent(hwctx->cuda_ctx);
  40. cuMemFree((CUdeviceptr)data);
  41. cuCtxPopCurrent(&dummy);
  42. }
  43. static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
  44. {
  45. AVHWFramesContext *ctx = opaque;
  46. AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
  47. AVBufferRef *ret = NULL;
  48. CUcontext dummy = NULL;
  49. CUdeviceptr data;
  50. CUresult err;
  51. err = cuCtxPushCurrent(hwctx->cuda_ctx);
  52. if (err != CUDA_SUCCESS) {
  53. av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
  54. return NULL;
  55. }
  56. err = cuMemAlloc(&data, size);
  57. if (err != CUDA_SUCCESS)
  58. goto fail;
  59. ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
  60. if (!ret) {
  61. cuMemFree(data);
  62. goto fail;
  63. }
  64. fail:
  65. cuCtxPopCurrent(&dummy);
  66. return ret;
  67. }
  68. static int cuda_frames_init(AVHWFramesContext *ctx)
  69. {
  70. CUDAFramesContext *priv = ctx->internal->priv;
  71. int i;
  72. for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
  73. if (ctx->sw_format == supported_formats[i])
  74. break;
  75. }
  76. if (i == FF_ARRAY_ELEMS(supported_formats)) {
  77. av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
  78. av_get_pix_fmt_name(ctx->sw_format));
  79. return AVERROR(ENOSYS);
  80. }
  81. av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
  82. if (!ctx->pool) {
  83. int size;
  84. switch (ctx->sw_format) {
  85. case AV_PIX_FMT_NV12:
  86. case AV_PIX_FMT_YUV420P:
  87. size = ctx->width * ctx->height * 3 / 2;
  88. break;
  89. case AV_PIX_FMT_YUV444P:
  90. size = ctx->width * ctx->height * 3;
  91. break;
  92. }
  93. ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
  94. if (!ctx->internal->pool_internal)
  95. return AVERROR(ENOMEM);
  96. }
  97. return 0;
  98. }
  99. static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
  100. {
  101. frame->buf[0] = av_buffer_pool_get(ctx->pool);
  102. if (!frame->buf[0])
  103. return AVERROR(ENOMEM);
  104. switch (ctx->sw_format) {
  105. case AV_PIX_FMT_NV12:
  106. frame->data[0] = frame->buf[0]->data;
  107. frame->data[1] = frame->data[0] + ctx->width * ctx->height;
  108. frame->linesize[0] = ctx->width;
  109. frame->linesize[1] = ctx->width;
  110. break;
  111. case AV_PIX_FMT_YUV420P:
  112. frame->data[0] = frame->buf[0]->data;
  113. frame->data[2] = frame->data[0] + ctx->width * ctx->height;
  114. frame->data[1] = frame->data[2] + ctx->width * ctx->height / 4;
  115. frame->linesize[0] = ctx->width;
  116. frame->linesize[1] = ctx->width / 2;
  117. frame->linesize[2] = ctx->width / 2;
  118. break;
  119. case AV_PIX_FMT_YUV444P:
  120. frame->data[0] = frame->buf[0]->data;
  121. frame->data[1] = frame->data[0] + ctx->width * ctx->height;
  122. frame->data[2] = frame->data[1] + ctx->width * ctx->height;
  123. frame->linesize[0] = ctx->width;
  124. frame->linesize[1] = ctx->width;
  125. frame->linesize[2] = ctx->width;
  126. break;
  127. default:
  128. av_frame_unref(frame);
  129. return AVERROR_BUG;
  130. }
  131. frame->format = AV_PIX_FMT_CUDA;
  132. frame->width = ctx->width;
  133. frame->height = ctx->height;
  134. return 0;
  135. }
  136. static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
  137. enum AVHWFrameTransferDirection dir,
  138. enum AVPixelFormat **formats)
  139. {
  140. enum AVPixelFormat *fmts;
  141. fmts = av_malloc_array(2, sizeof(*fmts));
  142. if (!fmts)
  143. return AVERROR(ENOMEM);
  144. fmts[0] = ctx->sw_format;
  145. fmts[1] = AV_PIX_FMT_NONE;
  146. *formats = fmts;
  147. return 0;
  148. }
  149. static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
  150. const AVFrame *src)
  151. {
  152. CUDAFramesContext *priv = ctx->internal->priv;
  153. AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
  154. CUcontext dummy;
  155. CUresult err;
  156. int i;
  157. err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
  158. if (err != CUDA_SUCCESS)
  159. return AVERROR_UNKNOWN;
  160. for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
  161. CUDA_MEMCPY2D cpy = {
  162. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  163. .dstMemoryType = CU_MEMORYTYPE_HOST,
  164. .srcDevice = (CUdeviceptr)src->data[i],
  165. .dstHost = dst->data[i],
  166. .srcPitch = src->linesize[i],
  167. .dstPitch = dst->linesize[i],
  168. .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
  169. .Height = src->height >> (i ? priv->shift_height : 0),
  170. };
  171. err = cuMemcpy2D(&cpy);
  172. if (err != CUDA_SUCCESS) {
  173. av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
  174. return AVERROR_UNKNOWN;
  175. }
  176. }
  177. cuCtxPopCurrent(&dummy);
  178. return 0;
  179. }
  180. static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
  181. const AVFrame *src)
  182. {
  183. CUDAFramesContext *priv = ctx->internal->priv;
  184. AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
  185. CUcontext dummy;
  186. CUresult err;
  187. int i;
  188. err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
  189. if (err != CUDA_SUCCESS)
  190. return AVERROR_UNKNOWN;
  191. for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
  192. CUDA_MEMCPY2D cpy = {
  193. .srcMemoryType = CU_MEMORYTYPE_HOST,
  194. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  195. .srcHost = src->data[i],
  196. .dstDevice = (CUdeviceptr)dst->data[i],
  197. .srcPitch = src->linesize[i],
  198. .dstPitch = dst->linesize[i],
  199. .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
  200. .Height = src->height >> (i ? priv->shift_height : 0),
  201. };
  202. err = cuMemcpy2D(&cpy);
  203. if (err != CUDA_SUCCESS) {
  204. av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
  205. return AVERROR_UNKNOWN;
  206. }
  207. }
  208. cuCtxPopCurrent(&dummy);
  209. return 0;
  210. }
  211. static void cuda_device_free(AVHWDeviceContext *ctx)
  212. {
  213. AVCUDADeviceContext *hwctx = ctx->hwctx;
  214. cuCtxDestroy(hwctx->cuda_ctx);
  215. }
  216. static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
  217. AVDictionary *opts, int flags)
  218. {
  219. AVCUDADeviceContext *hwctx = ctx->hwctx;
  220. CUdevice cu_device;
  221. CUcontext dummy;
  222. CUresult err;
  223. int device_idx = 0;
  224. if (device)
  225. device_idx = strtol(device, NULL, 0);
  226. err = cuInit(0);
  227. if (err != CUDA_SUCCESS) {
  228. av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
  229. return AVERROR_UNKNOWN;
  230. }
  231. err = cuDeviceGet(&cu_device, device_idx);
  232. if (err != CUDA_SUCCESS) {
  233. av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
  234. return AVERROR_UNKNOWN;
  235. }
  236. err = cuCtxCreate(&hwctx->cuda_ctx, 0, cu_device);
  237. if (err != CUDA_SUCCESS) {
  238. av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
  239. return AVERROR_UNKNOWN;
  240. }
  241. cuCtxPopCurrent(&dummy);
  242. ctx->free = cuda_device_free;
  243. return 0;
  244. }
  245. const HWContextType ff_hwcontext_type_cuda = {
  246. .type = AV_HWDEVICE_TYPE_CUDA,
  247. .name = "CUDA",
  248. .device_hwctx_size = sizeof(AVCUDADeviceContext),
  249. .frames_priv_size = sizeof(CUDAFramesContext),
  250. .device_create = cuda_device_create,
  251. .frames_init = cuda_frames_init,
  252. .frames_get_buffer = cuda_get_buffer,
  253. .transfer_get_formats = cuda_transfer_get_formats,
  254. .transfer_data_to = cuda_transfer_data_to,
  255. .transfer_data_from = cuda_transfer_data_from,
  256. .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
  257. };