You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

337 lines
9.9KB

  1. /*
  2. * This file is part of Libav.
  3. *
  4. * Libav is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * Libav is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with Libav; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "buffer.h"
  19. #include "common.h"
  20. #include "hwcontext.h"
  21. #include "hwcontext_internal.h"
  22. #include "hwcontext_cuda.h"
  23. #include "mem.h"
  24. #include "pixdesc.h"
  25. #include "pixfmt.h"
  /**
   * Private per-frames-context state of the CUDA hwcontext backend.
   *
   * Both fields are filled in by cuda_frames_init() through
   * av_pix_fmt_get_chroma_sub_sample(); shift_height is used by the transfer
   * functions to derive the row count of the planes after the first one.
   */
  typedef struct CUDAFramesContext {
      int shift_width;   /* chroma shift along the horizontal axis */
      int shift_height;  /* chroma shift along the vertical axis   */
  } CUDAFramesContext;
  29. static const enum AVPixelFormat supported_formats[] = {
  30. AV_PIX_FMT_NV12,
  31. AV_PIX_FMT_YUV420P,
  32. AV_PIX_FMT_P010,
  33. AV_PIX_FMT_YUV444P,
  34. AV_PIX_FMT_YUV444P16,
  35. };
  36. static void cuda_buffer_free(void *opaque, uint8_t *data)
  37. {
  38. AVHWFramesContext *ctx = opaque;
  39. AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
  40. CUcontext dummy;
  41. cuCtxPushCurrent(hwctx->cuda_ctx);
  42. cuMemFree((CUdeviceptr)data);
  43. cuCtxPopCurrent(&dummy);
  44. }
  45. static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
  46. {
  47. AVHWFramesContext *ctx = opaque;
  48. AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
  49. AVBufferRef *ret = NULL;
  50. CUcontext dummy = NULL;
  51. CUdeviceptr data;
  52. CUresult err;
  53. err = cuCtxPushCurrent(hwctx->cuda_ctx);
  54. if (err != CUDA_SUCCESS) {
  55. av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
  56. return NULL;
  57. }
  58. err = cuMemAlloc(&data, size);
  59. if (err != CUDA_SUCCESS)
  60. goto fail;
  61. ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
  62. if (!ret) {
  63. cuMemFree(data);
  64. goto fail;
  65. }
  66. fail:
  67. cuCtxPopCurrent(&dummy);
  68. return ret;
  69. }
  70. static int cuda_frames_init(AVHWFramesContext *ctx)
  71. {
  72. CUDAFramesContext *priv = ctx->internal->priv;
  73. int i;
  74. for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
  75. if (ctx->sw_format == supported_formats[i])
  76. break;
  77. }
  78. if (i == FF_ARRAY_ELEMS(supported_formats)) {
  79. av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
  80. av_get_pix_fmt_name(ctx->sw_format));
  81. return AVERROR(ENOSYS);
  82. }
  83. av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
  84. if (!ctx->pool) {
  85. int size;
  86. switch (ctx->sw_format) {
  87. case AV_PIX_FMT_NV12:
  88. case AV_PIX_FMT_YUV420P:
  89. size = ctx->width * ctx->height * 3 / 2;
  90. break;
  91. case AV_PIX_FMT_P010:
  92. size = ctx->width * ctx->height * 3;
  93. break;
  94. case AV_PIX_FMT_YUV444P:
  95. size = ctx->width * ctx->height * 3;
  96. break;
  97. case AV_PIX_FMT_YUV444P16:
  98. size = ctx->width * ctx->height * 6;
  99. break;
  100. }
  101. ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
  102. if (!ctx->internal->pool_internal)
  103. return AVERROR(ENOMEM);
  104. }
  105. return 0;
  106. }
  107. static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
  108. {
  109. frame->buf[0] = av_buffer_pool_get(ctx->pool);
  110. if (!frame->buf[0])
  111. return AVERROR(ENOMEM);
  112. switch (ctx->sw_format) {
  113. case AV_PIX_FMT_NV12:
  114. frame->data[0] = frame->buf[0]->data;
  115. frame->data[1] = frame->data[0] + ctx->width * ctx->height;
  116. frame->linesize[0] = ctx->width;
  117. frame->linesize[1] = ctx->width;
  118. break;
  119. case AV_PIX_FMT_YUV420P:
  120. frame->data[0] = frame->buf[0]->data;
  121. frame->data[2] = frame->data[0] + ctx->width * ctx->height;
  122. frame->data[1] = frame->data[2] + ctx->width * ctx->height / 4;
  123. frame->linesize[0] = ctx->width;
  124. frame->linesize[1] = ctx->width / 2;
  125. frame->linesize[2] = ctx->width / 2;
  126. break;
  127. case AV_PIX_FMT_P010:
  128. frame->data[0] = frame->buf[0]->data;
  129. frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height;
  130. frame->linesize[0] = 2 * ctx->width;
  131. frame->linesize[1] = 2 * ctx->width;
  132. break;
  133. case AV_PIX_FMT_YUV444P:
  134. frame->data[0] = frame->buf[0]->data;
  135. frame->data[1] = frame->data[0] + ctx->width * ctx->height;
  136. frame->data[2] = frame->data[1] + ctx->width * ctx->height;
  137. frame->linesize[0] = ctx->width;
  138. frame->linesize[1] = ctx->width;
  139. frame->linesize[2] = ctx->width;
  140. break;
  141. case AV_PIX_FMT_YUV444P16:
  142. frame->data[0] = frame->buf[0]->data;
  143. frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height;
  144. frame->data[2] = frame->data[1] + 2 * ctx->width * ctx->height;
  145. frame->linesize[0] = 2 * ctx->width;
  146. frame->linesize[1] = 2 * ctx->width;
  147. frame->linesize[2] = 2 * ctx->width;
  148. break;
  149. default:
  150. av_frame_unref(frame);
  151. return AVERROR_BUG;
  152. }
  153. frame->format = AV_PIX_FMT_CUDA;
  154. frame->width = ctx->width;
  155. frame->height = ctx->height;
  156. return 0;
  157. }
  158. static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
  159. enum AVHWFrameTransferDirection dir,
  160. enum AVPixelFormat **formats)
  161. {
  162. enum AVPixelFormat *fmts;
  163. fmts = av_malloc_array(2, sizeof(*fmts));
  164. if (!fmts)
  165. return AVERROR(ENOMEM);
  166. fmts[0] = ctx->sw_format;
  167. fmts[1] = AV_PIX_FMT_NONE;
  168. *formats = fmts;
  169. return 0;
  170. }
  171. static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
  172. const AVFrame *src)
  173. {
  174. CUDAFramesContext *priv = ctx->internal->priv;
  175. AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
  176. CUcontext dummy;
  177. CUresult err;
  178. int i;
  179. err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
  180. if (err != CUDA_SUCCESS)
  181. return AVERROR_UNKNOWN;
  182. for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
  183. CUDA_MEMCPY2D cpy = {
  184. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  185. .dstMemoryType = CU_MEMORYTYPE_HOST,
  186. .srcDevice = (CUdeviceptr)src->data[i],
  187. .dstHost = dst->data[i],
  188. .srcPitch = src->linesize[i],
  189. .dstPitch = dst->linesize[i],
  190. .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
  191. .Height = src->height >> (i ? priv->shift_height : 0),
  192. };
  193. err = cuMemcpy2D(&cpy);
  194. if (err != CUDA_SUCCESS) {
  195. av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
  196. return AVERROR_UNKNOWN;
  197. }
  198. }
  199. cuCtxPopCurrent(&dummy);
  200. return 0;
  201. }
  202. static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
  203. const AVFrame *src)
  204. {
  205. CUDAFramesContext *priv = ctx->internal->priv;
  206. AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
  207. CUcontext dummy;
  208. CUresult err;
  209. int i;
  210. err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
  211. if (err != CUDA_SUCCESS)
  212. return AVERROR_UNKNOWN;
  213. for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
  214. CUDA_MEMCPY2D cpy = {
  215. .srcMemoryType = CU_MEMORYTYPE_HOST,
  216. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  217. .srcHost = src->data[i],
  218. .dstDevice = (CUdeviceptr)dst->data[i],
  219. .srcPitch = src->linesize[i],
  220. .dstPitch = dst->linesize[i],
  221. .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
  222. .Height = src->height >> (i ? priv->shift_height : 0),
  223. };
  224. err = cuMemcpy2D(&cpy);
  225. if (err != CUDA_SUCCESS) {
  226. av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
  227. return AVERROR_UNKNOWN;
  228. }
  229. }
  230. cuCtxPopCurrent(&dummy);
  231. return 0;
  232. }
  233. static void cuda_device_free(AVHWDeviceContext *ctx)
  234. {
  235. AVCUDADeviceContext *hwctx = ctx->hwctx;
  236. cuCtxDestroy(hwctx->cuda_ctx);
  237. }
  238. static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
  239. AVDictionary *opts, int flags)
  240. {
  241. AVCUDADeviceContext *hwctx = ctx->hwctx;
  242. CUdevice cu_device;
  243. CUcontext dummy;
  244. CUresult err;
  245. int device_idx = 0;
  246. if (device)
  247. device_idx = strtol(device, NULL, 0);
  248. err = cuInit(0);
  249. if (err != CUDA_SUCCESS) {
  250. av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
  251. return AVERROR_UNKNOWN;
  252. }
  253. err = cuDeviceGet(&cu_device, device_idx);
  254. if (err != CUDA_SUCCESS) {
  255. av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
  256. return AVERROR_UNKNOWN;
  257. }
  258. err = cuCtxCreate(&hwctx->cuda_ctx, 0, cu_device);
  259. if (err != CUDA_SUCCESS) {
  260. av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
  261. return AVERROR_UNKNOWN;
  262. }
  263. cuCtxPopCurrent(&dummy);
  264. ctx->free = cuda_device_free;
  265. return 0;
  266. }
  267. const HWContextType ff_hwcontext_type_cuda = {
  268. .type = AV_HWDEVICE_TYPE_CUDA,
  269. .name = "CUDA",
  270. .device_hwctx_size = sizeof(AVCUDADeviceContext),
  271. .frames_priv_size = sizeof(CUDAFramesContext),
  272. .device_create = cuda_device_create,
  273. .frames_init = cuda_frames_init,
  274. .frames_get_buffer = cuda_get_buffer,
  275. .transfer_get_formats = cuda_transfer_get_formats,
  276. .transfer_data_to = cuda_transfer_data_to,
  277. .transfer_data_from = cuda_transfer_data_from,
  278. .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
  279. };