/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "buffer.h"
#include "common.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_cuda.h"
#include "mem.h"
#include "pixdesc.h"
#include "pixfmt.h"
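/* Per-frames-context private data: log2 chroma subsampling shifts of the
 * software pixel format, cached at init time for sizing per-plane copies. */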
typedef struct CUDAFramesContext {
    int shift_width, shift_height;
} CUDAFramesContext;
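/* Software pixel formats that can back an AV_PIX_FMT_CUDA frame. */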
static const enum AVPixelFormat supported_formats[] = {
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_P010,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_YUV444P16,
};
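/* Report the hardware and software formats this device can use. */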
static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
                                       const void *hwconfig,
                                       AVHWFramesConstraints *constraints)
{
    int i;

    constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
                                                    sizeof(*constraints->valid_sw_formats));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
        constraints->valid_sw_formats[i] = supported_formats[i];
    constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);
    constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}
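/* AVBuffer free callback: release the device memory backing one pooled
 * frame, under the owning CUDA context. */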
static void cuda_buffer_free(void *opaque, uint8_t *data)
{
    AVHWFramesContext *ctx = opaque;
    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
    CUcontext dummy;

    cuCtxPushCurrent(hwctx->cuda_ctx);
    cuMemFree((CUdeviceptr)data);
    cuCtxPopCurrent(&dummy);
}
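/* Buffer pool allocator: a single contiguous cuMemAlloc'd block holds all
 * planes of one frame. */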
static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
{
    AVHWFramesContext *ctx = opaque;
    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;

    AVBufferRef *ret = NULL;
    CUcontext dummy = NULL;
    CUdeviceptr data;
    CUresult err;

    err = cuCtxPushCurrent(hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
        return NULL;
    }

    err = cuMemAlloc(&data, size);
    if (err != CUDA_SUCCESS)
        goto fail;

    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
    if (!ret) {
        cuMemFree(data);
        goto fail;
    }

fail:
    cuCtxPopCurrent(&dummy);
    return ret;
}
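/* Validate the software format and, if the caller did not provide a pool,
 * create an internal one sized to hold a whole frame. */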
static int cuda_frames_init(AVHWFramesContext *ctx)
{
    CUDAFramesContext *priv = ctx->internal->priv;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
        if (ctx->sw_format == supported_formats[i])
            break;
    }
    if (i == FF_ARRAY_ELEMS(supported_formats)) {
        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
               av_get_pix_fmt_name(ctx->sw_format));
        return AVERROR(ENOSYS);
    }

    av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);

    if (!ctx->pool) {
        int size;

        switch (ctx->sw_format) {
        case AV_PIX_FMT_NV12:
        case AV_PIX_FMT_YUV420P:
            size = ctx->width * ctx->height * 3 / 2;
            break;
        case AV_PIX_FMT_P010:
            size = ctx->width * ctx->height * 3;
            break;
        case AV_PIX_FMT_YUV444P:
            size = ctx->width * ctx->height * 3;
            break;
        case AV_PIX_FMT_YUV444P16:
            size = ctx->width * ctx->height * 6;
            break;
        }

        ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
        if (!ctx->internal->pool_internal)
            return AVERROR(ENOMEM);
    }

    return 0;
}
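/* Hand out a pooled buffer and carve the per-plane pointers and linesizes
 * out of the single contiguous allocation. */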
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
{
    frame->buf[0] = av_buffer_pool_get(ctx->pool);
    if (!frame->buf[0])
        return AVERROR(ENOMEM);

    switch (ctx->sw_format) {
    case AV_PIX_FMT_NV12:
        frame->data[0]     = frame->buf[0]->data;
        frame->data[1]     = frame->data[0] + ctx->width * ctx->height;
        frame->linesize[0] = ctx->width;
        frame->linesize[1] = ctx->width;
        break;
    case AV_PIX_FMT_YUV420P:
        frame->data[0]     = frame->buf[0]->data;
        frame->data[2]     = frame->data[0] + ctx->width * ctx->height;
        frame->data[1]     = frame->data[2] + ctx->width * ctx->height / 4;
        frame->linesize[0] = ctx->width;
        frame->linesize[1] = ctx->width / 2;
        frame->linesize[2] = ctx->width / 2;
        break;
    case AV_PIX_FMT_P010:
        frame->data[0]     = frame->buf[0]->data;
        frame->data[1]     = frame->data[0] + 2 * ctx->width * ctx->height;
        frame->linesize[0] = 2 * ctx->width;
        frame->linesize[1] = 2 * ctx->width;
        break;
    case AV_PIX_FMT_YUV444P:
        frame->data[0]     = frame->buf[0]->data;
        frame->data[1]     = frame->data[0] + ctx->width * ctx->height;
        frame->data[2]     = frame->data[1] + ctx->width * ctx->height;
        frame->linesize[0] = ctx->width;
        frame->linesize[1] = ctx->width;
        frame->linesize[2] = ctx->width;
        break;
    case AV_PIX_FMT_YUV444P16:
        frame->data[0]     = frame->buf[0]->data;
        frame->data[1]     = frame->data[0] + 2 * ctx->width * ctx->height;
        frame->data[2]     = frame->data[1] + 2 * ctx->width * ctx->height;
        frame->linesize[0] = 2 * ctx->width;
        frame->linesize[1] = 2 * ctx->width;
        frame->linesize[2] = 2 * ctx->width;
        break;
    default:
        av_frame_unref(frame);
        return AVERROR_BUG;
    }

    frame->format = AV_PIX_FMT_CUDA;
    frame->width  = ctx->width;
    frame->height = ctx->height;

    return 0;
}
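/* Transfers are only offered in the frames context's own software format. */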
static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
                                     enum AVHWFrameTransferDirection dir,
                                     enum AVPixelFormat **formats)
{
    enum AVPixelFormat *fmts;

    fmts = av_malloc_array(2, sizeof(*fmts));
    if (!fmts)
        return AVERROR(ENOMEM);

    fmts[0] = ctx->sw_format;
    fmts[1] = AV_PIX_FMT_NONE;

    *formats = fmts;

    return 0;
}
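/* Download: copy each plane from device to host memory with a 2D memcpy,
 * shifting the height for subsampled chroma planes. */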
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                   const AVFrame *src)
{
    CUDAFramesContext *priv = ctx->internal->priv;
    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;

    CUcontext dummy;
    CUresult err;
    int i;

    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS)
        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstMemoryType = CU_MEMORYTYPE_HOST,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .dstHost       = dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        err = cuMemcpy2D(&cpy);
        if (err != CUDA_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
            /* pop the context pushed above before bailing out */
            cuCtxPopCurrent(&dummy);
            return AVERROR_UNKNOWN;
        }
    }

    cuCtxPopCurrent(&dummy);

    return 0;
}
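/* Upload: copy each plane from host memory to the device frame. */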
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                 const AVFrame *src)
{
    CUDAFramesContext *priv = ctx->internal->priv;
    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;

    CUcontext dummy;
    CUresult err;
    int i;

    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS)
        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_HOST,
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcHost       = src->data[i],
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        err = cuMemcpy2D(&cpy);
        if (err != CUDA_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
            /* pop the context pushed above before bailing out */
            cuCtxPopCurrent(&dummy);
            return AVERROR_UNKNOWN;
        }
    }

    cuCtxPopCurrent(&dummy);

    return 0;
}
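/* Device free callback: destroy the CUDA context created in
 * cuda_device_create(). */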
static void cuda_device_free(AVHWDeviceContext *ctx)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;
    cuCtxDestroy(hwctx->cuda_ctx);
}
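/* Create a CUDA context on the device selected by index (parsed from the
 * "device" string, default 0). */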
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
                              AVDictionary *opts, int flags)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;
    CUdevice cu_device;
    CUcontext dummy;
    CUresult err;
    int device_idx = 0;

    if (device)
        device_idx = strtol(device, NULL, 0);

    err = cuInit(0);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
        return AVERROR_UNKNOWN;
    }

    err = cuDeviceGet(&cu_device, device_idx);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
        return AVERROR_UNKNOWN;
    }

    err = cuCtxCreate(&hwctx->cuda_ctx, 0, cu_device);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
        return AVERROR_UNKNOWN;
    }

    cuCtxPopCurrent(&dummy);

    ctx->free = cuda_device_free;

    return 0;
}
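/* Hardware context type descriptor for AV_HWDEVICE_TYPE_CUDA. */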
const HWContextType ff_hwcontext_type_cuda = {
    .type                   = AV_HWDEVICE_TYPE_CUDA,
    .name                   = "CUDA",

    .device_hwctx_size      = sizeof(AVCUDADeviceContext),
    .frames_priv_size       = sizeof(CUDAFramesContext),

    .device_create          = cuda_device_create,
    .frames_get_constraints = cuda_frames_get_constraints,
    .frames_init            = cuda_frames_init,
    .frames_get_buffer      = cuda_get_buffer,
    .transfer_get_formats   = cuda_transfer_get_formats,
    .transfer_data_to       = cuda_transfer_data_to,
    .transfer_data_from     = cuda_transfer_data_from,

    .pix_fmts               = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
};