You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

874 lines
34KB

  1. /*
  2. * Copyright (C) 2012 Peng Gao <peng@multicorewareinc.com>
  3. * Copyright (C) 2012 Li Cao <li@multicorewareinc.com>
  4. * Copyright (C) 2012 Wei Gao <weigao@multicorewareinc.com>
  5. * Copyright (C) 2013 Lenny Wang <lwanghpc@gmail.com>
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. #include "opencl.h"
  24. #include "avstring.h"
  25. #include "log.h"
  26. #include "avassert.h"
  27. #include "opt.h"
  /*
   * Global lock guarding the shared OpenCL context.
   * When thread support is available the lock is a pthread mutex that is
   * created lazily by init_opencl_mtx(); otherwise the lock macros expand
   * to nothing.
   */
  #if HAVE_THREADS
  #include "thread.h"
  #include "atomic.h"
  static pthread_mutex_t * volatile atomic_opencl_lock = NULL;
  #define LOCK_OPENCL pthread_mutex_lock(atomic_opencl_lock)
  #define UNLOCK_OPENCL pthread_mutex_unlock(atomic_opencl_lock)
  #else
  #define LOCK_OPENCL
  #define UNLOCK_OPENCL
  #endif
  /* Upper bound on the number of kernel sources that can be registered. */
  #define MAX_KERNEL_CODE_NUM 200

  /* One registered kernel source string and its compilation state. */
  typedef struct {
      int is_compiled;            /* non-zero once the source has been built */
      const char *kernel_string;  /* kernel source text; the pointer is stored, not copied */
  } KernelCode;
  42. typedef struct {
  43. const AVClass *class;
  44. int log_offset;
  45. void *log_ctx;
  46. int init_count;
  47. int opt_init_flag;
  48. /**
  49. * if set to 1, the OpenCL environment was created by the user and
  50. * passed as AVOpenCLExternalEnv when initing ,0:created by opencl wrapper.
  51. */
  52. int is_user_created;
  53. int platform_idx;
  54. int device_idx;
  55. cl_platform_id platform_id;
  56. cl_device_type device_type;
  57. cl_context context;
  58. cl_device_id device_id;
  59. cl_command_queue command_queue;
  60. int kernel_code_count;
  61. KernelCode kernel_code[MAX_KERNEL_CODE_NUM];
  62. AVOpenCLDeviceList device_list;
  63. } OpenclContext;
  64. #define OFFSET(x) offsetof(OpenclContext, x)
  65. static const AVOption opencl_options[] = {
  66. { "platform_idx", "set platform index value", OFFSET(platform_idx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX},
  67. { "device_idx", "set device index value", OFFSET(device_idx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX},
  68. { NULL }
  69. };
  70. static const AVClass openclutils_class = {
  71. .class_name = "opencl",
  72. .option = opencl_options,
  73. .item_name = av_default_item_name,
  74. .version = LIBAVUTIL_VERSION_INT,
  75. .log_level_offset_offset = offsetof(OpenclContext, log_offset),
  76. .parent_log_context_offset = offsetof(OpenclContext, log_ctx),
  77. };
  78. static OpenclContext opencl_ctx = {&openclutils_class};
  79. static const cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU};
  /* Maps one OpenCL status code to a human readable description. */
  typedef struct {
      int err_code;          /* OpenCL status value */
      const char *err_str;   /* text returned by av_opencl_errstr() */
  } OpenclErrorMsg;
  84. static const OpenclErrorMsg opencl_err_msg[] = {
  85. {CL_DEVICE_NOT_FOUND, "DEVICE NOT FOUND"},
  86. {CL_DEVICE_NOT_AVAILABLE, "DEVICE NOT AVAILABLE"},
  87. {CL_COMPILER_NOT_AVAILABLE, "COMPILER NOT AVAILABLE"},
  88. {CL_MEM_OBJECT_ALLOCATION_FAILURE, "MEM OBJECT ALLOCATION FAILURE"},
  89. {CL_OUT_OF_RESOURCES, "OUT OF RESOURCES"},
  90. {CL_OUT_OF_HOST_MEMORY, "OUT OF HOST MEMORY"},
  91. {CL_PROFILING_INFO_NOT_AVAILABLE, "PROFILING INFO NOT AVAILABLE"},
  92. {CL_MEM_COPY_OVERLAP, "MEM COPY OVERLAP"},
  93. {CL_IMAGE_FORMAT_MISMATCH, "IMAGE FORMAT MISMATCH"},
  94. {CL_IMAGE_FORMAT_NOT_SUPPORTED, "IMAGE FORMAT NOT_SUPPORTED"},
  95. {CL_BUILD_PROGRAM_FAILURE, "BUILD PROGRAM FAILURE"},
  96. {CL_MAP_FAILURE, "MAP FAILURE"},
  97. {CL_MISALIGNED_SUB_BUFFER_OFFSET, "MISALIGNED SUB BUFFER OFFSET"},
  98. {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"},
  99. {CL_COMPILE_PROGRAM_FAILURE, "COMPILE PROGRAM FAILURE"},
  100. {CL_LINKER_NOT_AVAILABLE, "LINKER NOT AVAILABLE"},
  101. {CL_LINK_PROGRAM_FAILURE, "LINK PROGRAM FAILURE"},
  102. {CL_DEVICE_PARTITION_FAILED, "DEVICE PARTITION FAILED"},
  103. {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "KERNEL ARG INFO NOT AVAILABLE"},
  104. {CL_INVALID_VALUE, "INVALID VALUE"},
  105. {CL_INVALID_DEVICE_TYPE, "INVALID DEVICE TYPE"},
  106. {CL_INVALID_PLATFORM, "INVALID PLATFORM"},
  107. {CL_INVALID_DEVICE, "INVALID DEVICE"},
  108. {CL_INVALID_CONTEXT, "INVALID CONTEXT"},
  109. {CL_INVALID_QUEUE_PROPERTIES, "INVALID QUEUE PROPERTIES"},
  110. {CL_INVALID_COMMAND_QUEUE, "INVALID COMMAND QUEUE"},
  111. {CL_INVALID_HOST_PTR, "INVALID HOST PTR"},
  112. {CL_INVALID_MEM_OBJECT, "INVALID MEM OBJECT"},
  113. {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "INVALID IMAGE FORMAT DESCRIPTOR"},
  114. {CL_INVALID_IMAGE_SIZE, "INVALID IMAGE SIZE"},
  115. {CL_INVALID_SAMPLER, "INVALID SAMPLER"},
  116. {CL_INVALID_BINARY, "INVALID BINARY"},
  117. {CL_INVALID_BUILD_OPTIONS, "INVALID BUILD OPTIONS"},
  118. {CL_INVALID_PROGRAM, "INVALID PROGRAM"},
  119. {CL_INVALID_PROGRAM_EXECUTABLE, "INVALID PROGRAM EXECUTABLE"},
  120. {CL_INVALID_KERNEL_NAME, "INVALID KERNEL NAME"},
  121. {CL_INVALID_KERNEL_DEFINITION, "INVALID KERNEL DEFINITION"},
  122. {CL_INVALID_KERNEL, "INVALID KERNEL"},
  123. {CL_INVALID_ARG_INDEX, "INVALID ARG INDEX"},
  124. {CL_INVALID_ARG_VALUE, "INVALID ARG VALUE"},
  125. {CL_INVALID_ARG_SIZE, "INVALID ARG_SIZE"},
  126. {CL_INVALID_KERNEL_ARGS, "INVALID KERNEL ARGS"},
  127. {CL_INVALID_WORK_DIMENSION, "INVALID WORK DIMENSION"},
  128. {CL_INVALID_WORK_GROUP_SIZE, "INVALID WORK GROUP SIZE"},
  129. {CL_INVALID_WORK_ITEM_SIZE, "INVALID WORK ITEM SIZE"},
  130. {CL_INVALID_GLOBAL_OFFSET, "INVALID GLOBAL OFFSET"},
  131. {CL_INVALID_EVENT_WAIT_LIST, "INVALID EVENT WAIT LIST"},
  132. {CL_INVALID_EVENT, "INVALID EVENT"},
  133. {CL_INVALID_OPERATION, "INVALID OPERATION"},
  134. {CL_INVALID_GL_OBJECT, "INVALID GL OBJECT"},
  135. {CL_INVALID_BUFFER_SIZE, "INVALID BUFFER SIZE"},
  136. {CL_INVALID_MIP_LEVEL, "INVALID MIP LEVEL"},
  137. {CL_INVALID_GLOBAL_WORK_SIZE, "INVALID GLOBAL WORK SIZE"},
  138. {CL_INVALID_PROPERTY, "INVALID PROPERTY"},
  139. {CL_INVALID_IMAGE_DESCRIPTOR, "INVALID IMAGE DESCRIPTOR"},
  140. {CL_INVALID_COMPILER_OPTIONS, "INVALID COMPILER OPTIONS"},
  141. {CL_INVALID_LINKER_OPTIONS, "INVALID LINKER OPTIONS"},
  142. {CL_INVALID_DEVICE_PARTITION_COUNT, "INVALID DEVICE PARTITION COUNT"},
  143. };
  144. const char *av_opencl_errstr(cl_int status)
  145. {
  146. int i;
  147. for (i = 0; i < FF_ARRAY_ELEMS(opencl_err_msg); i++) {
  148. if (opencl_err_msg[i].err_code == status)
  149. return opencl_err_msg[i].err_str;
  150. }
  151. return "unknown error";
  152. }
  153. static void free_device_list(AVOpenCLDeviceList *device_list)
  154. {
  155. int i, j;
  156. if (!device_list)
  157. return;
  158. for (i = 0; i < device_list->platform_num; i++) {
  159. if (!device_list->platform_node[i])
  160. continue;
  161. for (j = 0; j < device_list->platform_node[i]->device_num; j++) {
  162. av_freep(&(device_list->platform_node[i]->device_node[j]->device_name));
  163. av_freep(&(device_list->platform_node[i]->device_node[j]));
  164. }
  165. av_freep(&device_list->platform_node[i]->device_node);
  166. av_freep(&(device_list->platform_node[i]->platform_name));
  167. av_freep(&device_list->platform_node[i]);
  168. }
  169. av_freep(&device_list->platform_node);
  170. device_list->platform_num = 0;
  171. }
  172. static int get_device_list(AVOpenCLDeviceList *device_list)
  173. {
  174. cl_int status;
  175. int i, j, k, device_num, total_devices_num, ret = 0;
  176. int *devices_num;
  177. cl_platform_id *platform_ids = NULL;
  178. cl_device_id *device_ids = NULL;
  179. AVOpenCLDeviceNode *device_node = NULL;
  180. size_t platform_name_size = 0;
  181. size_t device_name_size = 0;
  182. status = clGetPlatformIDs(0, NULL, &device_list->platform_num);
  183. if (status != CL_SUCCESS) {
  184. av_log(&opencl_ctx, AV_LOG_ERROR,
  185. "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status));
  186. return AVERROR_EXTERNAL;
  187. }
  188. platform_ids = av_mallocz_array(device_list->platform_num, sizeof(cl_platform_id));
  189. if (!platform_ids)
  190. return AVERROR(ENOMEM);
  191. status = clGetPlatformIDs(device_list->platform_num, platform_ids, NULL);
  192. if (status != CL_SUCCESS) {
  193. av_log(&opencl_ctx, AV_LOG_ERROR,
  194. "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status));
  195. ret = AVERROR_EXTERNAL;
  196. goto end;
  197. }
  198. device_list->platform_node = av_mallocz_array(device_list->platform_num, sizeof(AVOpenCLPlatformNode *));
  199. if (!device_list->platform_node) {
  200. ret = AVERROR(ENOMEM);
  201. goto end;
  202. }
  203. devices_num = av_mallocz(sizeof(int) * FF_ARRAY_ELEMS(device_type));
  204. if (!devices_num) {
  205. ret = AVERROR(ENOMEM);
  206. goto end;
  207. }
  208. for (i = 0; i < device_list->platform_num; i++) {
  209. device_list->platform_node[i] = av_mallocz(sizeof(AVOpenCLPlatformNode));
  210. if (!device_list->platform_node[i]) {
  211. ret = AVERROR(ENOMEM);
  212. goto end;
  213. }
  214. device_list->platform_node[i]->platform_id = platform_ids[i];
  215. status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
  216. 0, NULL, &platform_name_size);
  217. if (status != CL_SUCCESS) {
  218. av_log(&opencl_ctx, AV_LOG_WARNING,
  219. "Could not get size of platform name: %s\n", av_opencl_errstr(status));
  220. } else {
  221. device_list->platform_node[i]->platform_name = av_malloc(platform_name_size * sizeof(char));
  222. if (!device_list->platform_node[i]->platform_name) {
  223. av_log(&opencl_ctx, AV_LOG_WARNING,
  224. "Could not allocate memory for device name: %s\n", av_opencl_errstr(status));
  225. } else {
  226. status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
  227. platform_name_size * sizeof(char),
  228. device_list->platform_node[i]->platform_name, NULL);
  229. if (status != CL_SUCCESS) {
  230. av_log(&opencl_ctx, AV_LOG_WARNING,
  231. "Could not get platform name: %s\n", av_opencl_errstr(status));
  232. }
  233. }
  234. }
  235. total_devices_num = 0;
  236. for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
  237. status = clGetDeviceIDs(device_list->platform_node[i]->platform_id,
  238. device_type[j], 0, NULL, &devices_num[j]);
  239. total_devices_num += devices_num[j];
  240. }
  241. device_list->platform_node[i]->device_node = av_mallocz_array(total_devices_num, sizeof(AVOpenCLDeviceNode *));
  242. if (!device_list->platform_node[i]->device_node) {
  243. ret = AVERROR(ENOMEM);
  244. goto end;
  245. }
  246. for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
  247. if (devices_num[j]) {
  248. device_ids = av_mallocz_array(devices_num[j], sizeof(cl_device_id));
  249. if (!device_ids) {
  250. ret = AVERROR(ENOMEM);
  251. goto end;
  252. }
  253. status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, device_type[j],
  254. devices_num[j], device_ids, NULL);
  255. if (status != CL_SUCCESS) {
  256. av_log(&opencl_ctx, AV_LOG_WARNING,
  257. "Could not get device ID: %s:\n", av_opencl_errstr(status));
  258. av_freep(&device_ids);
  259. continue;
  260. }
  261. for (k = 0; k < devices_num[j]; k++) {
  262. device_num = device_list->platform_node[i]->device_num;
  263. device_list->platform_node[i]->device_node[device_num] = av_mallocz(sizeof(AVOpenCLDeviceNode));
  264. if (!device_list->platform_node[i]->device_node[device_num]) {
  265. ret = AVERROR(ENOMEM);
  266. goto end;
  267. }
  268. device_node = device_list->platform_node[i]->device_node[device_num];
  269. device_node->device_id = device_ids[k];
  270. device_node->device_type = device_type[j];
  271. status = clGetDeviceInfo(device_node->device_id, CL_DEVICE_NAME,
  272. 0, NULL, &device_name_size);
  273. if (status != CL_SUCCESS) {
  274. av_log(&opencl_ctx, AV_LOG_WARNING,
  275. "Could not get size of device name: %s\n", av_opencl_errstr(status));
  276. continue;
  277. }
  278. device_node->device_name = av_malloc(device_name_size * sizeof(char));
  279. if (!device_node->device_name) {
  280. av_log(&opencl_ctx, AV_LOG_WARNING,
  281. "Could not allocate memory for device name: %s\n", av_opencl_errstr(status));
  282. continue;
  283. }
  284. status = clGetDeviceInfo(device_node->device_id, CL_DEVICE_NAME,
  285. device_name_size * sizeof(char),
  286. device_node->device_name, NULL);
  287. if (status != CL_SUCCESS) {
  288. av_log(&opencl_ctx, AV_LOG_WARNING,
  289. "Could not get device name: %s\n", av_opencl_errstr(status));
  290. continue;
  291. }
  292. device_list->platform_node[i]->device_num++;
  293. }
  294. av_freep(&device_ids);
  295. }
  296. }
  297. }
  298. end:
  299. av_freep(&platform_ids);
  300. av_freep(&devices_num);
  301. av_freep(&device_ids);
  302. if (ret < 0)
  303. free_device_list(device_list);
  304. return ret;
  305. }
  306. int av_opencl_get_device_list(AVOpenCLDeviceList **device_list)
  307. {
  308. int ret = 0;
  309. *device_list = av_mallocz(sizeof(AVOpenCLDeviceList));
  310. if (!(*device_list)) {
  311. av_log(&opencl_ctx, AV_LOG_ERROR, "Could not allocate opencl device list\n");
  312. return AVERROR(ENOMEM);
  313. }
  314. ret = get_device_list(*device_list);
  315. if (ret < 0) {
  316. av_log(&opencl_ctx, AV_LOG_ERROR, "Could not get device list from environment\n");
  317. free_device_list(*device_list);
  318. av_freep(device_list);
  319. return ret;
  320. }
  321. return ret;
  322. }
  323. void av_opencl_free_device_list(AVOpenCLDeviceList **device_list)
  324. {
  325. free_device_list(*device_list);
  326. av_freep(device_list);
  327. }
  /**
   * Make sure the global OpenCL lock exists.
   *
   * Without thread support this does nothing. With thread support a mutex is
   * allocated and initialised, then installed into atomic_opencl_lock with a
   * compare-and-swap against NULL. When the swap reports a non-NULL result
   * the freshly created mutex is destroyed and freed again.
   *
   * @return 0 on success, AVERROR(ENOMEM) or AVERROR(err) from
   *         pthread_mutex_init() on failure
   */
  static inline int init_opencl_mtx(void)
  {
  #if HAVE_THREADS
      pthread_mutex_t *candidate;
      int err;

      if (atomic_opencl_lock)
          return 0;

      candidate = av_malloc(sizeof(pthread_mutex_t));
      if (!candidate)
          return AVERROR(ENOMEM);

      err = pthread_mutex_init(candidate, NULL);
      if (err) {
          av_free(candidate);
          return AVERROR(err);
      }

      /* NOTE(review): presumably a non-NULL result means a lock was already installed. */
      if (avpriv_atomic_ptr_cas((void * volatile *)&atomic_opencl_lock, NULL, candidate)) {
          pthread_mutex_destroy(candidate);
          av_free(candidate);
      }
  #endif
      return 0;
  }
  348. int av_opencl_set_option(const char *key, const char *val)
  349. {
  350. int ret = init_opencl_mtx( );
  351. if (ret < 0)
  352. return ret;
  353. LOCK_OPENCL;
  354. if (!opencl_ctx.opt_init_flag) {
  355. av_opt_set_defaults(&opencl_ctx);
  356. opencl_ctx.opt_init_flag = 1;
  357. }
  358. ret = av_opt_set(&opencl_ctx, key, val, 0);
  359. UNLOCK_OPENCL;
  360. return ret;
  361. }
  362. int av_opencl_get_option(const char *key, uint8_t **out_val)
  363. {
  364. int ret = 0;
  365. LOCK_OPENCL;
  366. ret = av_opt_get(&opencl_ctx, key, 0, out_val);
  367. UNLOCK_OPENCL;
  368. return ret;
  369. }
  370. void av_opencl_free_option(void)
  371. {
  372. /*FIXME: free openclutils context*/
  373. LOCK_OPENCL;
  374. av_opt_free(&opencl_ctx);
  375. UNLOCK_OPENCL;
  376. }
  377. AVOpenCLExternalEnv *av_opencl_alloc_external_env(void)
  378. {
  379. AVOpenCLExternalEnv *ext = av_mallocz(sizeof(AVOpenCLExternalEnv));
  380. if (!ext) {
  381. av_log(&opencl_ctx, AV_LOG_ERROR,
  382. "Could not malloc external opencl environment data space\n");
  383. }
  384. return ext;
  385. }
  386. void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env)
  387. {
  388. av_freep(ext_opencl_env);
  389. }
  390. int av_opencl_register_kernel_code(const char *kernel_code)
  391. {
  392. int i, ret = init_opencl_mtx( );
  393. if (ret < 0)
  394. return ret;
  395. LOCK_OPENCL;
  396. if (opencl_ctx.kernel_code_count >= MAX_KERNEL_CODE_NUM) {
  397. av_log(&opencl_ctx, AV_LOG_ERROR,
  398. "Could not register kernel code, maximum number of registered kernel code %d already reached\n",
  399. MAX_KERNEL_CODE_NUM);
  400. ret = AVERROR(EINVAL);
  401. goto end;
  402. }
  403. for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
  404. if (opencl_ctx.kernel_code[i].kernel_string == kernel_code) {
  405. av_log(&opencl_ctx, AV_LOG_WARNING, "Same kernel code has been registered\n");
  406. goto end;
  407. }
  408. }
  409. opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].kernel_string = kernel_code;
  410. opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].is_compiled = 0;
  411. opencl_ctx.kernel_code_count++;
  412. end:
  413. UNLOCK_OPENCL;
  414. return ret;
  415. }
  416. cl_program av_opencl_compile(const char *program_name, const char *build_opts)
  417. {
  418. int i;
  419. cl_int status, build_status;
  420. int kernel_code_idx = 0;
  421. const char *kernel_source;
  422. size_t kernel_code_len;
  423. char* ptr = NULL;
  424. cl_program program = NULL;
  425. size_t log_size;
  426. char *log = NULL;
  427. LOCK_OPENCL;
  428. for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
  429. // identify a program using a unique name within the kernel source
  430. ptr = av_stristr(opencl_ctx.kernel_code[i].kernel_string, program_name);
  431. if (ptr && !opencl_ctx.kernel_code[i].is_compiled) {
  432. kernel_source = opencl_ctx.kernel_code[i].kernel_string;
  433. kernel_code_len = strlen(opencl_ctx.kernel_code[i].kernel_string);
  434. kernel_code_idx = i;
  435. break;
  436. }
  437. }
  438. if (!kernel_source) {
  439. av_log(&opencl_ctx, AV_LOG_ERROR,
  440. "Unable to find OpenCL kernel source '%s'\n", program_name);
  441. goto end;
  442. }
  443. /* create a CL program from kernel source */
  444. program = clCreateProgramWithSource(opencl_ctx.context, 1, &kernel_source, &kernel_code_len, &status);
  445. if(status != CL_SUCCESS) {
  446. av_log(&opencl_ctx, AV_LOG_ERROR,
  447. "Unable to create OpenCL program '%s': %s\n", program_name, av_opencl_errstr(status));
  448. program = NULL;
  449. goto end;
  450. }
  451. build_status = clBuildProgram(program, 1, &(opencl_ctx.device_id), build_opts, NULL, NULL);
  452. status = clGetProgramBuildInfo(program, opencl_ctx.device_id,
  453. CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
  454. if (status != CL_SUCCESS) {
  455. av_log(&opencl_ctx, AV_LOG_WARNING,
  456. "Failed to get compilation log: %s\n",
  457. av_opencl_errstr(status));
  458. } else {
  459. log = av_malloc(log_size);
  460. if (log) {
  461. status = clGetProgramBuildInfo(program, opencl_ctx.device_id,
  462. CL_PROGRAM_BUILD_LOG, log_size,
  463. log, NULL);
  464. if (status != CL_SUCCESS) {
  465. av_log(&opencl_ctx, AV_LOG_WARNING,
  466. "Failed to get compilation log: %s\n",
  467. av_opencl_errstr(status));
  468. } else {
  469. int level = build_status == CL_SUCCESS ? AV_LOG_DEBUG :
  470. AV_LOG_ERROR;
  471. av_log(&opencl_ctx, level, "Compilation log:\n%s\n", log);
  472. }
  473. }
  474. av_freep(&log);
  475. }
  476. if (build_status != CL_SUCCESS) {
  477. av_log(&opencl_ctx, AV_LOG_ERROR,
  478. "Compilation failed with OpenCL program '%s': %s\n",
  479. program_name, av_opencl_errstr(build_status));
  480. program = NULL;
  481. goto end;
  482. }
  483. opencl_ctx.kernel_code[kernel_code_idx].is_compiled = 1;
  484. end:
  485. UNLOCK_OPENCL;
  486. return program;
  487. }
  488. cl_command_queue av_opencl_get_command_queue(void)
  489. {
  490. return opencl_ctx.command_queue;
  491. }
  492. static int init_opencl_env(OpenclContext *opencl_ctx, AVOpenCLExternalEnv *ext_opencl_env)
  493. {
  494. cl_int status;
  495. cl_context_properties cps[3];
  496. int i, ret = 0;
  497. AVOpenCLDeviceNode *device_node = NULL;
  498. if (ext_opencl_env) {
  499. if (opencl_ctx->is_user_created)
  500. return 0;
  501. opencl_ctx->platform_id = ext_opencl_env->platform_id;
  502. opencl_ctx->is_user_created = 1;
  503. opencl_ctx->command_queue = ext_opencl_env->command_queue;
  504. opencl_ctx->context = ext_opencl_env->context;
  505. opencl_ctx->device_id = ext_opencl_env->device_id;
  506. opencl_ctx->device_type = ext_opencl_env->device_type;
  507. } else {
  508. if (!opencl_ctx->is_user_created) {
  509. if (!opencl_ctx->device_list.platform_num) {
  510. ret = get_device_list(&opencl_ctx->device_list);
  511. if (ret < 0) {
  512. return ret;
  513. }
  514. }
  515. if (opencl_ctx->platform_idx >= 0) {
  516. if (opencl_ctx->device_list.platform_num < opencl_ctx->platform_idx + 1) {
  517. av_log(opencl_ctx, AV_LOG_ERROR, "User set platform index not exist\n");
  518. return AVERROR(EINVAL);
  519. }
  520. if (!opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num) {
  521. av_log(opencl_ctx, AV_LOG_ERROR, "No devices in user specific platform with index %d\n",
  522. opencl_ctx->platform_idx);
  523. return AVERROR(EINVAL);
  524. }
  525. opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_id;
  526. } else {
  527. /* get a usable platform by default*/
  528. for (i = 0; i < opencl_ctx->device_list.platform_num; i++) {
  529. if (opencl_ctx->device_list.platform_node[i]->device_num) {
  530. opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[i]->platform_id;
  531. opencl_ctx->platform_idx = i;
  532. break;
  533. }
  534. }
  535. }
  536. if (!opencl_ctx->platform_id) {
  537. av_log(opencl_ctx, AV_LOG_ERROR, "Could not get OpenCL platforms\n");
  538. return AVERROR_EXTERNAL;
  539. }
  540. /* get a usable device*/
  541. if (opencl_ctx->device_idx >= 0) {
  542. if (opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num < opencl_ctx->device_idx + 1) {
  543. av_log(opencl_ctx, AV_LOG_ERROR,
  544. "Could not get OpenCL device idx %d in the user set platform\n", opencl_ctx->platform_idx);
  545. return AVERROR(EINVAL);
  546. }
  547. } else {
  548. opencl_ctx->device_idx = 0;
  549. }
  550. device_node = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_node[opencl_ctx->device_idx];
  551. opencl_ctx->device_id = device_node->device_id;
  552. opencl_ctx->device_type = device_node->device_type;
  553. /*
  554. * Use available platform.
  555. */
  556. av_log(opencl_ctx, AV_LOG_VERBOSE, "Platform Name: %s, Device Name: %s\n",
  557. opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_name,
  558. device_node->device_name);
  559. cps[0] = CL_CONTEXT_PLATFORM;
  560. cps[1] = (cl_context_properties)opencl_ctx->platform_id;
  561. cps[2] = 0;
  562. opencl_ctx->context = clCreateContextFromType(cps, opencl_ctx->device_type,
  563. NULL, NULL, &status);
  564. if (status != CL_SUCCESS) {
  565. av_log(opencl_ctx, AV_LOG_ERROR,
  566. "Could not get OpenCL context from device type: %s\n", av_opencl_errstr(status));
  567. return AVERROR_EXTERNAL;
  568. }
  569. opencl_ctx->command_queue = clCreateCommandQueue(opencl_ctx->context, opencl_ctx->device_id,
  570. 0, &status);
  571. if (status != CL_SUCCESS) {
  572. av_log(opencl_ctx, AV_LOG_ERROR,
  573. "Could not create OpenCL command queue: %s\n", av_opencl_errstr(status));
  574. return AVERROR_EXTERNAL;
  575. }
  576. }
  577. }
  578. return ret;
  579. }
  580. int av_opencl_init(AVOpenCLExternalEnv *ext_opencl_env)
  581. {
  582. int ret = init_opencl_mtx( );
  583. if (ret < 0)
  584. return ret;
  585. LOCK_OPENCL;
  586. if (!opencl_ctx.init_count) {
  587. if (!opencl_ctx.opt_init_flag) {
  588. av_opt_set_defaults(&opencl_ctx);
  589. opencl_ctx.opt_init_flag = 1;
  590. }
  591. ret = init_opencl_env(&opencl_ctx, ext_opencl_env);
  592. if (ret < 0)
  593. goto end;
  594. if (opencl_ctx.kernel_code_count <= 0) {
  595. av_log(&opencl_ctx, AV_LOG_ERROR,
  596. "No kernel code is registered, compile kernel file failed\n");
  597. ret = AVERROR(EINVAL);
  598. goto end;
  599. }
  600. }
  601. opencl_ctx.init_count++;
  602. end:
  603. UNLOCK_OPENCL;
  604. return ret;
  605. }
  606. void av_opencl_uninit(void)
  607. {
  608. int i;
  609. cl_int status;
  610. LOCK_OPENCL;
  611. opencl_ctx.init_count--;
  612. if (opencl_ctx.is_user_created)
  613. goto end;
  614. if (opencl_ctx.init_count > 0)
  615. goto end;
  616. if (opencl_ctx.command_queue) {
  617. status = clReleaseCommandQueue(opencl_ctx.command_queue);
  618. if (status != CL_SUCCESS) {
  619. av_log(&opencl_ctx, AV_LOG_ERROR,
  620. "Could not release OpenCL command queue: %s\n", av_opencl_errstr(status));
  621. }
  622. opencl_ctx.command_queue = NULL;
  623. }
  624. if (opencl_ctx.context) {
  625. status = clReleaseContext(opencl_ctx.context);
  626. if (status != CL_SUCCESS) {
  627. av_log(&opencl_ctx, AV_LOG_ERROR,
  628. "Could not release OpenCL context: %s\n", av_opencl_errstr(status));
  629. }
  630. opencl_ctx.context = NULL;
  631. }
  632. for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
  633. opencl_ctx.kernel_code[i].is_compiled = 0;
  634. }
  635. free_device_list(&opencl_ctx.device_list);
  636. end:
  637. if (opencl_ctx.init_count <= 0)
  638. av_opt_free(&opencl_ctx); //FIXME: free openclutils context
  639. UNLOCK_OPENCL;
  640. }
  641. int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr)
  642. {
  643. cl_int status;
  644. *cl_buf = clCreateBuffer(opencl_ctx.context, flags, cl_buf_size, host_ptr, &status);
  645. if (status != CL_SUCCESS) {
  646. av_log(&opencl_ctx, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", av_opencl_errstr(status));
  647. return AVERROR_EXTERNAL;
  648. }
  649. return 0;
  650. }
  651. void av_opencl_buffer_release(cl_mem *cl_buf)
  652. {
  653. cl_int status = 0;
  654. if (!cl_buf)
  655. return;
  656. status = clReleaseMemObject(*cl_buf);
  657. if (status != CL_SUCCESS) {
  658. av_log(&opencl_ctx, AV_LOG_ERROR,
  659. "Could not release OpenCL buffer: %s\n", av_opencl_errstr(status));
  660. }
  661. memset(cl_buf, 0, sizeof(*cl_buf));
  662. }
  663. int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
  664. {
  665. cl_int status;
  666. void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf,
  667. CL_TRUE, CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size,
  668. 0, NULL, NULL, &status);
  669. if (status != CL_SUCCESS) {
  670. av_log(&opencl_ctx, AV_LOG_ERROR,
  671. "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
  672. return AVERROR_EXTERNAL;
  673. }
  674. memcpy(mapped, src_buf, buf_size);
  675. status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
  676. if (status != CL_SUCCESS) {
  677. av_log(&opencl_ctx, AV_LOG_ERROR,
  678. "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
  679. return AVERROR_EXTERNAL;
  680. }
  681. return 0;
  682. }
  683. int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size)
  684. {
  685. cl_int status;
  686. void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf,
  687. CL_TRUE, CL_MAP_READ, 0, buf_size,
  688. 0, NULL, NULL, &status);
  689. if (status != CL_SUCCESS) {
  690. av_log(&opencl_ctx, AV_LOG_ERROR,
  691. "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
  692. return AVERROR_EXTERNAL;
  693. }
  694. memcpy(dst_buf, mapped, buf_size);
  695. status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL);
  696. if (status != CL_SUCCESS) {
  697. av_log(&opencl_ctx, AV_LOG_ERROR,
  698. "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
  699. return AVERROR_EXTERNAL;
  700. }
  701. return 0;
  702. }
  703. int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset,
  704. uint8_t **src_data, int *plane_size, int plane_num)
  705. {
  706. int i, buffer_size = 0;
  707. uint8_t *temp;
  708. cl_int status;
  709. void *mapped;
  710. if ((unsigned int)plane_num > 8) {
  711. return AVERROR(EINVAL);
  712. }
  713. for (i = 0;i < plane_num;i++) {
  714. buffer_size += plane_size[i];
  715. }
  716. if (buffer_size > cl_buffer_size) {
  717. av_log(&opencl_ctx, AV_LOG_ERROR,
  718. "Cannot write image to OpenCL buffer: buffer too small\n");
  719. return AVERROR(EINVAL);
  720. }
  721. mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf,
  722. CL_TRUE, CL_MAP_WRITE, 0, buffer_size + dst_cl_offset,
  723. 0, NULL, NULL, &status);
  724. if (status != CL_SUCCESS) {
  725. av_log(&opencl_ctx, AV_LOG_ERROR,
  726. "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
  727. return AVERROR_EXTERNAL;
  728. }
  729. temp = mapped;
  730. temp += dst_cl_offset;
  731. for (i = 0; i < plane_num; i++) {
  732. memcpy(temp, src_data[i], plane_size[i]);
  733. temp += plane_size[i];
  734. }
  735. status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
  736. if (status != CL_SUCCESS) {
  737. av_log(&opencl_ctx, AV_LOG_ERROR,
  738. "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
  739. return AVERROR_EXTERNAL;
  740. }
  741. return 0;
  742. }
  743. int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num,
  744. cl_mem src_cl_buf, size_t cl_buffer_size)
  745. {
  746. int i,buffer_size = 0,ret = 0;
  747. uint8_t *temp;
  748. void *mapped;
  749. cl_int status;
  750. if ((unsigned int)plane_num > 8) {
  751. return AVERROR(EINVAL);
  752. }
  753. for (i = 0; i < plane_num; i++) {
  754. buffer_size += plane_size[i];
  755. }
  756. if (buffer_size > cl_buffer_size) {
  757. av_log(&opencl_ctx, AV_LOG_ERROR,
  758. "Cannot write image to CPU buffer: OpenCL buffer too small\n");
  759. return AVERROR(EINVAL);
  760. }
  761. mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf,
  762. CL_TRUE, CL_MAP_READ, 0, buffer_size,
  763. 0, NULL, NULL, &status);
  764. if (status != CL_SUCCESS) {
  765. av_log(&opencl_ctx, AV_LOG_ERROR,
  766. "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
  767. return AVERROR_EXTERNAL;
  768. }
  769. temp = mapped;
  770. if (ret >= 0) {
  771. for (i = 0; i < plane_num; i++) {
  772. memcpy(dst_data[i], temp, plane_size[i]);
  773. temp += plane_size[i];
  774. }
  775. }
  776. status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL);
  777. if (status != CL_SUCCESS) {
  778. av_log(&opencl_ctx, AV_LOG_ERROR,
  779. "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
  780. return AVERROR_EXTERNAL;
  781. }
  782. return 0;
  783. }
  784. int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device_node, cl_platform_id platform,
  785. int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env))
  786. {
  787. int64_t ret = 0;
  788. cl_int status;
  789. cl_context_properties cps[3];
  790. AVOpenCLExternalEnv *ext_opencl_env = NULL;
  791. ext_opencl_env = av_opencl_alloc_external_env();
  792. ext_opencl_env->device_id = device_node->device_id;
  793. ext_opencl_env->device_type = device_node->device_type;
  794. av_log(&opencl_ctx, AV_LOG_VERBOSE, "Performing test on OpenCL device %s\n",
  795. device_node->device_name);
  796. cps[0] = CL_CONTEXT_PLATFORM;
  797. cps[1] = (cl_context_properties)platform;
  798. cps[2] = 0;
  799. ext_opencl_env->context = clCreateContextFromType(cps, ext_opencl_env->device_type,
  800. NULL, NULL, &status);
  801. if (status != CL_SUCCESS || !ext_opencl_env->context) {
  802. ret = AVERROR_EXTERNAL;
  803. goto end;
  804. }
  805. ext_opencl_env->command_queue = clCreateCommandQueue(ext_opencl_env->context,
  806. ext_opencl_env->device_id, 0, &status);
  807. if (status != CL_SUCCESS || !ext_opencl_env->command_queue) {
  808. ret = AVERROR_EXTERNAL;
  809. goto end;
  810. }
  811. ret = benchmark(ext_opencl_env);
  812. if (ret < 0)
  813. av_log(&opencl_ctx, AV_LOG_ERROR, "Benchmark failed with OpenCL device %s\n",
  814. device_node->device_name);
  815. end:
  816. if (ext_opencl_env->command_queue)
  817. clReleaseCommandQueue(ext_opencl_env->command_queue);
  818. if (ext_opencl_env->context)
  819. clReleaseContext(ext_opencl_env->context);
  820. av_opencl_free_external_env(&ext_opencl_env);
  821. return ret;
  822. }