You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

138 lines
5.0KB

  1. /*
  2. * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #ifndef AVFILTER_UNSHARP_KERNEL_H
  21. #define AVFILTER_UNSHARP_KERNEL_H
  22. #include "libavutil/opencl.h"
  23. const char *ff_kernel_unsharp_opencl = AV_OPENCL_KERNEL(
  /*
   * Clamp a signed integer to the unsigned 8-bit range [0, 255].
   *
   * Values below 0 map to 0, values above 255 map to 255, everything else is
   * returned unchanged.
   *
   * The range test is written with plain comparisons so that it never negates
   * its argument (negating the most negative int overflows) and does not
   * depend on how a negative value is shifted right.
   *
   * The function is deliberately not declared "inline": with C99 inline
   * semantics a bare inline definition provides no external definition, so a
   * call that the compiler chooses not to inline may be left unresolved.
   */
  unsigned char clip_uint8(int a)
  {
      if (a < 0)
          return 0;
      if (a > 255)
          return 255;
      return (unsigned char)a;
  }
  31. kernel void unsharp(global unsigned char *src,
  32. global unsigned char *dst,
  33. const global unsigned int *mask_lu,
  34. const global unsigned int *mask_ch,
  35. int amount_lu,
  36. int amount_ch,
  37. int step_x_lu,
  38. int step_y_lu,
  39. int step_x_ch,
  40. int step_y_ch,
  41. int scalebits_lu,
  42. int scalebits_ch,
  43. int halfscale_lu,
  44. int halfscale_ch,
  45. int src_stride_lu,
  46. int src_stride_ch,
  47. int dst_stride_lu,
  48. int dst_stride_ch,
  49. int height,
  50. int width,
  51. int ch,
  52. int cw)
  53. {
  54. global unsigned char *dst_y = dst;
  55. global unsigned char *dst_u = dst_y + height * dst_stride_lu;
  56. global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
  57. global unsigned char *src_y = src;
  58. global unsigned char *src_u = src_y + height * src_stride_lu;
  59. global unsigned char *src_v = src_u + ch * src_stride_ch;
  60. global unsigned char *temp_dst;
  61. global unsigned char *temp_src;
  62. const global unsigned int *temp_mask;
  63. int global_id = get_global_id(0);
  64. int i, j, x, y, temp_src_stride, temp_dst_stride, temp_height, temp_width, temp_steps_x, temp_steps_y,
  65. temp_amount, temp_scalebits, temp_halfscale, sum, idx_x, idx_y, temp, res;
  66. if (global_id < width * height) {
  67. y = global_id / width;
  68. x = global_id % width;
  69. temp_dst = dst_y;
  70. temp_src = src_y;
  71. temp_src_stride = src_stride_lu;
  72. temp_dst_stride = dst_stride_lu;
  73. temp_height = height;
  74. temp_width = width;
  75. temp_steps_x = step_x_lu;
  76. temp_steps_y = step_y_lu;
  77. temp_mask = mask_lu;
  78. temp_amount = amount_lu;
  79. temp_scalebits = scalebits_lu;
  80. temp_halfscale = halfscale_lu;
  81. } else if ((global_id >= width * height) && (global_id < width * height + ch * cw)) {
  82. y = (global_id - width * height) / cw;
  83. x = (global_id - width * height) % cw;
  84. temp_dst = dst_u;
  85. temp_src = src_u;
  86. temp_src_stride = src_stride_ch;
  87. temp_dst_stride = dst_stride_ch;
  88. temp_height = ch;
  89. temp_width = cw;
  90. temp_steps_x = step_x_ch;
  91. temp_steps_y = step_y_ch;
  92. temp_mask = mask_ch;
  93. temp_amount = amount_ch;
  94. temp_scalebits = scalebits_ch;
  95. temp_halfscale = halfscale_ch;
  96. } else {
  97. y = (global_id - width * height - ch * cw) / cw;
  98. x = (global_id - width * height - ch * cw) % cw;
  99. temp_dst = dst_v;
  100. temp_src = src_v;
  101. temp_src_stride = src_stride_ch;
  102. temp_dst_stride = dst_stride_ch;
  103. temp_height = ch;
  104. temp_width = cw;
  105. temp_steps_x = step_x_ch;
  106. temp_steps_y = step_y_ch;
  107. temp_mask = mask_ch;
  108. temp_amount = amount_ch;
  109. temp_scalebits = scalebits_ch;
  110. temp_halfscale = halfscale_ch;
  111. }
  112. if (temp_amount) {
  113. sum = 0;
  114. for (j = 0; j <= 2 * temp_steps_y; j++) {
  115. idx_y = (y - temp_steps_y + j) <= 0 ? 0 : (y - temp_steps_y + j) >= temp_height ? temp_height-1 : y - temp_steps_y + j;
  116. for (i = 0; i <= 2 * temp_steps_x; i++) {
  117. idx_x = (x - temp_steps_x + i) <= 0 ? 0 : (x - temp_steps_x + i) >= temp_width ? temp_width-1 : x - temp_steps_x + i;
  118. sum += temp_mask[i + j * (2 * temp_steps_x + 1)] * temp_src[idx_x + idx_y * temp_src_stride];
  119. }
  120. }
  121. temp = (int)temp_src[x + y * temp_src_stride];
  122. res = temp + (((temp - (int)((sum + temp_halfscale) >> temp_scalebits)) * temp_amount) >> 16);
  123. temp_dst[x + y * temp_dst_stride] = clip_uint8(res);
  124. } else {
  125. temp_dst[x + y * temp_dst_stride] = temp_src[x + y * temp_src_stride];
  126. }
  127. }
  128. );
  129. #endif /* AVFILTER_UNSHARP_KERNEL_H */