You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1742 lines
64KB

  1. /*
  2. * HEVC video decoder
  3. *
  4. * Copyright (C) 2012 - 2013 Guillaume Martres
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "get_bits.h"
  23. #include "hevc.h"
  24. #include "bit_depth_template.c"
  25. #include "hevcdsp.h"
  26. static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
  27. GetBitContext *gb, int pcm_bit_depth)
  28. {
  29. int x, y;
  30. pixel *dst = (pixel *)_dst;
  31. stride /= sizeof(pixel);
  32. for (y = 0; y < height; y++) {
  33. for (x = 0; x < width; x++)
  34. dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
  35. dst += stride;
  36. }
  37. }
  38. static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
  39. ptrdiff_t stride)
  40. {
  41. int x, y;
  42. pixel *dst = (pixel *)_dst;
  43. stride /= sizeof(pixel);
  44. for (y = 0; y < 4; y++) {
  45. for (x = 0; x < 4; x++) {
  46. dst[x] = av_clip_pixel(dst[x] + *coeffs);
  47. coeffs++;
  48. }
  49. dst += stride;
  50. }
  51. }
  52. static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
  53. ptrdiff_t stride)
  54. {
  55. int x, y;
  56. pixel *dst = (pixel *)_dst;
  57. stride /= sizeof(pixel);
  58. for (y = 0; y < 8; y++) {
  59. for (x = 0; x < 8; x++) {
  60. dst[x] = av_clip_pixel(dst[x] + *coeffs);
  61. coeffs++;
  62. }
  63. dst += stride;
  64. }
  65. }
  66. static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
  67. ptrdiff_t stride)
  68. {
  69. int x, y;
  70. pixel *dst = (pixel *)_dst;
  71. stride /= sizeof(pixel);
  72. for (y = 0; y < 16; y++) {
  73. for (x = 0; x < 16; x++) {
  74. dst[x] = av_clip_pixel(dst[x] + *coeffs);
  75. coeffs++;
  76. }
  77. dst += stride;
  78. }
  79. }
  80. static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
  81. ptrdiff_t stride)
  82. {
  83. int x, y;
  84. pixel *dst = (pixel *)_dst;
  85. stride /= sizeof(pixel);
  86. for (y = 0; y < 32; y++) {
  87. for (x = 0; x < 32; x++) {
  88. dst[x] = av_clip_pixel(dst[x] + *coeffs);
  89. coeffs++;
  90. }
  91. dst += stride;
  92. }
  93. }
  94. static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
  95. {
  96. int16_t *coeffs = (int16_t *) _coeffs;
  97. int x, y;
  98. int size = 1 << log2_size;
  99. if (mode) {
  100. coeffs += size;
  101. for (y = 0; y < size - 1; y++) {
  102. for (x = 0; x < size; x++)
  103. coeffs[x] += coeffs[x - size];
  104. coeffs += size;
  105. }
  106. } else {
  107. for (y = 0; y < size; y++) {
  108. for (x = 1; x < size; x++)
  109. coeffs[x] += coeffs[x - 1];
  110. coeffs += size;
  111. }
  112. }
  113. }
  114. static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
  115. {
  116. int shift = 15 - BIT_DEPTH - log2_size;
  117. int x, y;
  118. int size = 1 << log2_size;
  119. int16_t *coeffs = _coeffs;
  120. if (shift > 0) {
  121. int offset = 1 << (shift - 1);
  122. for (y = 0; y < size; y++) {
  123. for (x = 0; x < size; x++) {
  124. *coeffs = (*coeffs + offset) >> shift;
  125. coeffs++;
  126. }
  127. }
  128. } else {
  129. for (y = 0; y < size; y++) {
  130. for (x = 0; x < size; x++) {
  131. *coeffs = *coeffs << -shift;
  132. coeffs++;
  133. }
  134. }
  135. }
  136. }
  137. #define SET(dst, x) (dst) = (x)
  138. #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
  139. #define ADD_AND_SCALE(dst, x) \
  140. (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
  141. #define TR_4x4_LUMA(dst, src, step, assign) \
  142. do { \
  143. int c0 = src[0 * step] + src[2 * step]; \
  144. int c1 = src[2 * step] + src[3 * step]; \
  145. int c2 = src[0 * step] - src[3 * step]; \
  146. int c3 = 74 * src[1 * step]; \
  147. \
  148. assign(dst[2 * step], 74 * (src[0 * step] - \
  149. src[2 * step] + \
  150. src[3 * step])); \
  151. assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
  152. assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
  153. assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
  154. } while (0)
  155. static void FUNC(transform_4x4_luma)(int16_t *coeffs)
  156. {
  157. int i;
  158. int shift = 7;
  159. int add = 1 << (shift - 1);
  160. int16_t *src = coeffs;
  161. for (i = 0; i < 4; i++) {
  162. TR_4x4_LUMA(src, src, 4, SCALE);
  163. src++;
  164. }
  165. shift = 20 - BIT_DEPTH;
  166. add = 1 << (shift - 1);
  167. for (i = 0; i < 4; i++) {
  168. TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
  169. coeffs += 4;
  170. }
  171. }
  172. #undef TR_4x4_LUMA
  173. #define TR_4(dst, src, dstep, sstep, assign, end) \
  174. do { \
  175. const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
  176. const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
  177. const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
  178. const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
  179. \
  180. assign(dst[0 * dstep], e0 + o0); \
  181. assign(dst[1 * dstep], e1 + o1); \
  182. assign(dst[2 * dstep], e1 - o1); \
  183. assign(dst[3 * dstep], e0 - o0); \
  184. } while (0)
  185. #define TR_8(dst, src, dstep, sstep, assign, end) \
  186. do { \
  187. int i, j; \
  188. int e_8[4]; \
  189. int o_8[4] = { 0 }; \
  190. for (i = 0; i < 4; i++) \
  191. for (j = 1; j < end; j += 2) \
  192. o_8[i] += transform[4 * j][i] * src[j * sstep]; \
  193. TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
  194. \
  195. for (i = 0; i < 4; i++) { \
  196. assign(dst[i * dstep], e_8[i] + o_8[i]); \
  197. assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
  198. } \
  199. } while (0)
  200. #define TR_16(dst, src, dstep, sstep, assign, end) \
  201. do { \
  202. int i, j; \
  203. int e_16[8]; \
  204. int o_16[8] = { 0 }; \
  205. for (i = 0; i < 8; i++) \
  206. for (j = 1; j < end; j += 2) \
  207. o_16[i] += transform[2 * j][i] * src[j * sstep]; \
  208. TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
  209. \
  210. for (i = 0; i < 8; i++) { \
  211. assign(dst[i * dstep], e_16[i] + o_16[i]); \
  212. assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
  213. } \
  214. } while (0)
  215. #define TR_32(dst, src, dstep, sstep, assign, end) \
  216. do { \
  217. int i, j; \
  218. int e_32[16]; \
  219. int o_32[16] = { 0 }; \
  220. for (i = 0; i < 16; i++) \
  221. for (j = 1; j < end; j += 2) \
  222. o_32[i] += transform[j][i] * src[j * sstep]; \
  223. TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \
  224. \
  225. for (i = 0; i < 16; i++) { \
  226. assign(dst[i * dstep], e_32[i] + o_32[i]); \
  227. assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
  228. } \
  229. } while (0)
  230. #define IDCT_VAR4(H) \
  231. int limit2 = FFMIN(col_limit + 4, H)
  232. #define IDCT_VAR8(H) \
  233. int limit = FFMIN(col_limit, H); \
  234. int limit2 = FFMIN(col_limit + 4, H)
  235. #define IDCT_VAR16(H) IDCT_VAR8(H)
  236. #define IDCT_VAR32(H) IDCT_VAR8(H)
  237. #define IDCT(H) \
  238. static void FUNC(idct_##H ##x ##H )( \
  239. int16_t *coeffs, int col_limit) { \
  240. int i; \
  241. int shift = 7; \
  242. int add = 1 << (shift - 1); \
  243. int16_t *src = coeffs; \
  244. IDCT_VAR ##H(H); \
  245. \
  246. for (i = 0; i < H; i++) { \
  247. TR_ ## H(src, src, H, H, SCALE, limit2); \
  248. if (limit2 < H && i%4 == 0 && !!i) \
  249. limit2 -= 4; \
  250. src++; \
  251. } \
  252. \
  253. shift = 20 - BIT_DEPTH; \
  254. add = 1 << (shift - 1); \
  255. for (i = 0; i < H; i++) { \
  256. TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
  257. coeffs += H; \
  258. } \
  259. }
  260. #define IDCT_DC(H) \
  261. static void FUNC(idct_##H ##x ##H ##_dc)( \
  262. int16_t *coeffs) { \
  263. int i, j; \
  264. int shift = 14 - BIT_DEPTH; \
  265. int add = 1 << (shift - 1); \
  266. int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
  267. \
  268. for (j = 0; j < H; j++) { \
  269. for (i = 0; i < H; i++) { \
  270. coeffs[i+j*H] = coeff; \
  271. } \
  272. } \
  273. }
/* Instantiate the inverse transforms for block sizes 4, 8, 16 and 32. */
IDCT( 4)
IDCT( 8)
IDCT(16)
IDCT(32)
/* Instantiate the DC-only variants for the same block sizes. */
IDCT_DC( 4)
IDCT_DC( 8)
IDCT_DC(16)
IDCT_DC(32)
/* Remove the transform and store helper macros defined above. */
#undef TR_4
#undef TR_8
#undef TR_16
#undef TR_32
#undef SET
#undef SCALE
#undef ADD_AND_SCALE
  289. static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
  290. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  291. int *borders, int width, int height,
  292. int c_idx)
  293. {
  294. pixel *dst = (pixel *)_dst;
  295. pixel *src = (pixel *)_src;
  296. int offset_table[32] = { 0 };
  297. int k, y, x;
  298. int shift = BIT_DEPTH - 5;
  299. int *sao_offset_val = sao->offset_val[c_idx];
  300. int sao_left_class = sao->band_position[c_idx];
  301. stride_dst /= sizeof(pixel);
  302. stride_src /= sizeof(pixel);
  303. for (k = 0; k < 4; k++)
  304. offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
  305. for (y = 0; y < height; y++) {
  306. for (x = 0; x < width; x++)
  307. dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
  308. dst += stride_dst;
  309. src += stride_src;
  310. }
  311. }
  312. #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
  313. static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src,
  314. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  315. int width, int height,
  316. int c_idx, int init_x, int init_y) {
  317. static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
  318. static const int8_t pos[4][2][2] = {
  319. { { -1, 0 }, { 1, 0 } }, // horizontal
  320. { { 0, -1 }, { 0, 1 } }, // vertical
  321. { { -1, -1 }, { 1, 1 } }, // 45 degree
  322. { { 1, -1 }, { -1, 1 } }, // 135 degree
  323. };
  324. int *sao_offset_val = sao->offset_val[c_idx];
  325. int sao_eo_class = sao->eo_class[c_idx];
  326. pixel *dst = (pixel *)_dst;
  327. pixel *src = (pixel *)_src;
  328. int y_stride_src = init_y * stride_src;
  329. int y_stride_dst = init_y * stride_dst;
  330. int pos_0_0 = pos[sao_eo_class][0][0];
  331. int pos_0_1 = pos[sao_eo_class][0][1];
  332. int pos_1_0 = pos[sao_eo_class][1][0];
  333. int pos_1_1 = pos[sao_eo_class][1][1];
  334. int x, y;
  335. int y_stride_0_1 = (init_y + pos_0_1) * stride_src;
  336. int y_stride_1_1 = (init_y + pos_1_1) * stride_src;
  337. for (y = init_y; y < height; y++) {
  338. for (x = init_x; x < width; x++) {
  339. int diff0 = CMP(src[x + y_stride_src], src[x + pos_0_0 + y_stride_0_1]);
  340. int diff1 = CMP(src[x + y_stride_src], src[x + pos_1_0 + y_stride_1_1]);
  341. int offset_val = edge_idx[2 + diff0 + diff1];
  342. dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + sao_offset_val[offset_val]);
  343. }
  344. y_stride_src += stride_src;
  345. y_stride_dst += stride_dst;
  346. y_stride_0_1 += stride_src;
  347. y_stride_1_1 += stride_src;
  348. }
  349. }
  350. static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
  351. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  352. int *borders, int _width, int _height,
  353. int c_idx, uint8_t *vert_edge,
  354. uint8_t *horiz_edge, uint8_t *diag_edge)
  355. {
  356. int x, y;
  357. pixel *dst = (pixel *)_dst;
  358. pixel *src = (pixel *)_src;
  359. int *sao_offset_val = sao->offset_val[c_idx];
  360. int sao_eo_class = sao->eo_class[c_idx];
  361. int init_x = 0, init_y = 0, width = _width, height = _height;
  362. stride_dst /= sizeof(pixel);
  363. stride_src /= sizeof(pixel);
  364. if (sao_eo_class != SAO_EO_VERT) {
  365. if (borders[0]) {
  366. int offset_val = sao_offset_val[0];
  367. for (y = 0; y < height; y++) {
  368. dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
  369. }
  370. init_x = 1;
  371. }
  372. if (borders[2]) {
  373. int offset_val = sao_offset_val[0];
  374. int offset = width - 1;
  375. for (x = 0; x < height; x++) {
  376. dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
  377. }
  378. width--;
  379. }
  380. }
  381. if (sao_eo_class != SAO_EO_HORIZ) {
  382. if (borders[1]) {
  383. int offset_val = sao_offset_val[0];
  384. for (x = init_x; x < width; x++)
  385. dst[x] = av_clip_pixel(src[x] + offset_val);
  386. init_y = 1;
  387. }
  388. if (borders[3]) {
  389. int offset_val = sao_offset_val[0];
  390. int y_stride_dst = stride_dst * (height - 1);
  391. int y_stride_src = stride_src * (height - 1);
  392. for (x = init_x; x < width; x++)
  393. dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
  394. height--;
  395. }
  396. }
  397. FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
  398. }
  399. static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
  400. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  401. int *borders, int _width, int _height,
  402. int c_idx, uint8_t *vert_edge,
  403. uint8_t *horiz_edge, uint8_t *diag_edge)
  404. {
  405. int x, y;
  406. pixel *dst = (pixel *)_dst;
  407. pixel *src = (pixel *)_src;
  408. int *sao_offset_val = sao->offset_val[c_idx];
  409. int sao_eo_class = sao->eo_class[c_idx];
  410. int init_x = 0, init_y = 0, width = _width, height = _height;
  411. stride_dst /= sizeof(pixel);
  412. stride_src /= sizeof(pixel);
  413. if (sao_eo_class != SAO_EO_VERT) {
  414. if (borders[0]) {
  415. int offset_val = sao_offset_val[0];
  416. for (y = 0; y < height; y++) {
  417. dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
  418. }
  419. init_x = 1;
  420. }
  421. if (borders[2]) {
  422. int offset_val = sao_offset_val[0];
  423. int offset = width - 1;
  424. for (x = 0; x < height; x++) {
  425. dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
  426. }
  427. width--;
  428. }
  429. }
  430. if (sao_eo_class != SAO_EO_HORIZ) {
  431. if (borders[1]) {
  432. int offset_val = sao_offset_val[0];
  433. for (x = init_x; x < width; x++)
  434. dst[x] = av_clip_pixel(src[x] + offset_val);
  435. init_y = 1;
  436. }
  437. if (borders[3]) {
  438. int offset_val = sao_offset_val[0];
  439. int y_stride_dst = stride_dst * (height - 1);
  440. int y_stride_src = stride_src * (height - 1);
  441. for (x = init_x; x < width; x++)
  442. dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
  443. height--;
  444. }
  445. }
  446. FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
  447. {
  448. int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
  449. int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
  450. int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
  451. int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
  452. // Restore pixels that can't be modified
  453. if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
  454. for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
  455. dst[y*stride_dst] = src[y*stride_src];
  456. }
  457. if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
  458. for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
  459. dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
  460. }
  461. if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
  462. for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
  463. dst[x] = src[x];
  464. }
  465. if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
  466. for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
  467. dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
  468. }
  469. if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
  470. dst[0] = src[0];
  471. if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
  472. dst[width-1] = src[width-1];
  473. if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
  474. dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
  475. if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
  476. dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
  477. }
  478. }
  479. #undef CMP
  480. ////////////////////////////////////////////////////////////////////////////////
  481. //
  482. ////////////////////////////////////////////////////////////////////////////////
  483. static void FUNC(put_hevc_pel_pixels)(int16_t *dst, ptrdiff_t dststride,
  484. uint8_t *_src, ptrdiff_t _srcstride,
  485. int height, intptr_t mx, intptr_t my, int width)
  486. {
  487. int x, y;
  488. pixel *src = (pixel *)_src;
  489. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  490. for (y = 0; y < height; y++) {
  491. for (x = 0; x < width; x++)
  492. dst[x] = src[x] << (14 - BIT_DEPTH);
  493. src += srcstride;
  494. dst += dststride;
  495. }
  496. }
  497. static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  498. int height, intptr_t mx, intptr_t my, int width)
  499. {
  500. int y;
  501. pixel *src = (pixel *)_src;
  502. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  503. pixel *dst = (pixel *)_dst;
  504. ptrdiff_t dststride = _dststride / sizeof(pixel);
  505. for (y = 0; y < height; y++) {
  506. memcpy(dst, src, width * sizeof(pixel));
  507. src += srcstride;
  508. dst += dststride;
  509. }
  510. }
  511. static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  512. int16_t *src2, ptrdiff_t src2stride,
  513. int height, intptr_t mx, intptr_t my, int width)
  514. {
  515. int x, y;
  516. pixel *src = (pixel *)_src;
  517. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  518. pixel *dst = (pixel *)_dst;
  519. ptrdiff_t dststride = _dststride / sizeof(pixel);
  520. int shift = 14 + 1 - BIT_DEPTH;
  521. #if BIT_DEPTH < 14
  522. int offset = 1 << (shift - 1);
  523. #else
  524. int offset = 0;
  525. #endif
  526. for (y = 0; y < height; y++) {
  527. for (x = 0; x < width; x++)
  528. dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
  529. src += srcstride;
  530. dst += dststride;
  531. src2 += src2stride;
  532. }
  533. }
  534. static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  535. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  536. {
  537. int x, y;
  538. pixel *src = (pixel *)_src;
  539. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  540. pixel *dst = (pixel *)_dst;
  541. ptrdiff_t dststride = _dststride / sizeof(pixel);
  542. int shift = denom + 14 - BIT_DEPTH;
  543. #if BIT_DEPTH < 14
  544. int offset = 1 << (shift - 1);
  545. #else
  546. int offset = 0;
  547. #endif
  548. ox = ox * (1 << (BIT_DEPTH - 8));
  549. for (y = 0; y < height; y++) {
  550. for (x = 0; x < width; x++)
  551. dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
  552. src += srcstride;
  553. dst += dststride;
  554. }
  555. }
  556. static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  557. int16_t *src2, ptrdiff_t src2stride,
  558. int height, int denom, int wx0, int wx1,
  559. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  560. {
  561. int x, y;
  562. pixel *src = (pixel *)_src;
  563. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  564. pixel *dst = (pixel *)_dst;
  565. ptrdiff_t dststride = _dststride / sizeof(pixel);
  566. int shift = 14 + 1 - BIT_DEPTH;
  567. int log2Wd = denom + shift - 1;
  568. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  569. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  570. for (y = 0; y < height; y++) {
  571. for (x = 0; x < width; x++) {
  572. dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  573. }
  574. src += srcstride;
  575. dst += dststride;
  576. src2 += src2stride;
  577. }
  578. }
  579. ////////////////////////////////////////////////////////////////////////////////
  580. //
  581. ////////////////////////////////////////////////////////////////////////////////
  582. #define QPEL_FILTER(src, stride) \
  583. (filter[0] * src[x - 3 * stride] + \
  584. filter[1] * src[x - 2 * stride] + \
  585. filter[2] * src[x - stride] + \
  586. filter[3] * src[x ] + \
  587. filter[4] * src[x + stride] + \
  588. filter[5] * src[x + 2 * stride] + \
  589. filter[6] * src[x + 3 * stride] + \
  590. filter[7] * src[x + 4 * stride])
  591. static void FUNC(put_hevc_qpel_h)(int16_t *dst, ptrdiff_t dststride,
  592. uint8_t *_src, ptrdiff_t _srcstride,
  593. int height, intptr_t mx, intptr_t my, int width)
  594. {
  595. int x, y;
  596. pixel *src = (pixel*)_src;
  597. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  598. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  599. for (y = 0; y < height; y++) {
  600. for (x = 0; x < width; x++)
  601. dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  602. src += srcstride;
  603. dst += dststride;
  604. }
  605. }
  606. static void FUNC(put_hevc_qpel_v)(int16_t *dst, ptrdiff_t dststride,
  607. uint8_t *_src, ptrdiff_t _srcstride,
  608. int height, intptr_t mx, intptr_t my, int width)
  609. {
  610. int x, y;
  611. pixel *src = (pixel*)_src;
  612. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  613. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  614. for (y = 0; y < height; y++) {
  615. for (x = 0; x < width; x++)
  616. dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
  617. src += srcstride;
  618. dst += dststride;
  619. }
  620. }
  621. static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
  622. ptrdiff_t dststride,
  623. uint8_t *_src,
  624. ptrdiff_t _srcstride,
  625. int height, intptr_t mx,
  626. intptr_t my, int width)
  627. {
  628. int x, y;
  629. const int8_t *filter;
  630. pixel *src = (pixel*)_src;
  631. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  632. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  633. int16_t *tmp = tmp_array;
  634. src -= QPEL_EXTRA_BEFORE * srcstride;
  635. filter = ff_hevc_qpel_filters[mx - 1];
  636. for (y = 0; y < height + QPEL_EXTRA; y++) {
  637. for (x = 0; x < width; x++)
  638. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  639. src += srcstride;
  640. tmp += MAX_PB_SIZE;
  641. }
  642. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  643. filter = ff_hevc_qpel_filters[my - 1];
  644. for (y = 0; y < height; y++) {
  645. for (x = 0; x < width; x++)
  646. dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
  647. tmp += MAX_PB_SIZE;
  648. dst += dststride;
  649. }
  650. }
  651. static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
  652. uint8_t *_src, ptrdiff_t _srcstride,
  653. int height, intptr_t mx, intptr_t my, int width)
  654. {
  655. int x, y;
  656. pixel *src = (pixel*)_src;
  657. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  658. pixel *dst = (pixel *)_dst;
  659. ptrdiff_t dststride = _dststride / sizeof(pixel);
  660. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  661. int shift = 14 - BIT_DEPTH;
  662. #if BIT_DEPTH < 14
  663. int offset = 1 << (shift - 1);
  664. #else
  665. int offset = 0;
  666. #endif
  667. for (y = 0; y < height; y++) {
  668. for (x = 0; x < width; x++)
  669. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
  670. src += srcstride;
  671. dst += dststride;
  672. }
  673. }
  674. static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  675. int16_t *src2, ptrdiff_t src2stride,
  676. int height, intptr_t mx, intptr_t my, int width)
  677. {
  678. int x, y;
  679. pixel *src = (pixel*)_src;
  680. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  681. pixel *dst = (pixel *)_dst;
  682. ptrdiff_t dststride = _dststride / sizeof(pixel);
  683. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  684. int shift = 14 + 1 - BIT_DEPTH;
  685. #if BIT_DEPTH < 14
  686. int offset = 1 << (shift - 1);
  687. #else
  688. int offset = 0;
  689. #endif
  690. for (y = 0; y < height; y++) {
  691. for (x = 0; x < width; x++)
  692. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  693. src += srcstride;
  694. dst += dststride;
  695. src2 += src2stride;
  696. }
  697. }
  698. static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
  699. uint8_t *_src, ptrdiff_t _srcstride,
  700. int height, intptr_t mx, intptr_t my, int width)
  701. {
  702. int x, y;
  703. pixel *src = (pixel*)_src;
  704. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  705. pixel *dst = (pixel *)_dst;
  706. ptrdiff_t dststride = _dststride / sizeof(pixel);
  707. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  708. int shift = 14 - BIT_DEPTH;
  709. #if BIT_DEPTH < 14
  710. int offset = 1 << (shift - 1);
  711. #else
  712. int offset = 0;
  713. #endif
  714. for (y = 0; y < height; y++) {
  715. for (x = 0; x < width; x++)
  716. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
  717. src += srcstride;
  718. dst += dststride;
  719. }
  720. }
  721. static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  722. int16_t *src2, ptrdiff_t src2stride,
  723. int height, intptr_t mx, intptr_t my, int width)
  724. {
  725. int x, y;
  726. pixel *src = (pixel*)_src;
  727. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  728. pixel *dst = (pixel *)_dst;
  729. ptrdiff_t dststride = _dststride / sizeof(pixel);
  730. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  731. int shift = 14 + 1 - BIT_DEPTH;
  732. #if BIT_DEPTH < 14
  733. int offset = 1 << (shift - 1);
  734. #else
  735. int offset = 0;
  736. #endif
  737. for (y = 0; y < height; y++) {
  738. for (x = 0; x < width; x++)
  739. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  740. src += srcstride;
  741. dst += dststride;
  742. src2 += src2stride;
  743. }
  744. }
  745. static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
  746. uint8_t *_src, ptrdiff_t _srcstride,
  747. int height, intptr_t mx, intptr_t my, int width)
  748. {
  749. int x, y;
  750. const int8_t *filter;
  751. pixel *src = (pixel*)_src;
  752. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  753. pixel *dst = (pixel *)_dst;
  754. ptrdiff_t dststride = _dststride / sizeof(pixel);
  755. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  756. int16_t *tmp = tmp_array;
  757. int shift = 14 - BIT_DEPTH;
  758. #if BIT_DEPTH < 14
  759. int offset = 1 << (shift - 1);
  760. #else
  761. int offset = 0;
  762. #endif
  763. src -= QPEL_EXTRA_BEFORE * srcstride;
  764. filter = ff_hevc_qpel_filters[mx - 1];
  765. for (y = 0; y < height + QPEL_EXTRA; y++) {
  766. for (x = 0; x < width; x++)
  767. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  768. src += srcstride;
  769. tmp += MAX_PB_SIZE;
  770. }
  771. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  772. filter = ff_hevc_qpel_filters[my - 1];
  773. for (y = 0; y < height; y++) {
  774. for (x = 0; x < width; x++)
  775. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
  776. tmp += MAX_PB_SIZE;
  777. dst += dststride;
  778. }
  779. }
  780. static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  781. int16_t *src2, ptrdiff_t src2stride,
  782. int height, intptr_t mx, intptr_t my, int width)
  783. {
  784. int x, y;
  785. const int8_t *filter;
  786. pixel *src = (pixel*)_src;
  787. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  788. pixel *dst = (pixel *)_dst;
  789. ptrdiff_t dststride = _dststride / sizeof(pixel);
  790. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  791. int16_t *tmp = tmp_array;
  792. int shift = 14 + 1 - BIT_DEPTH;
  793. #if BIT_DEPTH < 14
  794. int offset = 1 << (shift - 1);
  795. #else
  796. int offset = 0;
  797. #endif
  798. src -= QPEL_EXTRA_BEFORE * srcstride;
  799. filter = ff_hevc_qpel_filters[mx - 1];
  800. for (y = 0; y < height + QPEL_EXTRA; y++) {
  801. for (x = 0; x < width; x++)
  802. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  803. src += srcstride;
  804. tmp += MAX_PB_SIZE;
  805. }
  806. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  807. filter = ff_hevc_qpel_filters[my - 1];
  808. for (y = 0; y < height; y++) {
  809. for (x = 0; x < width; x++)
  810. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
  811. tmp += MAX_PB_SIZE;
  812. dst += dststride;
  813. src2 += src2stride;
  814. }
  815. }
  816. static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
  817. uint8_t *_src, ptrdiff_t _srcstride,
  818. int height, int denom, int wx, int ox,
  819. intptr_t mx, intptr_t my, int width)
  820. {
  821. int x, y;
  822. pixel *src = (pixel*)_src;
  823. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  824. pixel *dst = (pixel *)_dst;
  825. ptrdiff_t dststride = _dststride / sizeof(pixel);
  826. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  827. int shift = denom + 14 - BIT_DEPTH;
  828. #if BIT_DEPTH < 14
  829. int offset = 1 << (shift - 1);
  830. #else
  831. int offset = 0;
  832. #endif
  833. ox = ox * (1 << (BIT_DEPTH - 8));
  834. for (y = 0; y < height; y++) {
  835. for (x = 0; x < width; x++)
  836. dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  837. src += srcstride;
  838. dst += dststride;
  839. }
  840. }
  841. static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  842. int16_t *src2, ptrdiff_t src2stride,
  843. int height, int denom, int wx0, int wx1,
  844. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  845. {
  846. int x, y;
  847. pixel *src = (pixel*)_src;
  848. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  849. pixel *dst = (pixel *)_dst;
  850. ptrdiff_t dststride = _dststride / sizeof(pixel);
  851. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  852. int shift = 14 + 1 - BIT_DEPTH;
  853. int log2Wd = denom + shift - 1;
  854. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  855. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  856. for (y = 0; y < height; y++) {
  857. for (x = 0; x < width; x++)
  858. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  859. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  860. src += srcstride;
  861. dst += dststride;
  862. src2 += src2stride;
  863. }
  864. }
  865. static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
  866. uint8_t *_src, ptrdiff_t _srcstride,
  867. int height, int denom, int wx, int ox,
  868. intptr_t mx, intptr_t my, int width)
  869. {
  870. int x, y;
  871. pixel *src = (pixel*)_src;
  872. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  873. pixel *dst = (pixel *)_dst;
  874. ptrdiff_t dststride = _dststride / sizeof(pixel);
  875. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  876. int shift = denom + 14 - BIT_DEPTH;
  877. #if BIT_DEPTH < 14
  878. int offset = 1 << (shift - 1);
  879. #else
  880. int offset = 0;
  881. #endif
  882. ox = ox * (1 << (BIT_DEPTH - 8));
  883. for (y = 0; y < height; y++) {
  884. for (x = 0; x < width; x++)
  885. dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  886. src += srcstride;
  887. dst += dststride;
  888. }
  889. }
  890. static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  891. int16_t *src2, ptrdiff_t src2stride,
  892. int height, int denom, int wx0, int wx1,
  893. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  894. {
  895. int x, y;
  896. pixel *src = (pixel*)_src;
  897. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  898. pixel *dst = (pixel *)_dst;
  899. ptrdiff_t dststride = _dststride / sizeof(pixel);
  900. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  901. int shift = 14 + 1 - BIT_DEPTH;
  902. int log2Wd = denom + shift - 1;
  903. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  904. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  905. for (y = 0; y < height; y++) {
  906. for (x = 0; x < width; x++)
  907. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  908. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  909. src += srcstride;
  910. dst += dststride;
  911. src2 += src2stride;
  912. }
  913. }
  914. static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
  915. uint8_t *_src, ptrdiff_t _srcstride,
  916. int height, int denom, int wx, int ox,
  917. intptr_t mx, intptr_t my, int width)
  918. {
  919. int x, y;
  920. const int8_t *filter;
  921. pixel *src = (pixel*)_src;
  922. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  923. pixel *dst = (pixel *)_dst;
  924. ptrdiff_t dststride = _dststride / sizeof(pixel);
  925. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  926. int16_t *tmp = tmp_array;
  927. int shift = denom + 14 - BIT_DEPTH;
  928. #if BIT_DEPTH < 14
  929. int offset = 1 << (shift - 1);
  930. #else
  931. int offset = 0;
  932. #endif
  933. src -= QPEL_EXTRA_BEFORE * srcstride;
  934. filter = ff_hevc_qpel_filters[mx - 1];
  935. for (y = 0; y < height + QPEL_EXTRA; y++) {
  936. for (x = 0; x < width; x++)
  937. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  938. src += srcstride;
  939. tmp += MAX_PB_SIZE;
  940. }
  941. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  942. filter = ff_hevc_qpel_filters[my - 1];
  943. ox = ox * (1 << (BIT_DEPTH - 8));
  944. for (y = 0; y < height; y++) {
  945. for (x = 0; x < width; x++)
  946. dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
  947. tmp += MAX_PB_SIZE;
  948. dst += dststride;
  949. }
  950. }
  951. static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  952. int16_t *src2, ptrdiff_t src2stride,
  953. int height, int denom, int wx0, int wx1,
  954. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  955. {
  956. int x, y;
  957. const int8_t *filter;
  958. pixel *src = (pixel*)_src;
  959. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  960. pixel *dst = (pixel *)_dst;
  961. ptrdiff_t dststride = _dststride / sizeof(pixel);
  962. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  963. int16_t *tmp = tmp_array;
  964. int shift = 14 + 1 - BIT_DEPTH;
  965. int log2Wd = denom + shift - 1;
  966. src -= QPEL_EXTRA_BEFORE * srcstride;
  967. filter = ff_hevc_qpel_filters[mx - 1];
  968. for (y = 0; y < height + QPEL_EXTRA; y++) {
  969. for (x = 0; x < width; x++)
  970. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  971. src += srcstride;
  972. tmp += MAX_PB_SIZE;
  973. }
  974. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  975. filter = ff_hevc_qpel_filters[my - 1];
  976. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  977. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  978. for (y = 0; y < height; y++) {
  979. for (x = 0; x < width; x++)
  980. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
  981. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  982. tmp += MAX_PB_SIZE;
  983. dst += dststride;
  984. src2 += src2stride;
  985. }
  986. }
  987. ////////////////////////////////////////////////////////////////////////////////
  988. //
  989. ////////////////////////////////////////////////////////////////////////////////
  990. #define EPEL_FILTER(src, stride) \
  991. (filter[0] * src[x - stride] + \
  992. filter[1] * src[x] + \
  993. filter[2] * src[x + stride] + \
  994. filter[3] * src[x + 2 * stride])
  995. static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
  996. uint8_t *_src, ptrdiff_t _srcstride,
  997. int height, intptr_t mx, intptr_t my, int width)
  998. {
  999. int x, y;
  1000. pixel *src = (pixel *)_src;
  1001. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1002. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1003. for (y = 0; y < height; y++) {
  1004. for (x = 0; x < width; x++)
  1005. dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1006. src += srcstride;
  1007. dst += dststride;
  1008. }
  1009. }
  1010. static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
  1011. uint8_t *_src, ptrdiff_t _srcstride,
  1012. int height, intptr_t mx, intptr_t my, int width)
  1013. {
  1014. int x, y;
  1015. pixel *src = (pixel *)_src;
  1016. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1017. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1018. for (y = 0; y < height; y++) {
  1019. for (x = 0; x < width; x++)
  1020. dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
  1021. src += srcstride;
  1022. dst += dststride;
  1023. }
  1024. }
  1025. static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
  1026. uint8_t *_src, ptrdiff_t _srcstride,
  1027. int height, intptr_t mx, intptr_t my, int width)
  1028. {
  1029. int x, y;
  1030. pixel *src = (pixel *)_src;
  1031. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1032. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1033. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1034. int16_t *tmp = tmp_array;
  1035. src -= EPEL_EXTRA_BEFORE * srcstride;
  1036. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1037. for (x = 0; x < width; x++)
  1038. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1039. src += srcstride;
  1040. tmp += MAX_PB_SIZE;
  1041. }
  1042. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1043. filter = ff_hevc_epel_filters[my - 1];
  1044. for (y = 0; y < height; y++) {
  1045. for (x = 0; x < width; x++)
  1046. dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
  1047. tmp += MAX_PB_SIZE;
  1048. dst += dststride;
  1049. }
  1050. }
  1051. static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1052. int height, intptr_t mx, intptr_t my, int width)
  1053. {
  1054. int x, y;
  1055. pixel *src = (pixel *)_src;
  1056. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1057. pixel *dst = (pixel *)_dst;
  1058. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1059. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1060. int shift = 14 - BIT_DEPTH;
  1061. #if BIT_DEPTH < 14
  1062. int offset = 1 << (shift - 1);
  1063. #else
  1064. int offset = 0;
  1065. #endif
  1066. for (y = 0; y < height; y++) {
  1067. for (x = 0; x < width; x++)
  1068. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
  1069. src += srcstride;
  1070. dst += dststride;
  1071. }
  1072. }
  1073. static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1074. int16_t *src2, ptrdiff_t src2stride,
  1075. int height, intptr_t mx, intptr_t my, int width)
  1076. {
  1077. int x, y;
  1078. pixel *src = (pixel *)_src;
  1079. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1080. pixel *dst = (pixel *)_dst;
  1081. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1082. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1083. int shift = 14 + 1 - BIT_DEPTH;
  1084. #if BIT_DEPTH < 14
  1085. int offset = 1 << (shift - 1);
  1086. #else
  1087. int offset = 0;
  1088. #endif
  1089. for (y = 0; y < height; y++) {
  1090. for (x = 0; x < width; x++) {
  1091. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  1092. }
  1093. dst += dststride;
  1094. src += srcstride;
  1095. src2 += src2stride;
  1096. }
  1097. }
  1098. static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1099. int height, intptr_t mx, intptr_t my, int width)
  1100. {
  1101. int x, y;
  1102. pixel *src = (pixel *)_src;
  1103. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1104. pixel *dst = (pixel *)_dst;
  1105. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1106. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1107. int shift = 14 - BIT_DEPTH;
  1108. #if BIT_DEPTH < 14
  1109. int offset = 1 << (shift - 1);
  1110. #else
  1111. int offset = 0;
  1112. #endif
  1113. for (y = 0; y < height; y++) {
  1114. for (x = 0; x < width; x++)
  1115. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
  1116. src += srcstride;
  1117. dst += dststride;
  1118. }
  1119. }
  1120. static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1121. int16_t *src2, ptrdiff_t src2stride,
  1122. int height, intptr_t mx, intptr_t my, int width)
  1123. {
  1124. int x, y;
  1125. pixel *src = (pixel *)_src;
  1126. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1127. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1128. pixel *dst = (pixel *)_dst;
  1129. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1130. int shift = 14 + 1 - BIT_DEPTH;
  1131. #if BIT_DEPTH < 14
  1132. int offset = 1 << (shift - 1);
  1133. #else
  1134. int offset = 0;
  1135. #endif
  1136. for (y = 0; y < height; y++) {
  1137. for (x = 0; x < width; x++)
  1138. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  1139. dst += dststride;
  1140. src += srcstride;
  1141. src2 += src2stride;
  1142. }
  1143. }
  1144. static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1145. int height, intptr_t mx, intptr_t my, int width)
  1146. {
  1147. int x, y;
  1148. pixel *src = (pixel *)_src;
  1149. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1150. pixel *dst = (pixel *)_dst;
  1151. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1152. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1153. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1154. int16_t *tmp = tmp_array;
  1155. int shift = 14 - BIT_DEPTH;
  1156. #if BIT_DEPTH < 14
  1157. int offset = 1 << (shift - 1);
  1158. #else
  1159. int offset = 0;
  1160. #endif
  1161. src -= EPEL_EXTRA_BEFORE * srcstride;
  1162. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1163. for (x = 0; x < width; x++)
  1164. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1165. src += srcstride;
  1166. tmp += MAX_PB_SIZE;
  1167. }
  1168. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1169. filter = ff_hevc_epel_filters[my - 1];
  1170. for (y = 0; y < height; y++) {
  1171. for (x = 0; x < width; x++)
  1172. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
  1173. tmp += MAX_PB_SIZE;
  1174. dst += dststride;
  1175. }
  1176. }
  1177. static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1178. int16_t *src2, ptrdiff_t src2stride,
  1179. int height, intptr_t mx, intptr_t my, int width)
  1180. {
  1181. int x, y;
  1182. pixel *src = (pixel *)_src;
  1183. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1184. pixel *dst = (pixel *)_dst;
  1185. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1186. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1187. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1188. int16_t *tmp = tmp_array;
  1189. int shift = 14 + 1 - BIT_DEPTH;
  1190. #if BIT_DEPTH < 14
  1191. int offset = 1 << (shift - 1);
  1192. #else
  1193. int offset = 0;
  1194. #endif
  1195. src -= EPEL_EXTRA_BEFORE * srcstride;
  1196. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1197. for (x = 0; x < width; x++)
  1198. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1199. src += srcstride;
  1200. tmp += MAX_PB_SIZE;
  1201. }
  1202. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1203. filter = ff_hevc_epel_filters[my - 1];
  1204. for (y = 0; y < height; y++) {
  1205. for (x = 0; x < width; x++)
  1206. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
  1207. tmp += MAX_PB_SIZE;
  1208. dst += dststride;
  1209. src2 += src2stride;
  1210. }
  1211. }
  1212. static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1213. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1214. {
  1215. int x, y;
  1216. pixel *src = (pixel *)_src;
  1217. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1218. pixel *dst = (pixel *)_dst;
  1219. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1220. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1221. int shift = denom + 14 - BIT_DEPTH;
  1222. #if BIT_DEPTH < 14
  1223. int offset = 1 << (shift - 1);
  1224. #else
  1225. int offset = 0;
  1226. #endif
  1227. ox = ox * (1 << (BIT_DEPTH - 8));
  1228. for (y = 0; y < height; y++) {
  1229. for (x = 0; x < width; x++) {
  1230. dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  1231. }
  1232. dst += dststride;
  1233. src += srcstride;
  1234. }
  1235. }
  1236. static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1237. int16_t *src2, ptrdiff_t src2stride,
  1238. int height, int denom, int wx0, int wx1,
  1239. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1240. {
  1241. int x, y;
  1242. pixel *src = (pixel *)_src;
  1243. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1244. pixel *dst = (pixel *)_dst;
  1245. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1246. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1247. int shift = 14 + 1 - BIT_DEPTH;
  1248. int log2Wd = denom + shift - 1;
  1249. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1250. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1251. for (y = 0; y < height; y++) {
  1252. for (x = 0; x < width; x++)
  1253. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  1254. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  1255. src += srcstride;
  1256. dst += dststride;
  1257. src2 += src2stride;
  1258. }
  1259. }
  1260. static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1261. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1262. {
  1263. int x, y;
  1264. pixel *src = (pixel *)_src;
  1265. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1266. pixel *dst = (pixel *)_dst;
  1267. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1268. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1269. int shift = denom + 14 - BIT_DEPTH;
  1270. #if BIT_DEPTH < 14
  1271. int offset = 1 << (shift - 1);
  1272. #else
  1273. int offset = 0;
  1274. #endif
  1275. ox = ox * (1 << (BIT_DEPTH - 8));
  1276. for (y = 0; y < height; y++) {
  1277. for (x = 0; x < width; x++) {
  1278. dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  1279. }
  1280. dst += dststride;
  1281. src += srcstride;
  1282. }
  1283. }
  1284. static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1285. int16_t *src2, ptrdiff_t src2stride,
  1286. int height, int denom, int wx0, int wx1,
  1287. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1288. {
  1289. int x, y;
  1290. pixel *src = (pixel *)_src;
  1291. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1292. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1293. pixel *dst = (pixel *)_dst;
  1294. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1295. int shift = 14 + 1 - BIT_DEPTH;
  1296. int log2Wd = denom + shift - 1;
  1297. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1298. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1299. for (y = 0; y < height; y++) {
  1300. for (x = 0; x < width; x++)
  1301. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  1302. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  1303. src += srcstride;
  1304. dst += dststride;
  1305. src2 += src2stride;
  1306. }
  1307. }
  1308. static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1309. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1310. {
  1311. int x, y;
  1312. pixel *src = (pixel *)_src;
  1313. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1314. pixel *dst = (pixel *)_dst;
  1315. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1316. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1317. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1318. int16_t *tmp = tmp_array;
  1319. int shift = denom + 14 - BIT_DEPTH;
  1320. #if BIT_DEPTH < 14
  1321. int offset = 1 << (shift - 1);
  1322. #else
  1323. int offset = 0;
  1324. #endif
  1325. src -= EPEL_EXTRA_BEFORE * srcstride;
  1326. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1327. for (x = 0; x < width; x++)
  1328. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1329. src += srcstride;
  1330. tmp += MAX_PB_SIZE;
  1331. }
  1332. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1333. filter = ff_hevc_epel_filters[my - 1];
  1334. ox = ox * (1 << (BIT_DEPTH - 8));
  1335. for (y = 0; y < height; y++) {
  1336. for (x = 0; x < width; x++)
  1337. dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
  1338. tmp += MAX_PB_SIZE;
  1339. dst += dststride;
  1340. }
  1341. }
  1342. static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1343. int16_t *src2, ptrdiff_t src2stride,
  1344. int height, int denom, int wx0, int wx1,
  1345. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1346. {
  1347. int x, y;
  1348. pixel *src = (pixel *)_src;
  1349. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1350. pixel *dst = (pixel *)_dst;
  1351. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1352. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1353. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1354. int16_t *tmp = tmp_array;
  1355. int shift = 14 + 1 - BIT_DEPTH;
  1356. int log2Wd = denom + shift - 1;
  1357. src -= EPEL_EXTRA_BEFORE * srcstride;
  1358. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1359. for (x = 0; x < width; x++)
  1360. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1361. src += srcstride;
  1362. tmp += MAX_PB_SIZE;
  1363. }
  1364. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1365. filter = ff_hevc_epel_filters[my - 1];
  1366. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1367. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1368. for (y = 0; y < height; y++) {
  1369. for (x = 0; x < width; x++)
  1370. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
  1371. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  1372. tmp += MAX_PB_SIZE;
  1373. dst += dststride;
  1374. src2 += src2stride;
  1375. }
  1376. }// line zero
  1377. #define P3 pix[-4 * xstride]
  1378. #define P2 pix[-3 * xstride]
  1379. #define P1 pix[-2 * xstride]
  1380. #define P0 pix[-1 * xstride]
  1381. #define Q0 pix[0 * xstride]
  1382. #define Q1 pix[1 * xstride]
  1383. #define Q2 pix[2 * xstride]
  1384. #define Q3 pix[3 * xstride]
  1385. // line three. used only for deblocking decision
  1386. #define TP3 pix[-4 * xstride + 3 * ystride]
  1387. #define TP2 pix[-3 * xstride + 3 * ystride]
  1388. #define TP1 pix[-2 * xstride + 3 * ystride]
  1389. #define TP0 pix[-1 * xstride + 3 * ystride]
  1390. #define TQ0 pix[0 * xstride + 3 * ystride]
  1391. #define TQ1 pix[1 * xstride + 3 * ystride]
  1392. #define TQ2 pix[2 * xstride + 3 * ystride]
  1393. #define TQ3 pix[3 * xstride + 3 * ystride]
  1394. static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
  1395. ptrdiff_t _xstride, ptrdiff_t _ystride,
  1396. int beta, int *_tc,
  1397. uint8_t *_no_p, uint8_t *_no_q)
  1398. {
  1399. int d, j;
  1400. pixel *pix = (pixel *)_pix;
  1401. ptrdiff_t xstride = _xstride / sizeof(pixel);
  1402. ptrdiff_t ystride = _ystride / sizeof(pixel);
  1403. beta <<= BIT_DEPTH - 8;
  1404. for (j = 0; j < 2; j++) {
  1405. const int dp0 = abs(P2 - 2 * P1 + P0);
  1406. const int dq0 = abs(Q2 - 2 * Q1 + Q0);
  1407. const int dp3 = abs(TP2 - 2 * TP1 + TP0);
  1408. const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
  1409. const int d0 = dp0 + dq0;
  1410. const int d3 = dp3 + dq3;
  1411. const int tc = _tc[j] << (BIT_DEPTH - 8);
  1412. const int no_p = _no_p[j];
  1413. const int no_q = _no_q[j];
  1414. if (d0 + d3 >= beta) {
  1415. pix += 4 * ystride;
  1416. continue;
  1417. } else {
  1418. const int beta_3 = beta >> 3;
  1419. const int beta_2 = beta >> 2;
  1420. const int tc25 = ((tc * 5 + 1) >> 1);
  1421. if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
  1422. abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
  1423. (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
  1424. // strong filtering
  1425. const int tc2 = tc << 1;
  1426. for (d = 0; d < 4; d++) {
  1427. const int p3 = P3;
  1428. const int p2 = P2;
  1429. const int p1 = P1;
  1430. const int p0 = P0;
  1431. const int q0 = Q0;
  1432. const int q1 = Q1;
  1433. const int q2 = Q2;
  1434. const int q3 = Q3;
  1435. if (!no_p) {
  1436. P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
  1437. P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
  1438. P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
  1439. }
  1440. if (!no_q) {
  1441. Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
  1442. Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
  1443. Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
  1444. }
  1445. pix += ystride;
  1446. }
  1447. } else { // normal filtering
  1448. int nd_p = 1;
  1449. int nd_q = 1;
  1450. const int tc_2 = tc >> 1;
  1451. if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
  1452. nd_p = 2;
  1453. if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
  1454. nd_q = 2;
  1455. for (d = 0; d < 4; d++) {
  1456. const int p2 = P2;
  1457. const int p1 = P1;
  1458. const int p0 = P0;
  1459. const int q0 = Q0;
  1460. const int q1 = Q1;
  1461. const int q2 = Q2;
  1462. int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
  1463. if (abs(delta0) < 10 * tc) {
  1464. delta0 = av_clip(delta0, -tc, tc);
  1465. if (!no_p)
  1466. P0 = av_clip_pixel(p0 + delta0);
  1467. if (!no_q)
  1468. Q0 = av_clip_pixel(q0 - delta0);
  1469. if (!no_p && nd_p > 1) {
  1470. const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
  1471. P1 = av_clip_pixel(p1 + deltap1);
  1472. }
  1473. if (!no_q && nd_q > 1) {
  1474. const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
  1475. Q1 = av_clip_pixel(q1 + deltaq1);
  1476. }
  1477. }
  1478. pix += ystride;
  1479. }
  1480. }
  1481. }
  1482. }
  1483. }
  1484. static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
  1485. ptrdiff_t _ystride, int *_tc,
  1486. uint8_t *_no_p, uint8_t *_no_q)
  1487. {
  1488. int d, j, no_p, no_q;
  1489. pixel *pix = (pixel *)_pix;
  1490. ptrdiff_t xstride = _xstride / sizeof(pixel);
  1491. ptrdiff_t ystride = _ystride / sizeof(pixel);
  1492. for (j = 0; j < 2; j++) {
  1493. const int tc = _tc[j] << (BIT_DEPTH - 8);
  1494. if (tc <= 0) {
  1495. pix += 4 * ystride;
  1496. continue;
  1497. }
  1498. no_p = _no_p[j];
  1499. no_q = _no_q[j];
  1500. for (d = 0; d < 4; d++) {
  1501. int delta0;
  1502. const int p1 = P1;
  1503. const int p0 = P0;
  1504. const int q0 = Q0;
  1505. const int q1 = Q1;
  1506. delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
  1507. if (!no_p)
  1508. P0 = av_clip_pixel(p0 + delta0);
  1509. if (!no_q)
  1510. Q0 = av_clip_pixel(q0 - delta0);
  1511. pix += ystride;
  1512. }
  1513. }
  1514. }
  1515. static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
  1516. int *tc, uint8_t *no_p,
  1517. uint8_t *no_q)
  1518. {
  1519. FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
  1520. }
  1521. static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
  1522. int *tc, uint8_t *no_p,
  1523. uint8_t *no_q)
  1524. {
  1525. FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
  1526. }
  1527. static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
  1528. int beta, int *tc, uint8_t *no_p,
  1529. uint8_t *no_q)
  1530. {
  1531. FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
  1532. beta, tc, no_p, no_q);
  1533. }
  1534. static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
  1535. int beta, int *tc, uint8_t *no_p,
  1536. uint8_t *no_q)
  1537. {
  1538. FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
  1539. beta, tc, no_p, no_q);
  1540. }
  1541. #undef P3
  1542. #undef P2
  1543. #undef P1
  1544. #undef P0
  1545. #undef Q0
  1546. #undef Q1
  1547. #undef Q2
  1548. #undef Q3
  1549. #undef TP3
  1550. #undef TP2
  1551. #undef TP1
  1552. #undef TP0
  1553. #undef TQ0
  1554. #undef TQ1
  1555. #undef TQ2
  1556. #undef TQ3