You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1693 lines
61KB

  1. /*
  2. * HEVC video decoder
  3. *
  4. * Copyright (C) 2012 - 2013 Guillaume Martres
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "get_bits.h"
  23. #include "hevcdec.h"
  24. #include "bit_depth_template.c"
  25. #include "hevcdsp.h"
  26. static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
  27. GetBitContext *gb, int pcm_bit_depth)
  28. {
  29. int x, y;
  30. pixel *dst = (pixel *)_dst;
  31. stride /= sizeof(pixel);
  32. for (y = 0; y < height; y++) {
  33. for (x = 0; x < width; x++)
  34. dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
  35. dst += stride;
  36. }
  37. }
  38. static av_always_inline void FUNC(add_residual)(uint8_t *_dst, int16_t *res,
  39. ptrdiff_t stride, int size)
  40. {
  41. int x, y;
  42. pixel *dst = (pixel *)_dst;
  43. stride /= sizeof(pixel);
  44. for (y = 0; y < size; y++) {
  45. for (x = 0; x < size; x++) {
  46. dst[x] = av_clip_pixel(dst[x] + *res);
  47. res++;
  48. }
  49. dst += stride;
  50. }
  51. }
  52. static void FUNC(add_residual4x4)(uint8_t *_dst, int16_t *res,
  53. ptrdiff_t stride)
  54. {
  55. FUNC(add_residual)(_dst, res, stride, 4);
  56. }
  57. static void FUNC(add_residual8x8)(uint8_t *_dst, int16_t *res,
  58. ptrdiff_t stride)
  59. {
  60. FUNC(add_residual)(_dst, res, stride, 8);
  61. }
  62. static void FUNC(add_residual16x16)(uint8_t *_dst, int16_t *res,
  63. ptrdiff_t stride)
  64. {
  65. FUNC(add_residual)(_dst, res, stride, 16);
  66. }
  67. static void FUNC(add_residual32x32)(uint8_t *_dst, int16_t *res,
  68. ptrdiff_t stride)
  69. {
  70. FUNC(add_residual)(_dst, res, stride, 32);
  71. }
  72. static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
  73. {
  74. int16_t *coeffs = (int16_t *) _coeffs;
  75. int x, y;
  76. int size = 1 << log2_size;
  77. if (mode) {
  78. coeffs += size;
  79. for (y = 0; y < size - 1; y++) {
  80. for (x = 0; x < size; x++)
  81. coeffs[x] += coeffs[x - size];
  82. coeffs += size;
  83. }
  84. } else {
  85. for (y = 0; y < size; y++) {
  86. for (x = 1; x < size; x++)
  87. coeffs[x] += coeffs[x - 1];
  88. coeffs += size;
  89. }
  90. }
  91. }
  92. static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
  93. {
  94. int shift = 15 - BIT_DEPTH - log2_size;
  95. int x, y;
  96. int size = 1 << log2_size;
  97. if (shift > 0) {
  98. int offset = 1 << (shift - 1);
  99. for (y = 0; y < size; y++) {
  100. for (x = 0; x < size; x++) {
  101. *coeffs = (*coeffs + offset) >> shift;
  102. coeffs++;
  103. }
  104. }
  105. } else {
  106. for (y = 0; y < size; y++) {
  107. for (x = 0; x < size; x++) {
  108. *coeffs = *coeffs << -shift;
  109. coeffs++;
  110. }
  111. }
  112. }
  113. }
  114. #define SET(dst, x) (dst) = (x)
  115. #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
  116. #define TR_4x4_LUMA(dst, src, step, assign) \
  117. do { \
  118. int c0 = src[0 * step] + src[2 * step]; \
  119. int c1 = src[2 * step] + src[3 * step]; \
  120. int c2 = src[0 * step] - src[3 * step]; \
  121. int c3 = 74 * src[1 * step]; \
  122. \
  123. assign(dst[2 * step], 74 * (src[0 * step] - \
  124. src[2 * step] + \
  125. src[3 * step])); \
  126. assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
  127. assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
  128. assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
  129. } while (0)
  130. static void FUNC(transform_4x4_luma)(int16_t *coeffs)
  131. {
  132. int i;
  133. int shift = 7;
  134. int add = 1 << (shift - 1);
  135. int16_t *src = coeffs;
  136. for (i = 0; i < 4; i++) {
  137. TR_4x4_LUMA(src, src, 4, SCALE);
  138. src++;
  139. }
  140. shift = 20 - BIT_DEPTH;
  141. add = 1 << (shift - 1);
  142. for (i = 0; i < 4; i++) {
  143. TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
  144. coeffs += 4;
  145. }
  146. }
  147. #undef TR_4x4_LUMA
  148. #define TR_4(dst, src, dstep, sstep, assign, end) \
  149. do { \
  150. const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
  151. const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
  152. const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
  153. const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
  154. \
  155. assign(dst[0 * dstep], e0 + o0); \
  156. assign(dst[1 * dstep], e1 + o1); \
  157. assign(dst[2 * dstep], e1 - o1); \
  158. assign(dst[3 * dstep], e0 - o0); \
  159. } while (0)
  160. #define TR_8(dst, src, dstep, sstep, assign, end) \
  161. do { \
  162. int i, j; \
  163. int e_8[4]; \
  164. int o_8[4] = { 0 }; \
  165. for (i = 0; i < 4; i++) \
  166. for (j = 1; j < end; j += 2) \
  167. o_8[i] += transform[4 * j][i] * src[j * sstep]; \
  168. TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
  169. \
  170. for (i = 0; i < 4; i++) { \
  171. assign(dst[i * dstep], e_8[i] + o_8[i]); \
  172. assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
  173. } \
  174. } while (0)
  175. #define TR_16(dst, src, dstep, sstep, assign, end) \
  176. do { \
  177. int i, j; \
  178. int e_16[8]; \
  179. int o_16[8] = { 0 }; \
  180. for (i = 0; i < 8; i++) \
  181. for (j = 1; j < end; j += 2) \
  182. o_16[i] += transform[2 * j][i] * src[j * sstep]; \
  183. TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
  184. \
  185. for (i = 0; i < 8; i++) { \
  186. assign(dst[i * dstep], e_16[i] + o_16[i]); \
  187. assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
  188. } \
  189. } while (0)
  190. #define TR_32(dst, src, dstep, sstep, assign, end) \
  191. do { \
  192. int i, j; \
  193. int e_32[16]; \
  194. int o_32[16] = { 0 }; \
  195. for (i = 0; i < 16; i++) \
  196. for (j = 1; j < end; j += 2) \
  197. o_32[i] += transform[j][i] * src[j * sstep]; \
  198. TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); \
  199. \
  200. for (i = 0; i < 16; i++) { \
  201. assign(dst[i * dstep], e_32[i] + o_32[i]); \
  202. assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
  203. } \
  204. } while (0)
  205. #define IDCT_VAR4(H) \
  206. int limit2 = FFMIN(col_limit + 4, H)
  207. #define IDCT_VAR8(H) \
  208. int limit = FFMIN(col_limit, H); \
  209. int limit2 = FFMIN(col_limit + 4, H)
  210. #define IDCT_VAR16(H) IDCT_VAR8(H)
  211. #define IDCT_VAR32(H) IDCT_VAR8(H)
  212. #define IDCT(H) \
  213. static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \
  214. int col_limit) \
  215. { \
  216. int i; \
  217. int shift = 7; \
  218. int add = 1 << (shift - 1); \
  219. int16_t *src = coeffs; \
  220. IDCT_VAR ## H(H); \
  221. \
  222. for (i = 0; i < H; i++) { \
  223. TR_ ## H(src, src, H, H, SCALE, limit2); \
  224. if (limit2 < H && i%4 == 0 && !!i) \
  225. limit2 -= 4; \
  226. src++; \
  227. } \
  228. \
  229. shift = 20 - BIT_DEPTH; \
  230. add = 1 << (shift - 1); \
  231. for (i = 0; i < H; i++) { \
  232. TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
  233. coeffs += H; \
  234. } \
  235. }
  236. #define IDCT_DC(H) \
  237. static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs) \
  238. { \
  239. int i, j; \
  240. int shift = 14 - BIT_DEPTH; \
  241. int add = 1 << (shift - 1); \
  242. int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
  243. \
  244. for (j = 0; j < H; j++) { \
  245. for (i = 0; i < H; i++) { \
  246. coeffs[i + j * H] = coeff; \
  247. } \
  248. } \
  249. }
  250. IDCT( 4)
  251. IDCT( 8)
  252. IDCT(16)
  253. IDCT(32)
  254. IDCT_DC( 4)
  255. IDCT_DC( 8)
  256. IDCT_DC(16)
  257. IDCT_DC(32)
  258. #undef TR_4
  259. #undef TR_8
  260. #undef TR_16
  261. #undef TR_32
  262. #undef SET
  263. #undef SCALE
  264. static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
  265. ptrdiff_t stride_dst, ptrdiff_t stride_src,
  266. int16_t *sao_offset_val, int sao_left_class,
  267. int width, int height)
  268. {
  269. pixel *dst = (pixel *)_dst;
  270. pixel *src = (pixel *)_src;
  271. int offset_table[32] = { 0 };
  272. int k, y, x;
  273. int shift = BIT_DEPTH - 5;
  274. stride_dst /= sizeof(pixel);
  275. stride_src /= sizeof(pixel);
  276. for (k = 0; k < 4; k++)
  277. offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
  278. for (y = 0; y < height; y++) {
  279. for (x = 0; x < width; x++)
  280. dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
  281. dst += stride_dst;
  282. src += stride_src;
  283. }
  284. }
  285. #define CMP(a, b) (((a) > (b)) - ((a) < (b)))
  286. static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
  287. int eo, int width, int height) {
  288. static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
  289. static const int8_t pos[4][2][2] = {
  290. { { -1, 0 }, { 1, 0 } }, // horizontal
  291. { { 0, -1 }, { 0, 1 } }, // vertical
  292. { { -1, -1 }, { 1, 1 } }, // 45 degree
  293. { { 1, -1 }, { -1, 1 } }, // 135 degree
  294. };
  295. pixel *dst = (pixel *)_dst;
  296. pixel *src = (pixel *)_src;
  297. int a_stride, b_stride;
  298. int x, y;
  299. ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
  300. stride_dst /= sizeof(pixel);
  301. a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
  302. b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
  303. for (y = 0; y < height; y++) {
  304. for (x = 0; x < width; x++) {
  305. int diff0 = CMP(src[x], src[x + a_stride]);
  306. int diff1 = CMP(src[x], src[x + b_stride]);
  307. int offset_val = edge_idx[2 + diff0 + diff1];
  308. dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]);
  309. }
  310. src += stride_src;
  311. dst += stride_dst;
  312. }
  313. }
  314. static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src,
  315. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  316. int *borders, int _width, int _height,
  317. int c_idx, uint8_t *vert_edge,
  318. uint8_t *horiz_edge, uint8_t *diag_edge)
  319. {
  320. int x, y;
  321. pixel *dst = (pixel *)_dst;
  322. pixel *src = (pixel *)_src;
  323. int16_t *sao_offset_val = sao->offset_val[c_idx];
  324. int sao_eo_class = sao->eo_class[c_idx];
  325. int init_x = 0, width = _width, height = _height;
  326. stride_dst /= sizeof(pixel);
  327. stride_src /= sizeof(pixel);
  328. if (sao_eo_class != SAO_EO_VERT) {
  329. if (borders[0]) {
  330. int offset_val = sao_offset_val[0];
  331. for (y = 0; y < height; y++) {
  332. dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
  333. }
  334. init_x = 1;
  335. }
  336. if (borders[2]) {
  337. int offset_val = sao_offset_val[0];
  338. int offset = width - 1;
  339. for (x = 0; x < height; x++) {
  340. dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
  341. }
  342. width--;
  343. }
  344. }
  345. if (sao_eo_class != SAO_EO_HORIZ) {
  346. if (borders[1]) {
  347. int offset_val = sao_offset_val[0];
  348. for (x = init_x; x < width; x++)
  349. dst[x] = av_clip_pixel(src[x] + offset_val);
  350. }
  351. if (borders[3]) {
  352. int offset_val = sao_offset_val[0];
  353. ptrdiff_t y_stride_dst = stride_dst * (height - 1);
  354. ptrdiff_t y_stride_src = stride_src * (height - 1);
  355. for (x = init_x; x < width; x++)
  356. dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
  357. height--;
  358. }
  359. }
  360. }
  361. static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src,
  362. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  363. int *borders, int _width, int _height,
  364. int c_idx, uint8_t *vert_edge,
  365. uint8_t *horiz_edge, uint8_t *diag_edge)
  366. {
  367. int x, y;
  368. pixel *dst = (pixel *)_dst;
  369. pixel *src = (pixel *)_src;
  370. int16_t *sao_offset_val = sao->offset_val[c_idx];
  371. int sao_eo_class = sao->eo_class[c_idx];
  372. int init_x = 0, init_y = 0, width = _width, height = _height;
  373. stride_dst /= sizeof(pixel);
  374. stride_src /= sizeof(pixel);
  375. if (sao_eo_class != SAO_EO_VERT) {
  376. if (borders[0]) {
  377. int offset_val = sao_offset_val[0];
  378. for (y = 0; y < height; y++) {
  379. dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
  380. }
  381. init_x = 1;
  382. }
  383. if (borders[2]) {
  384. int offset_val = sao_offset_val[0];
  385. int offset = width - 1;
  386. for (x = 0; x < height; x++) {
  387. dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
  388. }
  389. width--;
  390. }
  391. }
  392. if (sao_eo_class != SAO_EO_HORIZ) {
  393. if (borders[1]) {
  394. int offset_val = sao_offset_val[0];
  395. for (x = init_x; x < width; x++)
  396. dst[x] = av_clip_pixel(src[x] + offset_val);
  397. init_y = 1;
  398. }
  399. if (borders[3]) {
  400. int offset_val = sao_offset_val[0];
  401. ptrdiff_t y_stride_dst = stride_dst * (height - 1);
  402. ptrdiff_t y_stride_src = stride_src * (height - 1);
  403. for (x = init_x; x < width; x++)
  404. dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
  405. height--;
  406. }
  407. }
  408. {
  409. int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
  410. int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
  411. int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
  412. int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
  413. // Restore pixels that can't be modified
  414. if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
  415. for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
  416. dst[y*stride_dst] = src[y*stride_src];
  417. }
  418. if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
  419. for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
  420. dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
  421. }
  422. if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
  423. for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
  424. dst[x] = src[x];
  425. }
  426. if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
  427. for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
  428. dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
  429. }
  430. if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
  431. dst[0] = src[0];
  432. if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
  433. dst[width-1] = src[width-1];
  434. if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
  435. dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
  436. if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
  437. dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
  438. }
  439. }
  440. #undef CMP
  441. ////////////////////////////////////////////////////////////////////////////////
  442. //
  443. ////////////////////////////////////////////////////////////////////////////////
  444. static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
  445. uint8_t *_src, ptrdiff_t _srcstride,
  446. int height, intptr_t mx, intptr_t my, int width)
  447. {
  448. int x, y;
  449. pixel *src = (pixel *)_src;
  450. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  451. for (y = 0; y < height; y++) {
  452. for (x = 0; x < width; x++)
  453. dst[x] = src[x] << (14 - BIT_DEPTH);
  454. src += srcstride;
  455. dst += MAX_PB_SIZE;
  456. }
  457. }
  458. static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  459. int height, intptr_t mx, intptr_t my, int width)
  460. {
  461. int y;
  462. pixel *src = (pixel *)_src;
  463. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  464. pixel *dst = (pixel *)_dst;
  465. ptrdiff_t dststride = _dststride / sizeof(pixel);
  466. for (y = 0; y < height; y++) {
  467. memcpy(dst, src, width * sizeof(pixel));
  468. src += srcstride;
  469. dst += dststride;
  470. }
  471. }
  472. static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  473. int16_t *src2,
  474. int height, intptr_t mx, intptr_t my, int width)
  475. {
  476. int x, y;
  477. pixel *src = (pixel *)_src;
  478. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  479. pixel *dst = (pixel *)_dst;
  480. ptrdiff_t dststride = _dststride / sizeof(pixel);
  481. int shift = 14 + 1 - BIT_DEPTH;
  482. #if BIT_DEPTH < 14
  483. int offset = 1 << (shift - 1);
  484. #else
  485. int offset = 0;
  486. #endif
  487. for (y = 0; y < height; y++) {
  488. for (x = 0; x < width; x++)
  489. dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
  490. src += srcstride;
  491. dst += dststride;
  492. src2 += MAX_PB_SIZE;
  493. }
  494. }
  495. static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  496. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  497. {
  498. int x, y;
  499. pixel *src = (pixel *)_src;
  500. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  501. pixel *dst = (pixel *)_dst;
  502. ptrdiff_t dststride = _dststride / sizeof(pixel);
  503. int shift = denom + 14 - BIT_DEPTH;
  504. #if BIT_DEPTH < 14
  505. int offset = 1 << (shift - 1);
  506. #else
  507. int offset = 0;
  508. #endif
  509. ox = ox * (1 << (BIT_DEPTH - 8));
  510. for (y = 0; y < height; y++) {
  511. for (x = 0; x < width; x++)
  512. dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
  513. src += srcstride;
  514. dst += dststride;
  515. }
  516. }
  517. static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  518. int16_t *src2,
  519. int height, int denom, int wx0, int wx1,
  520. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  521. {
  522. int x, y;
  523. pixel *src = (pixel *)_src;
  524. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  525. pixel *dst = (pixel *)_dst;
  526. ptrdiff_t dststride = _dststride / sizeof(pixel);
  527. int shift = 14 + 1 - BIT_DEPTH;
  528. int log2Wd = denom + shift - 1;
  529. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  530. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  531. for (y = 0; y < height; y++) {
  532. for (x = 0; x < width; x++) {
  533. dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
  534. }
  535. src += srcstride;
  536. dst += dststride;
  537. src2 += MAX_PB_SIZE;
  538. }
  539. }
  540. ////////////////////////////////////////////////////////////////////////////////
  541. //
  542. ////////////////////////////////////////////////////////////////////////////////
  543. #define QPEL_FILTER(src, stride) \
  544. (filter[0] * src[x - 3 * stride] + \
  545. filter[1] * src[x - 2 * stride] + \
  546. filter[2] * src[x - stride] + \
  547. filter[3] * src[x ] + \
  548. filter[4] * src[x + stride] + \
  549. filter[5] * src[x + 2 * stride] + \
  550. filter[6] * src[x + 3 * stride] + \
  551. filter[7] * src[x + 4 * stride])
  552. static void FUNC(put_hevc_qpel_h)(int16_t *dst,
  553. uint8_t *_src, ptrdiff_t _srcstride,
  554. int height, intptr_t mx, intptr_t my, int width)
  555. {
  556. int x, y;
  557. pixel *src = (pixel*)_src;
  558. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  559. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  560. for (y = 0; y < height; y++) {
  561. for (x = 0; x < width; x++)
  562. dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  563. src += srcstride;
  564. dst += MAX_PB_SIZE;
  565. }
  566. }
  567. static void FUNC(put_hevc_qpel_v)(int16_t *dst,
  568. uint8_t *_src, ptrdiff_t _srcstride,
  569. int height, intptr_t mx, intptr_t my, int width)
  570. {
  571. int x, y;
  572. pixel *src = (pixel*)_src;
  573. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  574. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  575. for (y = 0; y < height; y++) {
  576. for (x = 0; x < width; x++)
  577. dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
  578. src += srcstride;
  579. dst += MAX_PB_SIZE;
  580. }
  581. }
  582. static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
  583. uint8_t *_src,
  584. ptrdiff_t _srcstride,
  585. int height, intptr_t mx,
  586. intptr_t my, int width)
  587. {
  588. int x, y;
  589. const int8_t *filter;
  590. pixel *src = (pixel*)_src;
  591. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  592. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  593. int16_t *tmp = tmp_array;
  594. src -= QPEL_EXTRA_BEFORE * srcstride;
  595. filter = ff_hevc_qpel_filters[mx - 1];
  596. for (y = 0; y < height + QPEL_EXTRA; y++) {
  597. for (x = 0; x < width; x++)
  598. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  599. src += srcstride;
  600. tmp += MAX_PB_SIZE;
  601. }
  602. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  603. filter = ff_hevc_qpel_filters[my - 1];
  604. for (y = 0; y < height; y++) {
  605. for (x = 0; x < width; x++)
  606. dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
  607. tmp += MAX_PB_SIZE;
  608. dst += MAX_PB_SIZE;
  609. }
  610. }
  611. static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
  612. uint8_t *_src, ptrdiff_t _srcstride,
  613. int height, intptr_t mx, intptr_t my, int width)
  614. {
  615. int x, y;
  616. pixel *src = (pixel*)_src;
  617. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  618. pixel *dst = (pixel *)_dst;
  619. ptrdiff_t dststride = _dststride / sizeof(pixel);
  620. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  621. int shift = 14 - BIT_DEPTH;
  622. #if BIT_DEPTH < 14
  623. int offset = 1 << (shift - 1);
  624. #else
  625. int offset = 0;
  626. #endif
  627. for (y = 0; y < height; y++) {
  628. for (x = 0; x < width; x++)
  629. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
  630. src += srcstride;
  631. dst += dststride;
  632. }
  633. }
  634. static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  635. int16_t *src2,
  636. int height, intptr_t mx, intptr_t my, int width)
  637. {
  638. int x, y;
  639. pixel *src = (pixel*)_src;
  640. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  641. pixel *dst = (pixel *)_dst;
  642. ptrdiff_t dststride = _dststride / sizeof(pixel);
  643. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  644. int shift = 14 + 1 - BIT_DEPTH;
  645. #if BIT_DEPTH < 14
  646. int offset = 1 << (shift - 1);
  647. #else
  648. int offset = 0;
  649. #endif
  650. for (y = 0; y < height; y++) {
  651. for (x = 0; x < width; x++)
  652. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  653. src += srcstride;
  654. dst += dststride;
  655. src2 += MAX_PB_SIZE;
  656. }
  657. }
  658. static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
  659. uint8_t *_src, ptrdiff_t _srcstride,
  660. int height, intptr_t mx, intptr_t my, int width)
  661. {
  662. int x, y;
  663. pixel *src = (pixel*)_src;
  664. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  665. pixel *dst = (pixel *)_dst;
  666. ptrdiff_t dststride = _dststride / sizeof(pixel);
  667. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  668. int shift = 14 - BIT_DEPTH;
  669. #if BIT_DEPTH < 14
  670. int offset = 1 << (shift - 1);
  671. #else
  672. int offset = 0;
  673. #endif
  674. for (y = 0; y < height; y++) {
  675. for (x = 0; x < width; x++)
  676. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
  677. src += srcstride;
  678. dst += dststride;
  679. }
  680. }
  681. static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  682. int16_t *src2,
  683. int height, intptr_t mx, intptr_t my, int width)
  684. {
  685. int x, y;
  686. pixel *src = (pixel*)_src;
  687. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  688. pixel *dst = (pixel *)_dst;
  689. ptrdiff_t dststride = _dststride / sizeof(pixel);
  690. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  691. int shift = 14 + 1 - BIT_DEPTH;
  692. #if BIT_DEPTH < 14
  693. int offset = 1 << (shift - 1);
  694. #else
  695. int offset = 0;
  696. #endif
  697. for (y = 0; y < height; y++) {
  698. for (x = 0; x < width; x++)
  699. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  700. src += srcstride;
  701. dst += dststride;
  702. src2 += MAX_PB_SIZE;
  703. }
  704. }
  705. static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
  706. uint8_t *_src, ptrdiff_t _srcstride,
  707. int height, intptr_t mx, intptr_t my, int width)
  708. {
  709. int x, y;
  710. const int8_t *filter;
  711. pixel *src = (pixel*)_src;
  712. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  713. pixel *dst = (pixel *)_dst;
  714. ptrdiff_t dststride = _dststride / sizeof(pixel);
  715. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  716. int16_t *tmp = tmp_array;
  717. int shift = 14 - BIT_DEPTH;
  718. #if BIT_DEPTH < 14
  719. int offset = 1 << (shift - 1);
  720. #else
  721. int offset = 0;
  722. #endif
  723. src -= QPEL_EXTRA_BEFORE * srcstride;
  724. filter = ff_hevc_qpel_filters[mx - 1];
  725. for (y = 0; y < height + QPEL_EXTRA; y++) {
  726. for (x = 0; x < width; x++)
  727. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  728. src += srcstride;
  729. tmp += MAX_PB_SIZE;
  730. }
  731. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  732. filter = ff_hevc_qpel_filters[my - 1];
  733. for (y = 0; y < height; y++) {
  734. for (x = 0; x < width; x++)
  735. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
  736. tmp += MAX_PB_SIZE;
  737. dst += dststride;
  738. }
  739. }
  740. static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  741. int16_t *src2,
  742. int height, intptr_t mx, intptr_t my, int width)
  743. {
  744. int x, y;
  745. const int8_t *filter;
  746. pixel *src = (pixel*)_src;
  747. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  748. pixel *dst = (pixel *)_dst;
  749. ptrdiff_t dststride = _dststride / sizeof(pixel);
  750. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  751. int16_t *tmp = tmp_array;
  752. int shift = 14 + 1 - BIT_DEPTH;
  753. #if BIT_DEPTH < 14
  754. int offset = 1 << (shift - 1);
  755. #else
  756. int offset = 0;
  757. #endif
  758. src -= QPEL_EXTRA_BEFORE * srcstride;
  759. filter = ff_hevc_qpel_filters[mx - 1];
  760. for (y = 0; y < height + QPEL_EXTRA; y++) {
  761. for (x = 0; x < width; x++)
  762. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  763. src += srcstride;
  764. tmp += MAX_PB_SIZE;
  765. }
  766. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  767. filter = ff_hevc_qpel_filters[my - 1];
  768. for (y = 0; y < height; y++) {
  769. for (x = 0; x < width; x++)
  770. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
  771. tmp += MAX_PB_SIZE;
  772. dst += dststride;
  773. src2 += MAX_PB_SIZE;
  774. }
  775. }
  776. static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
  777. uint8_t *_src, ptrdiff_t _srcstride,
  778. int height, int denom, int wx, int ox,
  779. intptr_t mx, intptr_t my, int width)
  780. {
  781. int x, y;
  782. pixel *src = (pixel*)_src;
  783. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  784. pixel *dst = (pixel *)_dst;
  785. ptrdiff_t dststride = _dststride / sizeof(pixel);
  786. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  787. int shift = denom + 14 - BIT_DEPTH;
  788. #if BIT_DEPTH < 14
  789. int offset = 1 << (shift - 1);
  790. #else
  791. int offset = 0;
  792. #endif
  793. ox = ox * (1 << (BIT_DEPTH - 8));
  794. for (y = 0; y < height; y++) {
  795. for (x = 0; x < width; x++)
  796. dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  797. src += srcstride;
  798. dst += dststride;
  799. }
  800. }
  801. static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  802. int16_t *src2,
  803. int height, int denom, int wx0, int wx1,
  804. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  805. {
  806. int x, y;
  807. pixel *src = (pixel*)_src;
  808. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  809. pixel *dst = (pixel *)_dst;
  810. ptrdiff_t dststride = _dststride / sizeof(pixel);
  811. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  812. int shift = 14 + 1 - BIT_DEPTH;
  813. int log2Wd = denom + shift - 1;
  814. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  815. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  816. for (y = 0; y < height; y++) {
  817. for (x = 0; x < width; x++)
  818. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  819. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  820. src += srcstride;
  821. dst += dststride;
  822. src2 += MAX_PB_SIZE;
  823. }
  824. }
  825. static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
  826. uint8_t *_src, ptrdiff_t _srcstride,
  827. int height, int denom, int wx, int ox,
  828. intptr_t mx, intptr_t my, int width)
  829. {
  830. int x, y;
  831. pixel *src = (pixel*)_src;
  832. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  833. pixel *dst = (pixel *)_dst;
  834. ptrdiff_t dststride = _dststride / sizeof(pixel);
  835. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  836. int shift = denom + 14 - BIT_DEPTH;
  837. #if BIT_DEPTH < 14
  838. int offset = 1 << (shift - 1);
  839. #else
  840. int offset = 0;
  841. #endif
  842. ox = ox * (1 << (BIT_DEPTH - 8));
  843. for (y = 0; y < height; y++) {
  844. for (x = 0; x < width; x++)
  845. dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  846. src += srcstride;
  847. dst += dststride;
  848. }
  849. }
  850. static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  851. int16_t *src2,
  852. int height, int denom, int wx0, int wx1,
  853. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  854. {
  855. int x, y;
  856. pixel *src = (pixel*)_src;
  857. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  858. pixel *dst = (pixel *)_dst;
  859. ptrdiff_t dststride = _dststride / sizeof(pixel);
  860. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  861. int shift = 14 + 1 - BIT_DEPTH;
  862. int log2Wd = denom + shift - 1;
  863. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  864. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  865. for (y = 0; y < height; y++) {
  866. for (x = 0; x < width; x++)
  867. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  868. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  869. src += srcstride;
  870. dst += dststride;
  871. src2 += MAX_PB_SIZE;
  872. }
  873. }
  874. static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
  875. uint8_t *_src, ptrdiff_t _srcstride,
  876. int height, int denom, int wx, int ox,
  877. intptr_t mx, intptr_t my, int width)
  878. {
  879. int x, y;
  880. const int8_t *filter;
  881. pixel *src = (pixel*)_src;
  882. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  883. pixel *dst = (pixel *)_dst;
  884. ptrdiff_t dststride = _dststride / sizeof(pixel);
  885. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  886. int16_t *tmp = tmp_array;
  887. int shift = denom + 14 - BIT_DEPTH;
  888. #if BIT_DEPTH < 14
  889. int offset = 1 << (shift - 1);
  890. #else
  891. int offset = 0;
  892. #endif
  893. src -= QPEL_EXTRA_BEFORE * srcstride;
  894. filter = ff_hevc_qpel_filters[mx - 1];
  895. for (y = 0; y < height + QPEL_EXTRA; y++) {
  896. for (x = 0; x < width; x++)
  897. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  898. src += srcstride;
  899. tmp += MAX_PB_SIZE;
  900. }
  901. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  902. filter = ff_hevc_qpel_filters[my - 1];
  903. ox = ox * (1 << (BIT_DEPTH - 8));
  904. for (y = 0; y < height; y++) {
  905. for (x = 0; x < width; x++)
  906. dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
  907. tmp += MAX_PB_SIZE;
  908. dst += dststride;
  909. }
  910. }
  911. static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  912. int16_t *src2,
  913. int height, int denom, int wx0, int wx1,
  914. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  915. {
  916. int x, y;
  917. const int8_t *filter;
  918. pixel *src = (pixel*)_src;
  919. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  920. pixel *dst = (pixel *)_dst;
  921. ptrdiff_t dststride = _dststride / sizeof(pixel);
  922. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  923. int16_t *tmp = tmp_array;
  924. int shift = 14 + 1 - BIT_DEPTH;
  925. int log2Wd = denom + shift - 1;
  926. src -= QPEL_EXTRA_BEFORE * srcstride;
  927. filter = ff_hevc_qpel_filters[mx - 1];
  928. for (y = 0; y < height + QPEL_EXTRA; y++) {
  929. for (x = 0; x < width; x++)
  930. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  931. src += srcstride;
  932. tmp += MAX_PB_SIZE;
  933. }
  934. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  935. filter = ff_hevc_qpel_filters[my - 1];
  936. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  937. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  938. for (y = 0; y < height; y++) {
  939. for (x = 0; x < width; x++)
  940. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
  941. ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
  942. tmp += MAX_PB_SIZE;
  943. dst += dststride;
  944. src2 += MAX_PB_SIZE;
  945. }
  946. }
  ////////////////////////////////////////////////////////////////////////////////
  // EPEL interpolation: 4-tap filter macro and the functions built on it
  ////////////////////////////////////////////////////////////////////////////////
  /*
   * 4-tap filter centred between src[x] and src[x + stride].
   *
   * Expands to the weighted sum of the samples at x - stride, x, x + stride
   * and x + 2 * stride. The expansion uses the identifiers `filter` (the four
   * coefficients) and `x` (the current column) from the surrounding scope, so
   * both must be visible at the point of use.
   *
   * Both arguments are parenthesised so that expression arguments
   * (e.g. a pointer sum or a computed stride) expand correctly.
   */
  #define EPEL_FILTER(src, stride)                                               \
      (filter[0] * (src)[x - (stride)] +                                         \
       filter[1] * (src)[x] +                                                    \
       filter[2] * (src)[x + (stride)] +                                         \
       filter[3] * (src)[x + 2 * (stride)])
  955. static void FUNC(put_hevc_epel_h)(int16_t *dst,
  956. uint8_t *_src, ptrdiff_t _srcstride,
  957. int height, intptr_t mx, intptr_t my, int width)
  958. {
  959. int x, y;
  960. pixel *src = (pixel *)_src;
  961. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  962. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  963. for (y = 0; y < height; y++) {
  964. for (x = 0; x < width; x++)
  965. dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  966. src += srcstride;
  967. dst += MAX_PB_SIZE;
  968. }
  969. }
  970. static void FUNC(put_hevc_epel_v)(int16_t *dst,
  971. uint8_t *_src, ptrdiff_t _srcstride,
  972. int height, intptr_t mx, intptr_t my, int width)
  973. {
  974. int x, y;
  975. pixel *src = (pixel *)_src;
  976. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  977. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  978. for (y = 0; y < height; y++) {
  979. for (x = 0; x < width; x++)
  980. dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
  981. src += srcstride;
  982. dst += MAX_PB_SIZE;
  983. }
  984. }
  985. static void FUNC(put_hevc_epel_hv)(int16_t *dst,
  986. uint8_t *_src, ptrdiff_t _srcstride,
  987. int height, intptr_t mx, intptr_t my, int width)
  988. {
  989. int x, y;
  990. pixel *src = (pixel *)_src;
  991. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  992. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  993. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  994. int16_t *tmp = tmp_array;
  995. src -= EPEL_EXTRA_BEFORE * srcstride;
  996. for (y = 0; y < height + EPEL_EXTRA; y++) {
  997. for (x = 0; x < width; x++)
  998. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  999. src += srcstride;
  1000. tmp += MAX_PB_SIZE;
  1001. }
  1002. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1003. filter = ff_hevc_epel_filters[my - 1];
  1004. for (y = 0; y < height; y++) {
  1005. for (x = 0; x < width; x++)
  1006. dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
  1007. tmp += MAX_PB_SIZE;
  1008. dst += MAX_PB_SIZE;
  1009. }
  1010. }
  1011. static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1012. int height, intptr_t mx, intptr_t my, int width)
  1013. {
  1014. int x, y;
  1015. pixel *src = (pixel *)_src;
  1016. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1017. pixel *dst = (pixel *)_dst;
  1018. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1019. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1020. int shift = 14 - BIT_DEPTH;
  1021. #if BIT_DEPTH < 14
  1022. int offset = 1 << (shift - 1);
  1023. #else
  1024. int offset = 0;
  1025. #endif
  1026. for (y = 0; y < height; y++) {
  1027. for (x = 0; x < width; x++)
  1028. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
  1029. src += srcstride;
  1030. dst += dststride;
  1031. }
  1032. }
  1033. static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1034. int16_t *src2,
  1035. int height, intptr_t mx, intptr_t my, int width)
  1036. {
  1037. int x, y;
  1038. pixel *src = (pixel *)_src;
  1039. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1040. pixel *dst = (pixel *)_dst;
  1041. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1042. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1043. int shift = 14 + 1 - BIT_DEPTH;
  1044. #if BIT_DEPTH < 14
  1045. int offset = 1 << (shift - 1);
  1046. #else
  1047. int offset = 0;
  1048. #endif
  1049. for (y = 0; y < height; y++) {
  1050. for (x = 0; x < width; x++) {
  1051. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  1052. }
  1053. dst += dststride;
  1054. src += srcstride;
  1055. src2 += MAX_PB_SIZE;
  1056. }
  1057. }
  1058. static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1059. int height, intptr_t mx, intptr_t my, int width)
  1060. {
  1061. int x, y;
  1062. pixel *src = (pixel *)_src;
  1063. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1064. pixel *dst = (pixel *)_dst;
  1065. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1066. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1067. int shift = 14 - BIT_DEPTH;
  1068. #if BIT_DEPTH < 14
  1069. int offset = 1 << (shift - 1);
  1070. #else
  1071. int offset = 0;
  1072. #endif
  1073. for (y = 0; y < height; y++) {
  1074. for (x = 0; x < width; x++)
  1075. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
  1076. src += srcstride;
  1077. dst += dststride;
  1078. }
  1079. }
  1080. static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1081. int16_t *src2,
  1082. int height, intptr_t mx, intptr_t my, int width)
  1083. {
  1084. int x, y;
  1085. pixel *src = (pixel *)_src;
  1086. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1087. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1088. pixel *dst = (pixel *)_dst;
  1089. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1090. int shift = 14 + 1 - BIT_DEPTH;
  1091. #if BIT_DEPTH < 14
  1092. int offset = 1 << (shift - 1);
  1093. #else
  1094. int offset = 0;
  1095. #endif
  1096. for (y = 0; y < height; y++) {
  1097. for (x = 0; x < width; x++)
  1098. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  1099. dst += dststride;
  1100. src += srcstride;
  1101. src2 += MAX_PB_SIZE;
  1102. }
  1103. }
  1104. static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1105. int height, intptr_t mx, intptr_t my, int width)
  1106. {
  1107. int x, y;
  1108. pixel *src = (pixel *)_src;
  1109. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1110. pixel *dst = (pixel *)_dst;
  1111. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1112. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1113. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1114. int16_t *tmp = tmp_array;
  1115. int shift = 14 - BIT_DEPTH;
  1116. #if BIT_DEPTH < 14
  1117. int offset = 1 << (shift - 1);
  1118. #else
  1119. int offset = 0;
  1120. #endif
  1121. src -= EPEL_EXTRA_BEFORE * srcstride;
  1122. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1123. for (x = 0; x < width; x++)
  1124. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1125. src += srcstride;
  1126. tmp += MAX_PB_SIZE;
  1127. }
  1128. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1129. filter = ff_hevc_epel_filters[my - 1];
  1130. for (y = 0; y < height; y++) {
  1131. for (x = 0; x < width; x++)
  1132. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
  1133. tmp += MAX_PB_SIZE;
  1134. dst += dststride;
  1135. }
  1136. }
  1137. static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1138. int16_t *src2,
  1139. int height, intptr_t mx, intptr_t my, int width)
  1140. {
  1141. int x, y;
  1142. pixel *src = (pixel *)_src;
  1143. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1144. pixel *dst = (pixel *)_dst;
  1145. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1146. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1147. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1148. int16_t *tmp = tmp_array;
  1149. int shift = 14 + 1 - BIT_DEPTH;
  1150. #if BIT_DEPTH < 14
  1151. int offset = 1 << (shift - 1);
  1152. #else
  1153. int offset = 0;
  1154. #endif
  1155. src -= EPEL_EXTRA_BEFORE * srcstride;
  1156. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1157. for (x = 0; x < width; x++)
  1158. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1159. src += srcstride;
  1160. tmp += MAX_PB_SIZE;
  1161. }
  1162. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1163. filter = ff_hevc_epel_filters[my - 1];
  1164. for (y = 0; y < height; y++) {
  1165. for (x = 0; x < width; x++)
  1166. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
  1167. tmp += MAX_PB_SIZE;
  1168. dst += dststride;
  1169. src2 += MAX_PB_SIZE;
  1170. }
  1171. }
  1172. static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1173. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1174. {
  1175. int x, y;
  1176. pixel *src = (pixel *)_src;
  1177. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1178. pixel *dst = (pixel *)_dst;
  1179. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1180. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1181. int shift = denom + 14 - BIT_DEPTH;
  1182. #if BIT_DEPTH < 14
  1183. int offset = 1 << (shift - 1);
  1184. #else
  1185. int offset = 0;
  1186. #endif
  1187. ox = ox * (1 << (BIT_DEPTH - 8));
  1188. for (y = 0; y < height; y++) {
  1189. for (x = 0; x < width; x++) {
  1190. dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  1191. }
  1192. dst += dststride;
  1193. src += srcstride;
  1194. }
  1195. }
  1196. static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1197. int16_t *src2,
  1198. int height, int denom, int wx0, int wx1,
  1199. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1200. {
  1201. int x, y;
  1202. pixel *src = (pixel *)_src;
  1203. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1204. pixel *dst = (pixel *)_dst;
  1205. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1206. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1207. int shift = 14 + 1 - BIT_DEPTH;
  1208. int log2Wd = denom + shift - 1;
  1209. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1210. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1211. for (y = 0; y < height; y++) {
  1212. for (x = 0; x < width; x++)
  1213. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  1214. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  1215. src += srcstride;
  1216. dst += dststride;
  1217. src2 += MAX_PB_SIZE;
  1218. }
  1219. }
  1220. static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1221. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1222. {
  1223. int x, y;
  1224. pixel *src = (pixel *)_src;
  1225. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1226. pixel *dst = (pixel *)_dst;
  1227. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1228. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1229. int shift = denom + 14 - BIT_DEPTH;
  1230. #if BIT_DEPTH < 14
  1231. int offset = 1 << (shift - 1);
  1232. #else
  1233. int offset = 0;
  1234. #endif
  1235. ox = ox * (1 << (BIT_DEPTH - 8));
  1236. for (y = 0; y < height; y++) {
  1237. for (x = 0; x < width; x++) {
  1238. dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  1239. }
  1240. dst += dststride;
  1241. src += srcstride;
  1242. }
  1243. }
  1244. static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1245. int16_t *src2,
  1246. int height, int denom, int wx0, int wx1,
  1247. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1248. {
  1249. int x, y;
  1250. pixel *src = (pixel *)_src;
  1251. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1252. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1253. pixel *dst = (pixel *)_dst;
  1254. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1255. int shift = 14 + 1 - BIT_DEPTH;
  1256. int log2Wd = denom + shift - 1;
  1257. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1258. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1259. for (y = 0; y < height; y++) {
  1260. for (x = 0; x < width; x++)
  1261. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  1262. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  1263. src += srcstride;
  1264. dst += dststride;
  1265. src2 += MAX_PB_SIZE;
  1266. }
  1267. }
  1268. static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1269. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1270. {
  1271. int x, y;
  1272. pixel *src = (pixel *)_src;
  1273. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1274. pixel *dst = (pixel *)_dst;
  1275. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1276. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1277. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1278. int16_t *tmp = tmp_array;
  1279. int shift = denom + 14 - BIT_DEPTH;
  1280. #if BIT_DEPTH < 14
  1281. int offset = 1 << (shift - 1);
  1282. #else
  1283. int offset = 0;
  1284. #endif
  1285. src -= EPEL_EXTRA_BEFORE * srcstride;
  1286. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1287. for (x = 0; x < width; x++)
  1288. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1289. src += srcstride;
  1290. tmp += MAX_PB_SIZE;
  1291. }
  1292. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1293. filter = ff_hevc_epel_filters[my - 1];
  1294. ox = ox * (1 << (BIT_DEPTH - 8));
  1295. for (y = 0; y < height; y++) {
  1296. for (x = 0; x < width; x++)
  1297. dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
  1298. tmp += MAX_PB_SIZE;
  1299. dst += dststride;
  1300. }
  1301. }
  1302. static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1303. int16_t *src2,
  1304. int height, int denom, int wx0, int wx1,
  1305. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1306. {
  1307. int x, y;
  1308. pixel *src = (pixel *)_src;
  1309. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1310. pixel *dst = (pixel *)_dst;
  1311. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1312. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1313. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1314. int16_t *tmp = tmp_array;
  1315. int shift = 14 + 1 - BIT_DEPTH;
  1316. int log2Wd = denom + shift - 1;
  1317. src -= EPEL_EXTRA_BEFORE * srcstride;
  1318. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1319. for (x = 0; x < width; x++)
  1320. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1321. src += srcstride;
  1322. tmp += MAX_PB_SIZE;
  1323. }
  1324. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1325. filter = ff_hevc_epel_filters[my - 1];
  1326. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1327. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1328. for (y = 0; y < height; y++) {
  1329. for (x = 0; x < width; x++)
  1330. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
  1331. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  1332. tmp += MAX_PB_SIZE;
  1333. dst += dststride;
  1334. src2 += MAX_PB_SIZE;
  1335. }
  1336. }
  /*
   * Sample accessors for the deblocking filters that follow.
   *
   * Every macro expands to an element of a local pointer named `pix`, indexed
   * with local strides named `xstride` and `ystride`; all three names must be
   * in scope at the point of use. P0..P3 sit at negative xstride multiples
   * (P0 nearest to pix), Q0..Q3 at non-negative multiples (Q0 is pix[0]).
   * The T-prefixed variants address the same taps three ystride steps further
   * along. Because they expand to lvalues they can be read and assigned.
   */
  // line zero
  #define P3 pix[-4 * xstride]
  #define P2 pix[-3 * xstride]
  #define P1 pix[-2 * xstride]
  #define P0 pix[-1 * xstride]
  #define Q0 pix[0 * xstride]
  #define Q1 pix[1 * xstride]
  #define Q2 pix[2 * xstride]
  #define Q3 pix[3 * xstride]
  // line three. used only for deblocking decision
  #define TP3 pix[-4 * xstride + 3 * ystride]
  #define TP2 pix[-3 * xstride + 3 * ystride]
  #define TP1 pix[-2 * xstride + 3 * ystride]
  #define TP0 pix[-1 * xstride + 3 * ystride]
  #define TQ0 pix[0 * xstride + 3 * ystride]
  #define TQ1 pix[1 * xstride + 3 * ystride]
  #define TQ2 pix[2 * xstride + 3 * ystride]
  #define TQ3 pix[3 * xstride + 3 * ystride]
  1355. static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
  1356. ptrdiff_t _xstride, ptrdiff_t _ystride,
  1357. int beta, int *_tc,
  1358. uint8_t *_no_p, uint8_t *_no_q)
  1359. {
  1360. int d, j;
  1361. pixel *pix = (pixel *)_pix;
  1362. ptrdiff_t xstride = _xstride / sizeof(pixel);
  1363. ptrdiff_t ystride = _ystride / sizeof(pixel);
  1364. beta <<= BIT_DEPTH - 8;
  1365. for (j = 0; j < 2; j++) {
  1366. const int dp0 = abs(P2 - 2 * P1 + P0);
  1367. const int dq0 = abs(Q2 - 2 * Q1 + Q0);
  1368. const int dp3 = abs(TP2 - 2 * TP1 + TP0);
  1369. const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
  1370. const int d0 = dp0 + dq0;
  1371. const int d3 = dp3 + dq3;
  1372. const int tc = _tc[j] << (BIT_DEPTH - 8);
  1373. const int no_p = _no_p[j];
  1374. const int no_q = _no_q[j];
  1375. if (d0 + d3 >= beta) {
  1376. pix += 4 * ystride;
  1377. continue;
  1378. } else {
  1379. const int beta_3 = beta >> 3;
  1380. const int beta_2 = beta >> 2;
  1381. const int tc25 = ((tc * 5 + 1) >> 1);
  1382. if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
  1383. abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
  1384. (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
  1385. // strong filtering
  1386. const int tc2 = tc << 1;
  1387. for (d = 0; d < 4; d++) {
  1388. const int p3 = P3;
  1389. const int p2 = P2;
  1390. const int p1 = P1;
  1391. const int p0 = P0;
  1392. const int q0 = Q0;
  1393. const int q1 = Q1;
  1394. const int q2 = Q2;
  1395. const int q3 = Q3;
  1396. if (!no_p) {
  1397. P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
  1398. P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
  1399. P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
  1400. }
  1401. if (!no_q) {
  1402. Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
  1403. Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
  1404. Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
  1405. }
  1406. pix += ystride;
  1407. }
  1408. } else { // normal filtering
  1409. int nd_p = 1;
  1410. int nd_q = 1;
  1411. const int tc_2 = tc >> 1;
  1412. if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
  1413. nd_p = 2;
  1414. if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
  1415. nd_q = 2;
  1416. for (d = 0; d < 4; d++) {
  1417. const int p2 = P2;
  1418. const int p1 = P1;
  1419. const int p0 = P0;
  1420. const int q0 = Q0;
  1421. const int q1 = Q1;
  1422. const int q2 = Q2;
  1423. int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
  1424. if (abs(delta0) < 10 * tc) {
  1425. delta0 = av_clip(delta0, -tc, tc);
  1426. if (!no_p)
  1427. P0 = av_clip_pixel(p0 + delta0);
  1428. if (!no_q)
  1429. Q0 = av_clip_pixel(q0 - delta0);
  1430. if (!no_p && nd_p > 1) {
  1431. const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
  1432. P1 = av_clip_pixel(p1 + deltap1);
  1433. }
  1434. if (!no_q && nd_q > 1) {
  1435. const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
  1436. Q1 = av_clip_pixel(q1 + deltaq1);
  1437. }
  1438. }
  1439. pix += ystride;
  1440. }
  1441. }
  1442. }
  1443. }
  1444. }
  1445. static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
  1446. ptrdiff_t _ystride, int *_tc,
  1447. uint8_t *_no_p, uint8_t *_no_q)
  1448. {
  1449. int d, j, no_p, no_q;
  1450. pixel *pix = (pixel *)_pix;
  1451. ptrdiff_t xstride = _xstride / sizeof(pixel);
  1452. ptrdiff_t ystride = _ystride / sizeof(pixel);
  1453. for (j = 0; j < 2; j++) {
  1454. const int tc = _tc[j] << (BIT_DEPTH - 8);
  1455. if (tc <= 0) {
  1456. pix += 4 * ystride;
  1457. continue;
  1458. }
  1459. no_p = _no_p[j];
  1460. no_q = _no_q[j];
  1461. for (d = 0; d < 4; d++) {
  1462. int delta0;
  1463. const int p1 = P1;
  1464. const int p0 = P0;
  1465. const int q0 = Q0;
  1466. const int q1 = Q1;
  1467. delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
  1468. if (!no_p)
  1469. P0 = av_clip_pixel(p0 + delta0);
  1470. if (!no_q)
  1471. Q0 = av_clip_pixel(q0 - delta0);
  1472. pix += ystride;
  1473. }
  1474. }
  1475. }
  1476. static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
  1477. int32_t *tc, uint8_t *no_p,
  1478. uint8_t *no_q)
  1479. {
  1480. FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
  1481. }
  1482. static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
  1483. int32_t *tc, uint8_t *no_p,
  1484. uint8_t *no_q)
  1485. {
  1486. FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
  1487. }
  1488. static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
  1489. int beta, int32_t *tc, uint8_t *no_p,
  1490. uint8_t *no_q)
  1491. {
  1492. FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
  1493. beta, tc, no_p, no_q);
  1494. }
  1495. static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
  1496. int beta, int32_t *tc, uint8_t *no_p,
  1497. uint8_t *no_q)
  1498. {
  1499. FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
  1500. beta, tc, no_p, no_q);
  1501. }
  /*
   * Drop the short P/Q/TP/TQ sample accessor names defined before the loop
   * filters so that they do not stay defined past this point.
   */
  #undef P3
  #undef P2
  #undef P1
  #undef P0
  #undef Q0
  #undef Q1
  #undef Q2
  #undef Q3
  #undef TP3
  #undef TP2
  #undef TP1
  #undef TP0
  #undef TQ0
  #undef TQ1
  #undef TQ2
  #undef TQ3