You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1741 lines
64KB

  1. /*
  2. * HEVC video decoder
  3. *
  4. * Copyright (C) 2012 - 2013 Guillaume Martres
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "get_bits.h"
  23. #include "hevc.h"
  24. #include "bit_depth_template.c"
  25. #include "hevcdsp.h"
  26. static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
  27. GetBitContext *gb, int pcm_bit_depth)
  28. {
  29. int x, y;
  30. pixel *dst = (pixel *)_dst;
  31. stride /= sizeof(pixel);
  32. for (y = 0; y < height; y++) {
  33. for (x = 0; x < width; x++)
  34. dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
  35. dst += stride;
  36. }
  37. }
  38. static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
  39. ptrdiff_t stride)
  40. {
  41. int x, y;
  42. pixel *dst = (pixel *)_dst;
  43. stride /= sizeof(pixel);
  44. for (y = 0; y < 4; y++) {
  45. for (x = 0; x < 4; x++) {
  46. dst[x] = av_clip_pixel(dst[x] + *coeffs);
  47. coeffs++;
  48. }
  49. dst += stride;
  50. }
  51. }
  52. static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
  53. ptrdiff_t stride)
  54. {
  55. int x, y;
  56. pixel *dst = (pixel *)_dst;
  57. stride /= sizeof(pixel);
  58. for (y = 0; y < 8; y++) {
  59. for (x = 0; x < 8; x++) {
  60. dst[x] = av_clip_pixel(dst[x] + *coeffs);
  61. coeffs++;
  62. }
  63. dst += stride;
  64. }
  65. }
  66. static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
  67. ptrdiff_t stride)
  68. {
  69. int x, y;
  70. pixel *dst = (pixel *)_dst;
  71. stride /= sizeof(pixel);
  72. for (y = 0; y < 16; y++) {
  73. for (x = 0; x < 16; x++) {
  74. dst[x] = av_clip_pixel(dst[x] + *coeffs);
  75. coeffs++;
  76. }
  77. dst += stride;
  78. }
  79. }
  80. static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
  81. ptrdiff_t stride)
  82. {
  83. int x, y;
  84. pixel *dst = (pixel *)_dst;
  85. stride /= sizeof(pixel);
  86. for (y = 0; y < 32; y++) {
  87. for (x = 0; x < 32; x++) {
  88. dst[x] = av_clip_pixel(dst[x] + *coeffs);
  89. coeffs++;
  90. }
  91. dst += stride;
  92. }
  93. }
  94. static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
  95. {
  96. int16_t *coeffs = (int16_t *) _coeffs;
  97. int x, y;
  98. int size = 1 << log2_size;
  99. if (mode) {
  100. coeffs += size;
  101. for (y = 0; y < size - 1; y++) {
  102. for (x = 0; x < size; x++)
  103. coeffs[x] += coeffs[x - size];
  104. coeffs += size;
  105. }
  106. } else {
  107. for (y = 0; y < size; y++) {
  108. for (x = 1; x < size; x++)
  109. coeffs[x] += coeffs[x - 1];
  110. coeffs += size;
  111. }
  112. }
  113. }
  114. static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
  115. {
  116. int shift = 15 - BIT_DEPTH - log2_size;
  117. int x, y;
  118. int size = 1 << log2_size;
  119. int16_t *coeffs = _coeffs;
  120. if (shift > 0) {
  121. int offset = 1 << (shift - 1);
  122. for (y = 0; y < size; y++) {
  123. for (x = 0; x < size; x++) {
  124. *coeffs = (*coeffs + offset) >> shift;
  125. coeffs++;
  126. }
  127. }
  128. } else {
  129. for (y = 0; y < size; y++) {
  130. for (x = 0; x < size; x++) {
  131. *coeffs = *(uint16_t*)coeffs << -shift;
  132. coeffs++;
  133. }
  134. }
  135. }
  136. }
  137. #define SET(dst, x) (dst) = (x)
  138. #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
  139. #define ADD_AND_SCALE(dst, x) \
  140. (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
  141. #define TR_4x4_LUMA(dst, src, step, assign) \
  142. do { \
  143. int c0 = src[0 * step] + src[2 * step]; \
  144. int c1 = src[2 * step] + src[3 * step]; \
  145. int c2 = src[0 * step] - src[3 * step]; \
  146. int c3 = 74 * src[1 * step]; \
  147. \
  148. assign(dst[2 * step], 74 * (src[0 * step] - \
  149. src[2 * step] + \
  150. src[3 * step])); \
  151. assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
  152. assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
  153. assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
  154. } while (0)
  155. static void FUNC(transform_4x4_luma)(int16_t *coeffs)
  156. {
  157. int i;
  158. int shift = 7;
  159. int add = 1 << (shift - 1);
  160. int16_t *src = coeffs;
  161. for (i = 0; i < 4; i++) {
  162. TR_4x4_LUMA(src, src, 4, SCALE);
  163. src++;
  164. }
  165. shift = 20 - BIT_DEPTH;
  166. add = 1 << (shift - 1);
  167. for (i = 0; i < 4; i++) {
  168. TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
  169. coeffs += 4;
  170. }
  171. }
  172. #undef TR_4x4_LUMA
  173. #define TR_4(dst, src, dstep, sstep, assign, end) \
  174. do { \
  175. const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
  176. const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
  177. const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
  178. const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
  179. \
  180. assign(dst[0 * dstep], e0 + o0); \
  181. assign(dst[1 * dstep], e1 + o1); \
  182. assign(dst[2 * dstep], e1 - o1); \
  183. assign(dst[3 * dstep], e0 - o0); \
  184. } while (0)
  185. #define TR_8(dst, src, dstep, sstep, assign, end) \
  186. do { \
  187. int i, j; \
  188. int e_8[4]; \
  189. int o_8[4] = { 0 }; \
  190. for (i = 0; i < 4; i++) \
  191. for (j = 1; j < end; j += 2) \
  192. o_8[i] += transform[4 * j][i] * src[j * sstep]; \
  193. TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
  194. \
  195. for (i = 0; i < 4; i++) { \
  196. assign(dst[i * dstep], e_8[i] + o_8[i]); \
  197. assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
  198. } \
  199. } while (0)
  200. #define TR_16(dst, src, dstep, sstep, assign, end) \
  201. do { \
  202. int i, j; \
  203. int e_16[8]; \
  204. int o_16[8] = { 0 }; \
  205. for (i = 0; i < 8; i++) \
  206. for (j = 1; j < end; j += 2) \
  207. o_16[i] += transform[2 * j][i] * src[j * sstep]; \
  208. TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
  209. \
  210. for (i = 0; i < 8; i++) { \
  211. assign(dst[i * dstep], e_16[i] + o_16[i]); \
  212. assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
  213. } \
  214. } while (0)
  215. #define TR_32(dst, src, dstep, sstep, assign, end) \
  216. do { \
  217. int i, j; \
  218. int e_32[16]; \
  219. int o_32[16] = { 0 }; \
  220. for (i = 0; i < 16; i++) \
  221. for (j = 1; j < end; j += 2) \
  222. o_32[i] += transform[j][i] * src[j * sstep]; \
  223. TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \
  224. \
  225. for (i = 0; i < 16; i++) { \
  226. assign(dst[i * dstep], e_32[i] + o_32[i]); \
  227. assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
  228. } \
  229. } while (0)
  230. #define IDCT_VAR4(H) \
  231. int limit2 = FFMIN(col_limit + 4, H)
  232. #define IDCT_VAR8(H) \
  233. int limit = FFMIN(col_limit, H); \
  234. int limit2 = FFMIN(col_limit + 4, H)
  235. #define IDCT_VAR16(H) IDCT_VAR8(H)
  236. #define IDCT_VAR32(H) IDCT_VAR8(H)
  237. #define IDCT(H) \
  238. static void FUNC(idct_##H ##x ##H )( \
  239. int16_t *coeffs, int col_limit) { \
  240. int i; \
  241. int shift = 7; \
  242. int add = 1 << (shift - 1); \
  243. int16_t *src = coeffs; \
  244. IDCT_VAR ##H(H); \
  245. \
  246. for (i = 0; i < H; i++) { \
  247. TR_ ## H(src, src, H, H, SCALE, limit2); \
  248. if (limit2 < H && i%4 == 0 && !!i) \
  249. limit2 -= 4; \
  250. src++; \
  251. } \
  252. \
  253. shift = 20 - BIT_DEPTH; \
  254. add = 1 << (shift - 1); \
  255. for (i = 0; i < H; i++) { \
  256. TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
  257. coeffs += H; \
  258. } \
  259. }
  260. #define IDCT_DC(H) \
  261. static void FUNC(idct_##H ##x ##H ##_dc)( \
  262. int16_t *coeffs) { \
  263. int i, j; \
  264. int shift = 14 - BIT_DEPTH; \
  265. int add = 1 << (shift - 1); \
  266. int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
  267. \
  268. for (j = 0; j < H; j++) { \
  269. for (i = 0; i < H; i++) { \
  270. coeffs[i+j*H] = coeff; \
  271. } \
  272. } \
  273. }
  274. IDCT( 4)
  275. IDCT( 8)
  276. IDCT(16)
  277. IDCT(32)
  278. IDCT_DC( 4)
  279. IDCT_DC( 8)
  280. IDCT_DC(16)
  281. IDCT_DC(32)
  282. #undef TR_4
  283. #undef TR_8
  284. #undef TR_16
  285. #undef TR_32
  286. #undef SET
  287. #undef SCALE
  288. #undef ADD_AND_SCALE
  289. static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
  290. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  291. int *borders, int width, int height,
  292. int c_idx)
  293. {
  294. pixel *dst = (pixel *)_dst;
  295. pixel *src = (pixel *)_src;
  296. int offset_table[32] = { 0 };
  297. int k, y, x;
  298. int shift = BIT_DEPTH - 5;
  299. int16_t *sao_offset_val = sao->offset_val[c_idx];
  300. int sao_left_class = sao->band_position[c_idx];
  301. stride_dst /= sizeof(pixel);
  302. stride_src /= sizeof(pixel);
  303. for (k = 0; k < 4; k++)
  304. offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
  305. for (y = 0; y < height; y++) {
  306. for (x = 0; x < width; x++)
  307. dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
  308. dst += stride_dst;
  309. src += stride_src;
  310. }
  311. }
  312. #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
  313. static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src,
  314. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  315. int width, int height,
  316. int c_idx, int init_x, int init_y) {
  317. static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
  318. static const int8_t pos[4][2][2] = {
  319. { { -1, 0 }, { 1, 0 } }, // horizontal
  320. { { 0, -1 }, { 0, 1 } }, // vertical
  321. { { -1, -1 }, { 1, 1 } }, // 45 degree
  322. { { 1, -1 }, { -1, 1 } }, // 135 degree
  323. };
  324. int16_t *sao_offset_val = sao->offset_val[c_idx];
  325. int sao_eo_class = sao->eo_class[c_idx];
  326. pixel *dst = (pixel *)_dst;
  327. pixel *src = (pixel *)_src;
  328. int y_stride_src = init_y * stride_src;
  329. int y_stride_dst = init_y * stride_dst;
  330. int pos_0_0 = pos[sao_eo_class][0][0];
  331. int pos_0_1 = pos[sao_eo_class][0][1];
  332. int pos_1_0 = pos[sao_eo_class][1][0];
  333. int pos_1_1 = pos[sao_eo_class][1][1];
  334. int x, y;
  335. int y_stride_0_1 = (init_y + pos_0_1) * stride_src;
  336. int y_stride_1_1 = (init_y + pos_1_1) * stride_src;
  337. for (y = init_y; y < height; y++) {
  338. for (x = init_x; x < width; x++) {
  339. int diff0 = CMP(src[x + y_stride_src], src[x + pos_0_0 + y_stride_0_1]);
  340. int diff1 = CMP(src[x + y_stride_src], src[x + pos_1_0 + y_stride_1_1]);
  341. int offset_val = edge_idx[2 + diff0 + diff1];
  342. dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + sao_offset_val[offset_val]);
  343. }
  344. y_stride_src += stride_src;
  345. y_stride_dst += stride_dst;
  346. y_stride_0_1 += stride_src;
  347. y_stride_1_1 += stride_src;
  348. }
  349. }
  350. static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
  351. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  352. int *borders, int _width, int _height,
  353. int c_idx, uint8_t *vert_edge,
  354. uint8_t *horiz_edge, uint8_t *diag_edge)
  355. {
  356. int x, y;
  357. pixel *dst = (pixel *)_dst;
  358. pixel *src = (pixel *)_src;
  359. int16_t *sao_offset_val = sao->offset_val[c_idx];
  360. int sao_eo_class = sao->eo_class[c_idx];
  361. int init_x = 0, init_y = 0, width = _width, height = _height;
  362. stride_dst /= sizeof(pixel);
  363. stride_src /= sizeof(pixel);
  364. if (sao_eo_class != SAO_EO_VERT) {
  365. if (borders[0]) {
  366. int offset_val = sao_offset_val[0];
  367. for (y = 0; y < height; y++) {
  368. dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
  369. }
  370. init_x = 1;
  371. }
  372. if (borders[2]) {
  373. int offset_val = sao_offset_val[0];
  374. int offset = width - 1;
  375. for (x = 0; x < height; x++) {
  376. dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
  377. }
  378. width--;
  379. }
  380. }
  381. if (sao_eo_class != SAO_EO_HORIZ) {
  382. if (borders[1]) {
  383. int offset_val = sao_offset_val[0];
  384. for (x = init_x; x < width; x++)
  385. dst[x] = av_clip_pixel(src[x] + offset_val);
  386. init_y = 1;
  387. }
  388. if (borders[3]) {
  389. int offset_val = sao_offset_val[0];
  390. int y_stride_dst = stride_dst * (height - 1);
  391. int y_stride_src = stride_src * (height - 1);
  392. for (x = init_x; x < width; x++)
  393. dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
  394. height--;
  395. }
  396. }
  397. FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
  398. }
  399. static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
  400. ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
  401. int *borders, int _width, int _height,
  402. int c_idx, uint8_t *vert_edge,
  403. uint8_t *horiz_edge, uint8_t *diag_edge)
  404. {
  405. int x, y;
  406. pixel *dst = (pixel *)_dst;
  407. pixel *src = (pixel *)_src;
  408. int16_t *sao_offset_val = sao->offset_val[c_idx];
  409. int sao_eo_class = sao->eo_class[c_idx];
  410. int init_x = 0, init_y = 0, width = _width, height = _height;
  411. stride_dst /= sizeof(pixel);
  412. stride_src /= sizeof(pixel);
  413. if (sao_eo_class != SAO_EO_VERT) {
  414. if (borders[0]) {
  415. int offset_val = sao_offset_val[0];
  416. for (y = 0; y < height; y++) {
  417. dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
  418. }
  419. init_x = 1;
  420. }
  421. if (borders[2]) {
  422. int offset_val = sao_offset_val[0];
  423. int offset = width - 1;
  424. for (x = 0; x < height; x++) {
  425. dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
  426. }
  427. width--;
  428. }
  429. }
  430. if (sao_eo_class != SAO_EO_HORIZ) {
  431. if (borders[1]) {
  432. int offset_val = sao_offset_val[0];
  433. for (x = init_x; x < width; x++)
  434. dst[x] = av_clip_pixel(src[x] + offset_val);
  435. init_y = 1;
  436. }
  437. if (borders[3]) {
  438. int offset_val = sao_offset_val[0];
  439. int y_stride_dst = stride_dst * (height - 1);
  440. int y_stride_src = stride_src * (height - 1);
  441. for (x = init_x; x < width; x++)
  442. dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
  443. height--;
  444. }
  445. }
  446. FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
  447. {
  448. int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
  449. int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
  450. int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
  451. int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
  452. // Restore pixels that can't be modified
  453. if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
  454. for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
  455. dst[y*stride_dst] = src[y*stride_src];
  456. }
  457. if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
  458. for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
  459. dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
  460. }
  461. if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
  462. for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
  463. dst[x] = src[x];
  464. }
  465. if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
  466. for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
  467. dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
  468. }
  469. if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
  470. dst[0] = src[0];
  471. if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
  472. dst[width-1] = src[width-1];
  473. if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
  474. dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
  475. if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
  476. dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
  477. }
  478. }
  479. #undef CMP
  480. ////////////////////////////////////////////////////////////////////////////////
  481. //
  482. ////////////////////////////////////////////////////////////////////////////////
  483. static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
  484. uint8_t *_src, ptrdiff_t _srcstride,
  485. int height, intptr_t mx, intptr_t my, int width)
  486. {
  487. int x, y;
  488. pixel *src = (pixel *)_src;
  489. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  490. for (y = 0; y < height; y++) {
  491. for (x = 0; x < width; x++)
  492. dst[x] = src[x] << (14 - BIT_DEPTH);
  493. src += srcstride;
  494. dst += MAX_PB_SIZE;
  495. }
  496. }
  497. static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  498. int height, intptr_t mx, intptr_t my, int width)
  499. {
  500. int y;
  501. pixel *src = (pixel *)_src;
  502. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  503. pixel *dst = (pixel *)_dst;
  504. ptrdiff_t dststride = _dststride / sizeof(pixel);
  505. for (y = 0; y < height; y++) {
  506. memcpy(dst, src, width * sizeof(pixel));
  507. src += srcstride;
  508. dst += dststride;
  509. }
  510. }
  511. static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  512. int16_t *src2,
  513. int height, intptr_t mx, intptr_t my, int width)
  514. {
  515. int x, y;
  516. pixel *src = (pixel *)_src;
  517. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  518. pixel *dst = (pixel *)_dst;
  519. ptrdiff_t dststride = _dststride / sizeof(pixel);
  520. int shift = 14 + 1 - BIT_DEPTH;
  521. #if BIT_DEPTH < 14
  522. int offset = 1 << (shift - 1);
  523. #else
  524. int offset = 0;
  525. #endif
  526. for (y = 0; y < height; y++) {
  527. for (x = 0; x < width; x++)
  528. dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
  529. src += srcstride;
  530. dst += dststride;
  531. src2 += MAX_PB_SIZE;
  532. }
  533. }
  534. static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  535. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  536. {
  537. int x, y;
  538. pixel *src = (pixel *)_src;
  539. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  540. pixel *dst = (pixel *)_dst;
  541. ptrdiff_t dststride = _dststride / sizeof(pixel);
  542. int shift = denom + 14 - BIT_DEPTH;
  543. #if BIT_DEPTH < 14
  544. int offset = 1 << (shift - 1);
  545. #else
  546. int offset = 0;
  547. #endif
  548. ox = ox * (1 << (BIT_DEPTH - 8));
  549. for (y = 0; y < height; y++) {
  550. for (x = 0; x < width; x++)
  551. dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
  552. src += srcstride;
  553. dst += dststride;
  554. }
  555. }
  556. static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  557. int16_t *src2,
  558. int height, int denom, int wx0, int wx1,
  559. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  560. {
  561. int x, y;
  562. pixel *src = (pixel *)_src;
  563. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  564. pixel *dst = (pixel *)_dst;
  565. ptrdiff_t dststride = _dststride / sizeof(pixel);
  566. int shift = 14 + 1 - BIT_DEPTH;
  567. int log2Wd = denom + shift - 1;
  568. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  569. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  570. for (y = 0; y < height; y++) {
  571. for (x = 0; x < width; x++) {
  572. dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
  573. }
  574. src += srcstride;
  575. dst += dststride;
  576. src2 += MAX_PB_SIZE;
  577. }
  578. }
  579. ////////////////////////////////////////////////////////////////////////////////
  580. //
  581. ////////////////////////////////////////////////////////////////////////////////
  582. #define QPEL_FILTER(src, stride) \
  583. (filter[0] * src[x - 3 * stride] + \
  584. filter[1] * src[x - 2 * stride] + \
  585. filter[2] * src[x - stride] + \
  586. filter[3] * src[x ] + \
  587. filter[4] * src[x + stride] + \
  588. filter[5] * src[x + 2 * stride] + \
  589. filter[6] * src[x + 3 * stride] + \
  590. filter[7] * src[x + 4 * stride])
  591. static void FUNC(put_hevc_qpel_h)(int16_t *dst,
  592. uint8_t *_src, ptrdiff_t _srcstride,
  593. int height, intptr_t mx, intptr_t my, int width)
  594. {
  595. int x, y;
  596. pixel *src = (pixel*)_src;
  597. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  598. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  599. for (y = 0; y < height; y++) {
  600. for (x = 0; x < width; x++)
  601. dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  602. src += srcstride;
  603. dst += MAX_PB_SIZE;
  604. }
  605. }
  606. static void FUNC(put_hevc_qpel_v)(int16_t *dst,
  607. uint8_t *_src, ptrdiff_t _srcstride,
  608. int height, intptr_t mx, intptr_t my, int width)
  609. {
  610. int x, y;
  611. pixel *src = (pixel*)_src;
  612. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  613. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  614. for (y = 0; y < height; y++) {
  615. for (x = 0; x < width; x++)
  616. dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
  617. src += srcstride;
  618. dst += MAX_PB_SIZE;
  619. }
  620. }
  621. static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
  622. uint8_t *_src,
  623. ptrdiff_t _srcstride,
  624. int height, intptr_t mx,
  625. intptr_t my, int width)
  626. {
  627. int x, y;
  628. const int8_t *filter;
  629. pixel *src = (pixel*)_src;
  630. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  631. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  632. int16_t *tmp = tmp_array;
  633. src -= QPEL_EXTRA_BEFORE * srcstride;
  634. filter = ff_hevc_qpel_filters[mx - 1];
  635. for (y = 0; y < height + QPEL_EXTRA; y++) {
  636. for (x = 0; x < width; x++)
  637. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  638. src += srcstride;
  639. tmp += MAX_PB_SIZE;
  640. }
  641. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  642. filter = ff_hevc_qpel_filters[my - 1];
  643. for (y = 0; y < height; y++) {
  644. for (x = 0; x < width; x++)
  645. dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
  646. tmp += MAX_PB_SIZE;
  647. dst += MAX_PB_SIZE;
  648. }
  649. }
  650. static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
  651. uint8_t *_src, ptrdiff_t _srcstride,
  652. int height, intptr_t mx, intptr_t my, int width)
  653. {
  654. int x, y;
  655. pixel *src = (pixel*)_src;
  656. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  657. pixel *dst = (pixel *)_dst;
  658. ptrdiff_t dststride = _dststride / sizeof(pixel);
  659. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  660. int shift = 14 - BIT_DEPTH;
  661. #if BIT_DEPTH < 14
  662. int offset = 1 << (shift - 1);
  663. #else
  664. int offset = 0;
  665. #endif
  666. for (y = 0; y < height; y++) {
  667. for (x = 0; x < width; x++)
  668. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
  669. src += srcstride;
  670. dst += dststride;
  671. }
  672. }
  673. static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  674. int16_t *src2,
  675. int height, intptr_t mx, intptr_t my, int width)
  676. {
  677. int x, y;
  678. pixel *src = (pixel*)_src;
  679. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  680. pixel *dst = (pixel *)_dst;
  681. ptrdiff_t dststride = _dststride / sizeof(pixel);
  682. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  683. int shift = 14 + 1 - BIT_DEPTH;
  684. #if BIT_DEPTH < 14
  685. int offset = 1 << (shift - 1);
  686. #else
  687. int offset = 0;
  688. #endif
  689. for (y = 0; y < height; y++) {
  690. for (x = 0; x < width; x++)
  691. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  692. src += srcstride;
  693. dst += dststride;
  694. src2 += MAX_PB_SIZE;
  695. }
  696. }
  697. static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
  698. uint8_t *_src, ptrdiff_t _srcstride,
  699. int height, intptr_t mx, intptr_t my, int width)
  700. {
  701. int x, y;
  702. pixel *src = (pixel*)_src;
  703. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  704. pixel *dst = (pixel *)_dst;
  705. ptrdiff_t dststride = _dststride / sizeof(pixel);
  706. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  707. int shift = 14 - BIT_DEPTH;
  708. #if BIT_DEPTH < 14
  709. int offset = 1 << (shift - 1);
  710. #else
  711. int offset = 0;
  712. #endif
  713. for (y = 0; y < height; y++) {
  714. for (x = 0; x < width; x++)
  715. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
  716. src += srcstride;
  717. dst += dststride;
  718. }
  719. }
  720. static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  721. int16_t *src2,
  722. int height, intptr_t mx, intptr_t my, int width)
  723. {
  724. int x, y;
  725. pixel *src = (pixel*)_src;
  726. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  727. pixel *dst = (pixel *)_dst;
  728. ptrdiff_t dststride = _dststride / sizeof(pixel);
  729. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  730. int shift = 14 + 1 - BIT_DEPTH;
  731. #if BIT_DEPTH < 14
  732. int offset = 1 << (shift - 1);
  733. #else
  734. int offset = 0;
  735. #endif
  736. for (y = 0; y < height; y++) {
  737. for (x = 0; x < width; x++)
  738. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  739. src += srcstride;
  740. dst += dststride;
  741. src2 += MAX_PB_SIZE;
  742. }
  743. }
  744. static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
  745. uint8_t *_src, ptrdiff_t _srcstride,
  746. int height, intptr_t mx, intptr_t my, int width)
  747. {
  748. int x, y;
  749. const int8_t *filter;
  750. pixel *src = (pixel*)_src;
  751. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  752. pixel *dst = (pixel *)_dst;
  753. ptrdiff_t dststride = _dststride / sizeof(pixel);
  754. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  755. int16_t *tmp = tmp_array;
  756. int shift = 14 - BIT_DEPTH;
  757. #if BIT_DEPTH < 14
  758. int offset = 1 << (shift - 1);
  759. #else
  760. int offset = 0;
  761. #endif
  762. src -= QPEL_EXTRA_BEFORE * srcstride;
  763. filter = ff_hevc_qpel_filters[mx - 1];
  764. for (y = 0; y < height + QPEL_EXTRA; y++) {
  765. for (x = 0; x < width; x++)
  766. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  767. src += srcstride;
  768. tmp += MAX_PB_SIZE;
  769. }
  770. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  771. filter = ff_hevc_qpel_filters[my - 1];
  772. for (y = 0; y < height; y++) {
  773. for (x = 0; x < width; x++)
  774. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
  775. tmp += MAX_PB_SIZE;
  776. dst += dststride;
  777. }
  778. }
  779. static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  780. int16_t *src2,
  781. int height, intptr_t mx, intptr_t my, int width)
  782. {
  783. int x, y;
  784. const int8_t *filter;
  785. pixel *src = (pixel*)_src;
  786. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  787. pixel *dst = (pixel *)_dst;
  788. ptrdiff_t dststride = _dststride / sizeof(pixel);
  789. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  790. int16_t *tmp = tmp_array;
  791. int shift = 14 + 1 - BIT_DEPTH;
  792. #if BIT_DEPTH < 14
  793. int offset = 1 << (shift - 1);
  794. #else
  795. int offset = 0;
  796. #endif
  797. src -= QPEL_EXTRA_BEFORE * srcstride;
  798. filter = ff_hevc_qpel_filters[mx - 1];
  799. for (y = 0; y < height + QPEL_EXTRA; y++) {
  800. for (x = 0; x < width; x++)
  801. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  802. src += srcstride;
  803. tmp += MAX_PB_SIZE;
  804. }
  805. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  806. filter = ff_hevc_qpel_filters[my - 1];
  807. for (y = 0; y < height; y++) {
  808. for (x = 0; x < width; x++)
  809. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
  810. tmp += MAX_PB_SIZE;
  811. dst += dststride;
  812. src2 += MAX_PB_SIZE;
  813. }
  814. }
  815. static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
  816. uint8_t *_src, ptrdiff_t _srcstride,
  817. int height, int denom, int wx, int ox,
  818. intptr_t mx, intptr_t my, int width)
  819. {
  820. int x, y;
  821. pixel *src = (pixel*)_src;
  822. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  823. pixel *dst = (pixel *)_dst;
  824. ptrdiff_t dststride = _dststride / sizeof(pixel);
  825. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  826. int shift = denom + 14 - BIT_DEPTH;
  827. #if BIT_DEPTH < 14
  828. int offset = 1 << (shift - 1);
  829. #else
  830. int offset = 0;
  831. #endif
  832. ox = ox * (1 << (BIT_DEPTH - 8));
  833. for (y = 0; y < height; y++) {
  834. for (x = 0; x < width; x++)
  835. dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  836. src += srcstride;
  837. dst += dststride;
  838. }
  839. }
  840. static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  841. int16_t *src2,
  842. int height, int denom, int wx0, int wx1,
  843. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  844. {
  845. int x, y;
  846. pixel *src = (pixel*)_src;
  847. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  848. pixel *dst = (pixel *)_dst;
  849. ptrdiff_t dststride = _dststride / sizeof(pixel);
  850. const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
  851. int shift = 14 + 1 - BIT_DEPTH;
  852. int log2Wd = denom + shift - 1;
  853. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  854. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  855. for (y = 0; y < height; y++) {
  856. for (x = 0; x < width; x++)
  857. dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  858. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  859. src += srcstride;
  860. dst += dststride;
  861. src2 += MAX_PB_SIZE;
  862. }
  863. }
  864. static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
  865. uint8_t *_src, ptrdiff_t _srcstride,
  866. int height, int denom, int wx, int ox,
  867. intptr_t mx, intptr_t my, int width)
  868. {
  869. int x, y;
  870. pixel *src = (pixel*)_src;
  871. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  872. pixel *dst = (pixel *)_dst;
  873. ptrdiff_t dststride = _dststride / sizeof(pixel);
  874. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  875. int shift = denom + 14 - BIT_DEPTH;
  876. #if BIT_DEPTH < 14
  877. int offset = 1 << (shift - 1);
  878. #else
  879. int offset = 0;
  880. #endif
  881. ox = ox * (1 << (BIT_DEPTH - 8));
  882. for (y = 0; y < height; y++) {
  883. for (x = 0; x < width; x++)
  884. dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  885. src += srcstride;
  886. dst += dststride;
  887. }
  888. }
  889. static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  890. int16_t *src2,
  891. int height, int denom, int wx0, int wx1,
  892. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  893. {
  894. int x, y;
  895. pixel *src = (pixel*)_src;
  896. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  897. pixel *dst = (pixel *)_dst;
  898. ptrdiff_t dststride = _dststride / sizeof(pixel);
  899. const int8_t *filter = ff_hevc_qpel_filters[my - 1];
  900. int shift = 14 + 1 - BIT_DEPTH;
  901. int log2Wd = denom + shift - 1;
  902. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  903. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  904. for (y = 0; y < height; y++) {
  905. for (x = 0; x < width; x++)
  906. dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  907. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  908. src += srcstride;
  909. dst += dststride;
  910. src2 += MAX_PB_SIZE;
  911. }
  912. }
  913. static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
  914. uint8_t *_src, ptrdiff_t _srcstride,
  915. int height, int denom, int wx, int ox,
  916. intptr_t mx, intptr_t my, int width)
  917. {
  918. int x, y;
  919. const int8_t *filter;
  920. pixel *src = (pixel*)_src;
  921. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  922. pixel *dst = (pixel *)_dst;
  923. ptrdiff_t dststride = _dststride / sizeof(pixel);
  924. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  925. int16_t *tmp = tmp_array;
  926. int shift = denom + 14 - BIT_DEPTH;
  927. #if BIT_DEPTH < 14
  928. int offset = 1 << (shift - 1);
  929. #else
  930. int offset = 0;
  931. #endif
  932. src -= QPEL_EXTRA_BEFORE * srcstride;
  933. filter = ff_hevc_qpel_filters[mx - 1];
  934. for (y = 0; y < height + QPEL_EXTRA; y++) {
  935. for (x = 0; x < width; x++)
  936. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  937. src += srcstride;
  938. tmp += MAX_PB_SIZE;
  939. }
  940. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  941. filter = ff_hevc_qpel_filters[my - 1];
  942. ox = ox * (1 << (BIT_DEPTH - 8));
  943. for (y = 0; y < height; y++) {
  944. for (x = 0; x < width; x++)
  945. dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
  946. tmp += MAX_PB_SIZE;
  947. dst += dststride;
  948. }
  949. }
  950. static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  951. int16_t *src2,
  952. int height, int denom, int wx0, int wx1,
  953. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  954. {
  955. int x, y;
  956. const int8_t *filter;
  957. pixel *src = (pixel*)_src;
  958. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  959. pixel *dst = (pixel *)_dst;
  960. ptrdiff_t dststride = _dststride / sizeof(pixel);
  961. int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
  962. int16_t *tmp = tmp_array;
  963. int shift = 14 + 1 - BIT_DEPTH;
  964. int log2Wd = denom + shift - 1;
  965. src -= QPEL_EXTRA_BEFORE * srcstride;
  966. filter = ff_hevc_qpel_filters[mx - 1];
  967. for (y = 0; y < height + QPEL_EXTRA; y++) {
  968. for (x = 0; x < width; x++)
  969. tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  970. src += srcstride;
  971. tmp += MAX_PB_SIZE;
  972. }
  973. tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  974. filter = ff_hevc_qpel_filters[my - 1];
  975. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  976. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  977. for (y = 0; y < height; y++) {
  978. for (x = 0; x < width; x++)
  979. dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
  980. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  981. tmp += MAX_PB_SIZE;
  982. dst += dststride;
  983. src2 += MAX_PB_SIZE;
  984. }
  985. }
  986. ////////////////////////////////////////////////////////////////////////////////
  987. //
  988. ////////////////////////////////////////////////////////////////////////////////
  989. #define EPEL_FILTER(src, stride) \
  990. (filter[0] * src[x - stride] + \
  991. filter[1] * src[x] + \
  992. filter[2] * src[x + stride] + \
  993. filter[3] * src[x + 2 * stride])
  994. static void FUNC(put_hevc_epel_h)(int16_t *dst,
  995. uint8_t *_src, ptrdiff_t _srcstride,
  996. int height, intptr_t mx, intptr_t my, int width)
  997. {
  998. int x, y;
  999. pixel *src = (pixel *)_src;
  1000. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1001. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1002. for (y = 0; y < height; y++) {
  1003. for (x = 0; x < width; x++)
  1004. dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1005. src += srcstride;
  1006. dst += MAX_PB_SIZE;
  1007. }
  1008. }
  1009. static void FUNC(put_hevc_epel_v)(int16_t *dst,
  1010. uint8_t *_src, ptrdiff_t _srcstride,
  1011. int height, intptr_t mx, intptr_t my, int width)
  1012. {
  1013. int x, y;
  1014. pixel *src = (pixel *)_src;
  1015. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1016. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1017. for (y = 0; y < height; y++) {
  1018. for (x = 0; x < width; x++)
  1019. dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
  1020. src += srcstride;
  1021. dst += MAX_PB_SIZE;
  1022. }
  1023. }
  1024. static void FUNC(put_hevc_epel_hv)(int16_t *dst,
  1025. uint8_t *_src, ptrdiff_t _srcstride,
  1026. int height, intptr_t mx, intptr_t my, int width)
  1027. {
  1028. int x, y;
  1029. pixel *src = (pixel *)_src;
  1030. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1031. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1032. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1033. int16_t *tmp = tmp_array;
  1034. src -= EPEL_EXTRA_BEFORE * srcstride;
  1035. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1036. for (x = 0; x < width; x++)
  1037. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1038. src += srcstride;
  1039. tmp += MAX_PB_SIZE;
  1040. }
  1041. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1042. filter = ff_hevc_epel_filters[my - 1];
  1043. for (y = 0; y < height; y++) {
  1044. for (x = 0; x < width; x++)
  1045. dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
  1046. tmp += MAX_PB_SIZE;
  1047. dst += MAX_PB_SIZE;
  1048. }
  1049. }
  1050. static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1051. int height, intptr_t mx, intptr_t my, int width)
  1052. {
  1053. int x, y;
  1054. pixel *src = (pixel *)_src;
  1055. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1056. pixel *dst = (pixel *)_dst;
  1057. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1058. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1059. int shift = 14 - BIT_DEPTH;
  1060. #if BIT_DEPTH < 14
  1061. int offset = 1 << (shift - 1);
  1062. #else
  1063. int offset = 0;
  1064. #endif
  1065. for (y = 0; y < height; y++) {
  1066. for (x = 0; x < width; x++)
  1067. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
  1068. src += srcstride;
  1069. dst += dststride;
  1070. }
  1071. }
  1072. static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1073. int16_t *src2,
  1074. int height, intptr_t mx, intptr_t my, int width)
  1075. {
  1076. int x, y;
  1077. pixel *src = (pixel *)_src;
  1078. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1079. pixel *dst = (pixel *)_dst;
  1080. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1081. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1082. int shift = 14 + 1 - BIT_DEPTH;
  1083. #if BIT_DEPTH < 14
  1084. int offset = 1 << (shift - 1);
  1085. #else
  1086. int offset = 0;
  1087. #endif
  1088. for (y = 0; y < height; y++) {
  1089. for (x = 0; x < width; x++) {
  1090. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  1091. }
  1092. dst += dststride;
  1093. src += srcstride;
  1094. src2 += MAX_PB_SIZE;
  1095. }
  1096. }
  1097. static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1098. int height, intptr_t mx, intptr_t my, int width)
  1099. {
  1100. int x, y;
  1101. pixel *src = (pixel *)_src;
  1102. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1103. pixel *dst = (pixel *)_dst;
  1104. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1105. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1106. int shift = 14 - BIT_DEPTH;
  1107. #if BIT_DEPTH < 14
  1108. int offset = 1 << (shift - 1);
  1109. #else
  1110. int offset = 0;
  1111. #endif
  1112. for (y = 0; y < height; y++) {
  1113. for (x = 0; x < width; x++)
  1114. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
  1115. src += srcstride;
  1116. dst += dststride;
  1117. }
  1118. }
  1119. static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1120. int16_t *src2,
  1121. int height, intptr_t mx, intptr_t my, int width)
  1122. {
  1123. int x, y;
  1124. pixel *src = (pixel *)_src;
  1125. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1126. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1127. pixel *dst = (pixel *)_dst;
  1128. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1129. int shift = 14 + 1 - BIT_DEPTH;
  1130. #if BIT_DEPTH < 14
  1131. int offset = 1 << (shift - 1);
  1132. #else
  1133. int offset = 0;
  1134. #endif
  1135. for (y = 0; y < height; y++) {
  1136. for (x = 0; x < width; x++)
  1137. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
  1138. dst += dststride;
  1139. src += srcstride;
  1140. src2 += MAX_PB_SIZE;
  1141. }
  1142. }
  1143. static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1144. int height, intptr_t mx, intptr_t my, int width)
  1145. {
  1146. int x, y;
  1147. pixel *src = (pixel *)_src;
  1148. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1149. pixel *dst = (pixel *)_dst;
  1150. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1151. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1152. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1153. int16_t *tmp = tmp_array;
  1154. int shift = 14 - BIT_DEPTH;
  1155. #if BIT_DEPTH < 14
  1156. int offset = 1 << (shift - 1);
  1157. #else
  1158. int offset = 0;
  1159. #endif
  1160. src -= EPEL_EXTRA_BEFORE * srcstride;
  1161. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1162. for (x = 0; x < width; x++)
  1163. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1164. src += srcstride;
  1165. tmp += MAX_PB_SIZE;
  1166. }
  1167. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1168. filter = ff_hevc_epel_filters[my - 1];
  1169. for (y = 0; y < height; y++) {
  1170. for (x = 0; x < width; x++)
  1171. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
  1172. tmp += MAX_PB_SIZE;
  1173. dst += dststride;
  1174. }
  1175. }
  1176. static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1177. int16_t *src2,
  1178. int height, intptr_t mx, intptr_t my, int width)
  1179. {
  1180. int x, y;
  1181. pixel *src = (pixel *)_src;
  1182. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1183. pixel *dst = (pixel *)_dst;
  1184. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1185. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1186. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1187. int16_t *tmp = tmp_array;
  1188. int shift = 14 + 1 - BIT_DEPTH;
  1189. #if BIT_DEPTH < 14
  1190. int offset = 1 << (shift - 1);
  1191. #else
  1192. int offset = 0;
  1193. #endif
  1194. src -= EPEL_EXTRA_BEFORE * srcstride;
  1195. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1196. for (x = 0; x < width; x++)
  1197. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1198. src += srcstride;
  1199. tmp += MAX_PB_SIZE;
  1200. }
  1201. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1202. filter = ff_hevc_epel_filters[my - 1];
  1203. for (y = 0; y < height; y++) {
  1204. for (x = 0; x < width; x++)
  1205. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
  1206. tmp += MAX_PB_SIZE;
  1207. dst += dststride;
  1208. src2 += MAX_PB_SIZE;
  1209. }
  1210. }
  1211. static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1212. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1213. {
  1214. int x, y;
  1215. pixel *src = (pixel *)_src;
  1216. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1217. pixel *dst = (pixel *)_dst;
  1218. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1219. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1220. int shift = denom + 14 - BIT_DEPTH;
  1221. #if BIT_DEPTH < 14
  1222. int offset = 1 << (shift - 1);
  1223. #else
  1224. int offset = 0;
  1225. #endif
  1226. ox = ox * (1 << (BIT_DEPTH - 8));
  1227. for (y = 0; y < height; y++) {
  1228. for (x = 0; x < width; x++) {
  1229. dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  1230. }
  1231. dst += dststride;
  1232. src += srcstride;
  1233. }
  1234. }
  1235. static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1236. int16_t *src2,
  1237. int height, int denom, int wx0, int wx1,
  1238. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1239. {
  1240. int x, y;
  1241. pixel *src = (pixel *)_src;
  1242. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1243. pixel *dst = (pixel *)_dst;
  1244. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1245. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1246. int shift = 14 + 1 - BIT_DEPTH;
  1247. int log2Wd = denom + shift - 1;
  1248. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1249. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1250. for (y = 0; y < height; y++) {
  1251. for (x = 0; x < width; x++)
  1252. dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  1253. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  1254. src += srcstride;
  1255. dst += dststride;
  1256. src2 += MAX_PB_SIZE;
  1257. }
  1258. }
  1259. static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1260. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1261. {
  1262. int x, y;
  1263. pixel *src = (pixel *)_src;
  1264. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1265. pixel *dst = (pixel *)_dst;
  1266. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1267. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1268. int shift = denom + 14 - BIT_DEPTH;
  1269. #if BIT_DEPTH < 14
  1270. int offset = 1 << (shift - 1);
  1271. #else
  1272. int offset = 0;
  1273. #endif
  1274. ox = ox * (1 << (BIT_DEPTH - 8));
  1275. for (y = 0; y < height; y++) {
  1276. for (x = 0; x < width; x++) {
  1277. dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
  1278. }
  1279. dst += dststride;
  1280. src += srcstride;
  1281. }
  1282. }
  1283. static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1284. int16_t *src2,
  1285. int height, int denom, int wx0, int wx1,
  1286. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1287. {
  1288. int x, y;
  1289. pixel *src = (pixel *)_src;
  1290. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1291. const int8_t *filter = ff_hevc_epel_filters[my - 1];
  1292. pixel *dst = (pixel *)_dst;
  1293. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1294. int shift = 14 + 1 - BIT_DEPTH;
  1295. int log2Wd = denom + shift - 1;
  1296. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1297. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1298. for (y = 0; y < height; y++) {
  1299. for (x = 0; x < width; x++)
  1300. dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
  1301. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  1302. src += srcstride;
  1303. dst += dststride;
  1304. src2 += MAX_PB_SIZE;
  1305. }
  1306. }
  1307. static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1308. int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
  1309. {
  1310. int x, y;
  1311. pixel *src = (pixel *)_src;
  1312. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1313. pixel *dst = (pixel *)_dst;
  1314. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1315. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1316. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1317. int16_t *tmp = tmp_array;
  1318. int shift = denom + 14 - BIT_DEPTH;
  1319. #if BIT_DEPTH < 14
  1320. int offset = 1 << (shift - 1);
  1321. #else
  1322. int offset = 0;
  1323. #endif
  1324. src -= EPEL_EXTRA_BEFORE * srcstride;
  1325. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1326. for (x = 0; x < width; x++)
  1327. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1328. src += srcstride;
  1329. tmp += MAX_PB_SIZE;
  1330. }
  1331. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1332. filter = ff_hevc_epel_filters[my - 1];
  1333. ox = ox * (1 << (BIT_DEPTH - 8));
  1334. for (y = 0; y < height; y++) {
  1335. for (x = 0; x < width; x++)
  1336. dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
  1337. tmp += MAX_PB_SIZE;
  1338. dst += dststride;
  1339. }
  1340. }
  1341. static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
  1342. int16_t *src2,
  1343. int height, int denom, int wx0, int wx1,
  1344. int ox0, int ox1, intptr_t mx, intptr_t my, int width)
  1345. {
  1346. int x, y;
  1347. pixel *src = (pixel *)_src;
  1348. ptrdiff_t srcstride = _srcstride / sizeof(pixel);
  1349. pixel *dst = (pixel *)_dst;
  1350. ptrdiff_t dststride = _dststride / sizeof(pixel);
  1351. const int8_t *filter = ff_hevc_epel_filters[mx - 1];
  1352. int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
  1353. int16_t *tmp = tmp_array;
  1354. int shift = 14 + 1 - BIT_DEPTH;
  1355. int log2Wd = denom + shift - 1;
  1356. src -= EPEL_EXTRA_BEFORE * srcstride;
  1357. for (y = 0; y < height + EPEL_EXTRA; y++) {
  1358. for (x = 0; x < width; x++)
  1359. tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
  1360. src += srcstride;
  1361. tmp += MAX_PB_SIZE;
  1362. }
  1363. tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
  1364. filter = ff_hevc_epel_filters[my - 1];
  1365. ox0 = ox0 * (1 << (BIT_DEPTH - 8));
  1366. ox1 = ox1 * (1 << (BIT_DEPTH - 8));
  1367. for (y = 0; y < height; y++) {
  1368. for (x = 0; x < width; x++)
  1369. dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
  1370. ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
  1371. tmp += MAX_PB_SIZE;
  1372. dst += dststride;
  1373. src2 += MAX_PB_SIZE;
  1374. }
  1375. }// line zero
  1376. #define P3 pix[-4 * xstride]
  1377. #define P2 pix[-3 * xstride]
  1378. #define P1 pix[-2 * xstride]
  1379. #define P0 pix[-1 * xstride]
  1380. #define Q0 pix[0 * xstride]
  1381. #define Q1 pix[1 * xstride]
  1382. #define Q2 pix[2 * xstride]
  1383. #define Q3 pix[3 * xstride]
  1384. // line three. used only for deblocking decision
  1385. #define TP3 pix[-4 * xstride + 3 * ystride]
  1386. #define TP2 pix[-3 * xstride + 3 * ystride]
  1387. #define TP1 pix[-2 * xstride + 3 * ystride]
  1388. #define TP0 pix[-1 * xstride + 3 * ystride]
  1389. #define TQ0 pix[0 * xstride + 3 * ystride]
  1390. #define TQ1 pix[1 * xstride + 3 * ystride]
  1391. #define TQ2 pix[2 * xstride + 3 * ystride]
  1392. #define TQ3 pix[3 * xstride + 3 * ystride]
  1393. static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
  1394. ptrdiff_t _xstride, ptrdiff_t _ystride,
  1395. int beta, int *_tc,
  1396. uint8_t *_no_p, uint8_t *_no_q)
  1397. {
  1398. int d, j;
  1399. pixel *pix = (pixel *)_pix;
  1400. ptrdiff_t xstride = _xstride / sizeof(pixel);
  1401. ptrdiff_t ystride = _ystride / sizeof(pixel);
  1402. beta <<= BIT_DEPTH - 8;
  1403. for (j = 0; j < 2; j++) {
  1404. const int dp0 = abs(P2 - 2 * P1 + P0);
  1405. const int dq0 = abs(Q2 - 2 * Q1 + Q0);
  1406. const int dp3 = abs(TP2 - 2 * TP1 + TP0);
  1407. const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
  1408. const int d0 = dp0 + dq0;
  1409. const int d3 = dp3 + dq3;
  1410. const int tc = _tc[j] << (BIT_DEPTH - 8);
  1411. const int no_p = _no_p[j];
  1412. const int no_q = _no_q[j];
  1413. if (d0 + d3 >= beta) {
  1414. pix += 4 * ystride;
  1415. continue;
  1416. } else {
  1417. const int beta_3 = beta >> 3;
  1418. const int beta_2 = beta >> 2;
  1419. const int tc25 = ((tc * 5 + 1) >> 1);
  1420. if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
  1421. abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
  1422. (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
  1423. // strong filtering
  1424. const int tc2 = tc << 1;
  1425. for (d = 0; d < 4; d++) {
  1426. const int p3 = P3;
  1427. const int p2 = P2;
  1428. const int p1 = P1;
  1429. const int p0 = P0;
  1430. const int q0 = Q0;
  1431. const int q1 = Q1;
  1432. const int q2 = Q2;
  1433. const int q3 = Q3;
  1434. if (!no_p) {
  1435. P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
  1436. P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
  1437. P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
  1438. }
  1439. if (!no_q) {
  1440. Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
  1441. Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
  1442. Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
  1443. }
  1444. pix += ystride;
  1445. }
  1446. } else { // normal filtering
  1447. int nd_p = 1;
  1448. int nd_q = 1;
  1449. const int tc_2 = tc >> 1;
  1450. if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
  1451. nd_p = 2;
  1452. if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
  1453. nd_q = 2;
  1454. for (d = 0; d < 4; d++) {
  1455. const int p2 = P2;
  1456. const int p1 = P1;
  1457. const int p0 = P0;
  1458. const int q0 = Q0;
  1459. const int q1 = Q1;
  1460. const int q2 = Q2;
  1461. int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
  1462. if (abs(delta0) < 10 * tc) {
  1463. delta0 = av_clip(delta0, -tc, tc);
  1464. if (!no_p)
  1465. P0 = av_clip_pixel(p0 + delta0);
  1466. if (!no_q)
  1467. Q0 = av_clip_pixel(q0 - delta0);
  1468. if (!no_p && nd_p > 1) {
  1469. const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
  1470. P1 = av_clip_pixel(p1 + deltap1);
  1471. }
  1472. if (!no_q && nd_q > 1) {
  1473. const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
  1474. Q1 = av_clip_pixel(q1 + deltaq1);
  1475. }
  1476. }
  1477. pix += ystride;
  1478. }
  1479. }
  1480. }
  1481. }
  1482. }
  1483. static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
  1484. ptrdiff_t _ystride, int *_tc,
  1485. uint8_t *_no_p, uint8_t *_no_q)
  1486. {
  1487. int d, j, no_p, no_q;
  1488. pixel *pix = (pixel *)_pix;
  1489. ptrdiff_t xstride = _xstride / sizeof(pixel);
  1490. ptrdiff_t ystride = _ystride / sizeof(pixel);
  1491. for (j = 0; j < 2; j++) {
  1492. const int tc = _tc[j] << (BIT_DEPTH - 8);
  1493. if (tc <= 0) {
  1494. pix += 4 * ystride;
  1495. continue;
  1496. }
  1497. no_p = _no_p[j];
  1498. no_q = _no_q[j];
  1499. for (d = 0; d < 4; d++) {
  1500. int delta0;
  1501. const int p1 = P1;
  1502. const int p0 = P0;
  1503. const int q0 = Q0;
  1504. const int q1 = Q1;
  1505. delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
  1506. if (!no_p)
  1507. P0 = av_clip_pixel(p0 + delta0);
  1508. if (!no_q)
  1509. Q0 = av_clip_pixel(q0 - delta0);
  1510. pix += ystride;
  1511. }
  1512. }
  1513. }
  1514. static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
  1515. int32_t *tc, uint8_t *no_p,
  1516. uint8_t *no_q)
  1517. {
  1518. FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
  1519. }
  1520. static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
  1521. int32_t *tc, uint8_t *no_p,
  1522. uint8_t *no_q)
  1523. {
  1524. FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
  1525. }
  1526. static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
  1527. int beta, int32_t *tc, uint8_t *no_p,
  1528. uint8_t *no_q)
  1529. {
  1530. FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
  1531. beta, tc, no_p, no_q);
  1532. }
  1533. static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
  1534. int beta, int32_t *tc, uint8_t *no_p,
  1535. uint8_t *no_q)
  1536. {
  1537. FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
  1538. beta, tc, no_p, no_q);
  1539. }
  1540. #undef P3
  1541. #undef P2
  1542. #undef P1
  1543. #undef P0
  1544. #undef Q0
  1545. #undef Q1
  1546. #undef Q2
  1547. #undef Q3
  1548. #undef TP3
  1549. #undef TP2
  1550. #undef TP1
  1551. #undef TP0
  1552. #undef TQ0
  1553. #undef TQ1
  1554. #undef TQ2
  1555. #undef TQ3