You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

531 lines
20KB

  1. /*
  2. * HEVC video decoder
  3. *
  4. * Copyright (C) 2012 - 2013 Guillaume Martres
  5. *
  6. * This file is part of Libav.
  7. *
  8. * Libav is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * Libav is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with Libav; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "libavutil/pixdesc.h"
  23. #include "hevcdec.h"
  24. #include "bit_depth_template.c"
  /* Sample at column x, row y relative to a local `src` pointer; `stride` is
   * the row pitch in elements of `src`.  Both names must be in scope wherever
   * the macro is expanded. */
  25. #define POS(x, y) src[(x) + stride * (y)]
  /**
   * Build the left/top reference sample arrays for one intra-predicted block
   * and dispatch to the planar, DC or angular predictor stored in s->hpc.
   *
   * @param s          decoder context; parameter sets (s->ps), the current
   *                   frame, s->ref->tab_mvf and the s->hpc predictor function
   *                   pointers are read from it
   * @param x0         horizontal block position; shifted right by
   *                   sps->hshift[c_idx] to address the plane
   * @param y0         vertical block position; shifted right by
   *                   sps->vshift[c_idx] to address the plane
   * @param log2_size  log2 of the block width/height in samples of plane c_idx
   * @param c_idx      plane index; 0 selects lc->tu.cur_intra_pred_mode, any
   *                   other value selects lc->pu.intra_pred_mode_c
   */
  26. static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
  27. int log2_size, int c_idx)
  28. {
  /* Helper macros.  They are expanded inside this function only and capture
   * its locals (s, i, x0, y0, hshift, vshift, min_pu_width). */
  /* Convert a coordinate to units of the minimum PU size. */
  29. #define PU(x) \
  30. ((x) >> s->ps.sps->log2_min_pu_size)
  /* Entry of s->ref->tab_mvf for the PU at PU-grid coordinates (x, y). */
  31. #define MVF(x, y) \
  32. (s->ref->tab_mvf[(x) + (y) * min_pu_width])
  /* Same, but (x, y) is a sample offset from the block origin in plane units. */
  33. #define MVF_PU(x, y) \
  34. MVF(PU(x0 + ((x) << hshift)), PU(y0 + ((y) << vshift)))
  /* is_intra flag of the PU covering sample offset (x, y). */
  35. #define IS_INTRA(x, y) \
  36. MVF_PU(x, y).is_intra
  /* Lookup in pps->min_tb_addr_zs for the minimum TB at TB-grid (x, y). */
  37. #define MIN_TB_ADDR_ZS(x, y) \
  38. s->ps.pps->min_tb_addr_zs[(y) * s->ps.sps->min_tb_width + (x)]
  /* Fill ptr[0..len) with val, four samples per store; a len that is not a
   * multiple of 4 is effectively rounded up, len <= 0 writes nothing. */
  39. #define EXTEND(ptr, val, len) \
  40. do { \
  41. pixel4 pix = PIXEL_SPLAT_X4(val); \
  42. for (i = 0; i < (len); i += 4) \
  43. AV_WN4P(ptr + i, pix); \
  44. } while (0)
  /* Walk leftwards along the row above the block: wherever the PU at column
   * i - 1 is not intra, copy ptr[i] into ptr[i - 1]. */
  45. #define EXTEND_LEFT_CIP(ptr, start, length) \
  46. for (i = (start); i > (start) - (length); i--) \
  47. if (!IS_INTRA(i - 1, -1)) \
  48. ptr[i - 1] = ptr[i]
  /* Walk rightwards along the row above the block: wherever the PU at column
   * i is not intra, copy ptr[i - 1] into ptr[i]. */
  49. #define EXTEND_RIGHT_CIP(ptr, start, length) \
  50. for (i = (start); i < (start) + (length); i++) \
  51. if (!IS_INTRA(i, -1)) \
  52. ptr[i] = ptr[i - 1]
  /* Walk upwards along the column left of the block: wherever the PU at row
   * i - 1 is not intra, copy ptr[i] into ptr[i - 1]. */
  53. #define EXTEND_UP_CIP(ptr, start, length) \
  54. for (i = (start); i > (start) - (length); i--) \
  55. if (!IS_INTRA(-1, i - 1)) \
  56. ptr[i - 1] = ptr[i]
  /* Unconditional variant of EXTEND_UP_CIP (no is_intra lookup). */
  57. #define EXTEND_UP_CIP_0(ptr, start, length) \
  58. for (i = (start); i > (start) - (length); i--) \
  59. ptr[i - 1] = ptr[i]
  /* Walk downwards along the column left of the block: wherever the PU at row
   * i is not intra, copy ptr[i - 1] into ptr[i]. */
  60. #define EXTEND_DOWN_CIP(ptr, start, length) \
  61. for (i = (start); i < (start) + (length); i++) \
  62. if (!IS_INTRA(-1, i)) \
  63. ptr[i] = ptr[i - 1]
  64. HEVCLocalContext *lc = &s->HEVClc;
  65. int i;
  66. int hshift = s->ps.sps->hshift[c_idx];
  67. int vshift = s->ps.sps->vshift[c_idx];
  68. int size = (1 << log2_size);
  /* Block size expressed in x0/y0 units and in minimum-TB units. */
  69. int size_in_luma = size << hshift;
  70. int size_in_tbs = size_in_luma >> s->ps.sps->log2_min_tb_size;
  /* Block origin inside plane c_idx. */
  71. int x = x0 >> hshift;
  72. int y = y0 >> vshift;
  73. int x_tb = x0 >> s->ps.sps->log2_min_tb_size;
  74. int y_tb = y0 >> s->ps.sps->log2_min_tb_size;
  75. int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);
  /* linesize is divided by sizeof(pixel), so stride counts pixels. */
  76. ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
  77. pixel *src = (pixel*)s->frame->data[c_idx] + x + y * stride;
  78. int min_pu_width = s->ps.sps->min_pu_width;
  79. enum IntraPredMode mode = c_idx ? lc->pu.intra_pred_mode_c :
  80. lc->tu.cur_intra_pred_mode;
  /* Reference arrays hold 2 * MAX_TB_SIZE samples plus one corner sample; the
   * working pointers are offset by one so that index -1 is the corner. */
  81. pixel left_array[2 * MAX_TB_SIZE + 1];
  82. pixel filtered_left_array[2 * MAX_TB_SIZE + 1];
  83. pixel top_array[2 * MAX_TB_SIZE + 1];
  84. pixel filtered_top_array[2 * MAX_TB_SIZE + 1];
  85. pixel *left = left_array + 1;
  86. pixel *top = top_array + 1;
  87. pixel *filtered_left = filtered_left_array + 1;
  88. pixel *filtered_top = filtered_top_array + 1;
  /* Bottom-left / top-right neighbours additionally require that the
   * neighbouring TB's min_tb_addr_zs entry is smaller than the current one
   * (presumably: it precedes the current TB in decoding order -- confirm). */
  89. int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb - 1, y_tb + size_in_tbs);
  90. int cand_left = lc->na.cand_left;
  91. int cand_up_left = lc->na.cand_up_left;
  92. int cand_up = lc->na.cand_up;
  93. int cand_up_right = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb + size_in_tbs, y_tb - 1);
  /* Number of bottom-left / top-right samples after clipping the extended
   * range to sps->height / sps->width, converted to plane units. */
  94. int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma, s->ps.sps->height) -
  95. (y0 + size_in_luma)) >> vshift;
  96. int top_right_size = (FFMIN(x0 + 2 * size_in_luma, s->ps.sps->width) -
  97. (x0 + size_in_luma)) >> hshift;
  /* Constrained intra prediction: re-derive each candidate flag as the OR of
   * is_intra over the neighbouring minimum PUs, clipped to the PU grid.  The
   * left/bottom-left and up/up-right groups are only re-derived when the block
   * starts on a PU boundary in the relevant direction. */
  98. if (s->ps.pps->constrained_intra_pred_flag == 1) {
  99. int size_in_luma_pu = PU(size_in_luma);
  100. int on_pu_edge_x = !(x0 & ((1 << s->ps.sps->log2_min_pu_size) - 1));
  101. int on_pu_edge_y = !(y0 & ((1 << s->ps.sps->log2_min_pu_size) - 1));
  /* A block smaller than one PU still has to inspect one PU. */
  102. if (!size_in_luma_pu)
  103. size_in_luma_pu++;
  104. if (cand_bottom_left == 1 && on_pu_edge_x) {
  105. int x_left_pu = PU(x0 - 1);
  106. int y_bottom_pu = PU(y0 + size_in_luma);
  107. int max = FFMIN(size_in_luma_pu, s->ps.sps->min_pu_height - y_bottom_pu);
  108. cand_bottom_left = 0;
  109. for (i = 0; i < max; i++)
  110. cand_bottom_left |= MVF(x_left_pu, y_bottom_pu + i).is_intra;
  111. }
  112. if (cand_left == 1 && on_pu_edge_x) {
  113. int x_left_pu = PU(x0 - 1);
  114. int y_left_pu = PU(y0);
  115. int max = FFMIN(size_in_luma_pu, s->ps.sps->min_pu_height - y_left_pu);
  116. cand_left = 0;
  117. for (i = 0; i < max; i++)
  118. cand_left |= MVF(x_left_pu, y_left_pu + i).is_intra;
  119. }
  120. if (cand_up_left == 1) {
  121. int x_left_pu = PU(x0 - 1);
  122. int y_top_pu = PU(y0 - 1);
  123. cand_up_left = MVF(x_left_pu, y_top_pu).is_intra;
  124. }
  125. if (cand_up == 1 && on_pu_edge_y) {
  126. int x_top_pu = PU(x0);
  127. int y_top_pu = PU(y0 - 1);
  128. int max = FFMIN(size_in_luma_pu, s->ps.sps->min_pu_width - x_top_pu);
  129. cand_up = 0;
  130. for (i = 0; i < max; i++)
  131. cand_up |= MVF(x_top_pu + i, y_top_pu).is_intra;
  132. }
  133. if (cand_up_right == 1 && on_pu_edge_y) {
  134. int y_top_pu = PU(y0 - 1);
  135. int x_right_pu = PU(x0 + size_in_luma);
  136. int max = FFMIN(size_in_luma_pu, s->ps.sps->min_pu_width - x_right_pu);
  137. cand_up_right = 0;
  138. for (i = 0; i < max; i++)
  139. cand_up_right |= MVF(x_right_pu + i, y_top_pu).is_intra;
  140. }
  /* Pre-fill both reference arrays so later substitution never reads
   * indeterminate values from indices 0 .. 2 * MAX_TB_SIZE - 1.
   * NOTE(review): index -1 (the corner) is not covered by this loop, and the
   * fill value is the literal 128 regardless of BIT_DEPTH -- confirm both are
   * intended. */
  141. for (i = 0; i < 2 * MAX_TB_SIZE; i++) {
  142. left[i] = 128;
  143. top[i] = 128;
  144. }
  145. }
  /* Copy the available neighbouring samples out of the frame.  For the
   * bottom-left and top-right groups the part that lies outside the picture
   * is filled by replicating the last sample inside it. */
  146. if (cand_bottom_left) {
  147. for (i = size; i < size + bottom_left_size; i++)
  148. left[i] = POS(-1, i);
  149. EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1),
  150. size - bottom_left_size);
  151. }
  152. if (cand_left)
  153. for (i = size - 1; i >= 0; i--)
  154. left[i] = POS(-1, i);
  155. if (cand_up_left) {
  156. left[-1] = POS(-1, -1);
  157. top[-1] = left[-1];
  158. }
  159. if (cand_up)
  160. memcpy(top, src - stride, size * sizeof(pixel));
  161. if (cand_up_right) {
  162. memcpy(top + size, src - stride + size, size * sizeof(pixel));
  163. EXTEND(top + size + top_right_size, POS(size + top_right_size - 1, -1),
  164. size - top_right_size);
  165. }
  /* Constrained intra prediction, second stage: samples that belong to
   * non-intra PUs are replaced by propagating neighbouring intra samples
   * along the reference arrays (see the EXTEND_*_CIP macros). */
  166. if (s->ps.pps->constrained_intra_pred_flag == 1) {
  167. if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
  /* Extent of the top row / left column that lies inside the picture:
   * 2 * size when the extended neighbour is in use, size otherwise. */
  168. int size_max_x = x0 + ((2 * size) << hshift) < s->ps.sps->width ?
  169. 2 * size : (s->ps.sps->width - x0) >> hshift;
  170. int size_max_y = y0 + ((2 * size) << vshift) < s->ps.sps->height ?
  171. 2 * size : (s->ps.sps->height - y0) >> vshift;
  /* Index of the lowest sample in the left column that was copied above. */
  172. int j = size + (cand_bottom_left? bottom_left_size: 0) -1;
  173. if (!cand_up_right) {
  174. size_max_x = x0 + ((size) << hshift) < s->ps.sps->width ?
  175. size : (s->ps.sps->width - x0) >> hshift;
  176. }
  177. if (!cand_bottom_left) {
  178. size_max_y = y0 + (( size) << vshift) < s->ps.sps->height ?
  179. size : (s->ps.sps->height - y0) >> vshift;
  180. }
  181. if (cand_bottom_left || cand_left || cand_up_left) {
  /* Scan the left column upwards (down to index -1) for an intra sample. */
  182. while (j > -1 && !IS_INTRA(-1, j))
  183. j--;
  /* None found: take the first intra sample of the top row instead and
   * propagate it leftwards into the corner. */
  184. if (!IS_INTRA(-1, j)) {
  185. j = 0;
  186. while (j < size_max_x && !IS_INTRA(j, -1))
  187. j++;
  188. EXTEND_LEFT_CIP(top, j, j + 1);
  189. left[-1] = top[-1];
  190. j = 0;
  191. }
  192. } else {
  /* Only top / top-right are candidates: find the first intra sample in
   * the top row and propagate it leftwards.  The inner if/else below is
   * the body of `if (j > 0)`; the assignment to left[-1] runs always. */
  193. j = 0;
  194. while (j < size_max_x && !IS_INTRA(j, -1))
  195. j++;
  196. if (j > 0)
  197. if (x0 > 0) {
  198. EXTEND_LEFT_CIP(top, j, j + 1);
  199. } else {
  200. EXTEND_LEFT_CIP(top, j, j);
  201. top[-1] = top[0];
  202. }
  203. left[-1] = top[-1];
  204. j = 0;
  205. }
  /* Propagate downwards from j along the left column. */
  206. if (cand_bottom_left || cand_left) {
  207. EXTEND_DOWN_CIP(left, j, size_max_y - j);
  208. }
  209. if (!cand_left)
  210. EXTEND(left, left[-1], size);
  211. if (!cand_bottom_left)
  212. EXTEND(left + size, left[size - 1], size);
  /* Propagate upwards along the left column; at the picture's left edge
   * (x0 == 0) the copy is unconditional, at the top edge the corner is
   * excluded from the walk. */
  213. if (x0 != 0 && y0 != 0) {
  214. EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
  215. } else if (x0 == 0) {
  216. EXTEND_UP_CIP_0(left, size_max_y - 1, size_max_y);
  217. } else {
  218. EXTEND_UP_CIP(left, size_max_y - 1, size_max_y - 1);
  219. }
  220. top[-1] = left[-1];
  221. if (y0 != 0) {
  222. EXTEND_RIGHT_CIP(top, 0, size_max_x);
  223. }
  224. }
  225. }
  226. // Infer the unavailable samples
  /* When the bottom-left group is missing, seed the left column from the
   * nearest available group (left, corner, top, top-right, in that order) and
   * mark the groups that were filled as available; with nothing available
   * everything is set to the mid-level value 1 << (BIT_DEPTH - 1). */
  227. if (!cand_bottom_left) {
  228. if (cand_left) {
  229. EXTEND(left + size, left[size - 1], size);
  230. } else if (cand_up_left) {
  231. EXTEND(left, left[-1], 2 * size);
  232. cand_left = 1;
  233. } else if (cand_up) {
  234. left[-1] = top[0];
  235. EXTEND(left, left[-1], 2 * size);
  236. cand_up_left = 1;
  237. cand_left = 1;
  238. } else if (cand_up_right) {
  239. EXTEND(top, top[size], size);
  240. left[-1] = top[size];
  241. EXTEND(left, left[-1], 2 * size);
  242. cand_up = 1;
  243. cand_up_left = 1;
  244. cand_left = 1;
  245. } else { // No samples available
  246. left[-1] = (1 << (BIT_DEPTH - 1));
  247. EXTEND(top, left[-1], 2 * size);
  248. EXTEND(left, left[-1], 2 * size);
  249. }
  250. }
  /* Fill each remaining missing group from its already-filled neighbour. */
  251. if (!cand_left)
  252. EXTEND(left, left[size], size);
  253. if (!cand_up_left) {
  254. left[-1] = left[0];
  255. }
  256. if (!cand_up)
  257. EXTEND(top, left[-1], size);
  258. if (!cand_up_right)
  259. EXTEND(top + size, top[size - 1], size);
  260. top[-1] = left[-1];
  261. // Filtering process
  /* Reference smoothing is applied only for plane 0, non-DC modes and blocks
   * other than 4x4, and only when the mode is further from both 26 and 10
   * than the per-size threshold (7, 1, 0 for log2_size 3, 4, 5). */
  262. if (c_idx == 0 && mode != INTRA_DC && size != 4) {
  263. int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
  264. int min_dist_vert_hor = FFMIN(FFABS((int)mode - 26),
  265. FFABS((int)mode - 10));
  266. if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
  267. int threshold = 1 << (BIT_DEPTH - 5);
  /* Strong smoothing: for 32x32 blocks whose two edges are close to
   * linear (second difference below threshold), replace the edges by a
   * linear interpolation between the corner and sample 63. */
  268. if (s->ps.sps->sps_strong_intra_smoothing_enable_flag &&
  269. log2_size == 5 &&
  270. FFABS(top[-1] + top[63] - 2 * top[31]) < threshold &&
  271. FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
  272. // We can't just overwrite values in top because it could be
  273. // a pointer into src
  /* NOTE(review): in this function top still points into top_array here,
   * so the comment above may be stale -- confirm.  The left column is
   * interpolated in place, the top row goes to filtered_top. */
  274. filtered_top[-1] = top[-1];
  275. filtered_top[63] = top[63];
  276. for (i = 0; i < 63; i++)
  277. filtered_top[i] = ((64 - (i + 1)) * top[-1] +
  278. (i + 1) * top[63] + 32) >> 6;
  279. for (i = 0; i < 63; i++)
  280. left[i] = ((64 - (i + 1)) * left[-1] +
  281. (i + 1) * left[63] + 32) >> 6;
  282. top = filtered_top;
  283. } else {
  /* Regular smoothing: 3-tap (1, 2, 1) / 4 filter along both edges; the
   * last sample of each edge is copied unfiltered and the corner is
   * filtered from left[0], the corner itself and top[0]. */
  284. filtered_left[2 * size - 1] = left[2 * size - 1];
  285. filtered_top[2 * size - 1] = top[2 * size - 1];
  286. for (i = 2 * size - 2; i >= 0; i--)
  287. filtered_left[i] = (left[i + 1] + 2 * left[i] +
  288. left[i - 1] + 2) >> 2;
  289. filtered_top[-1] =
  290. filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
  291. for (i = 2 * size - 2; i >= 0; i--)
  292. filtered_top[i] = (top[i + 1] + 2 * top[i] +
  293. top[i - 1] + 2) >> 2;
  294. left = filtered_left;
  295. top = filtered_top;
  296. }
  297. }
  298. }
  /* Run the predictor; the planar and angular tables are indexed by
   * log2_size - 2, the DC predictor takes log2_size as an argument. */
  299. switch (mode) {
  300. case INTRA_PLANAR:
  301. s->hpc.pred_planar[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
  302. (uint8_t *)left, stride);
  303. break;
  304. case INTRA_DC:
  305. s->hpc.pred_dc((uint8_t *)src, (uint8_t *)top,
  306. (uint8_t *)left, stride, log2_size, c_idx);
  307. break;
  308. default:
  309. s->hpc.pred_angular[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
  310. (uint8_t *)left, stride, c_idx,
  311. mode);
  312. break;
  313. }
  314. }
  315. #define INTRA_PRED(size) \
  316. static void FUNC(intra_pred_ ## size)(HEVCContext *s, int x0, int y0, int c_idx) \
  317. { \
  318. FUNC(intra_pred)(s, x0, y0, size, c_idx); \
  319. }
  320. INTRA_PRED(2)
  321. INTRA_PRED(3)
  322. INTRA_PRED(4)
  323. INTRA_PRED(5)
  324. #undef INTRA_PRED
  325. static av_always_inline void FUNC(pred_planar)(uint8_t *_src, const uint8_t *_top,
  326. const uint8_t *_left, ptrdiff_t stride,
  327. int trafo_size)
  328. {
  329. int x, y;
  330. pixel *src = (pixel *)_src;
  331. const pixel *top = (const pixel *)_top;
  332. const pixel *left = (const pixel *)_left;
  333. int size = 1 << trafo_size;
  334. for (y = 0; y < size; y++)
  335. for (x = 0; x < size; x++)
  336. POS(x, y) = ((size - 1 - x) * left[y] + (x + 1) * top[size] +
  337. (size - 1 - y) * top[x] + (y + 1) * left[size] + size) >> (trafo_size + 1);
  338. }
  339. #define PRED_PLANAR(size)\
  340. static void FUNC(pred_planar_ ## size)(uint8_t *src, const uint8_t *top, \
  341. const uint8_t *left, ptrdiff_t stride) \
  342. { \
  343. FUNC(pred_planar)(src, top, left, stride, size + 2); \
  344. }
  345. PRED_PLANAR(0)
  346. PRED_PLANAR(1)
  347. PRED_PLANAR(2)
  348. PRED_PLANAR(3)
  349. #undef PRED_PLANAR
  350. static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
  351. const uint8_t *_left,
  352. ptrdiff_t stride, int log2_size, int c_idx)
  353. {
  354. int i, j, x, y;
  355. int size = (1 << log2_size);
  356. pixel *src = (pixel *)_src;
  357. const pixel *top = (const pixel *)_top;
  358. const pixel *left = (const pixel *)_left;
  359. int dc = size;
  360. pixel4 a;
  361. for (i = 0; i < size; i++)
  362. dc += left[i] + top[i];
  363. dc >>= log2_size + 1;
  364. a = PIXEL_SPLAT_X4(dc);
  365. for (i = 0; i < size; i++)
  366. for (j = 0; j < size / 4; j++)
  367. AV_WN4PA(&POS(j * 4, i), a);
  368. if (c_idx == 0 && size < 32) {
  369. POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2;
  370. for (x = 1; x < size; x++)
  371. POS(x, 0) = (top[x] + 3 * dc + 2) >> 2;
  372. for (y = 1; y < size; y++)
  373. POS(0, y) = (left[y] + 3 * dc + 2) >> 2;
  374. }
  375. }
  376. static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
  377. const uint8_t *_top,
  378. const uint8_t *_left,
  379. ptrdiff_t stride, int c_idx,
  380. int mode, int size)
  381. {
  382. int x, y;
  383. pixel *src = (pixel *)_src;
  384. const pixel *top = (const pixel *)_top;
  385. const pixel *left = (const pixel *)_left;
  386. static const int intra_pred_angle[] = {
  387. 32, 26, 21, 17, 13, 9, 5, 2, 0, -2, -5, -9, -13, -17, -21, -26, -32,
  388. -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32
  389. };
  390. static const int inv_angle[] = {
  391. -4096, -1638, -910, -630, -482, -390, -315, -256, -315, -390, -482,
  392. -630, -910, -1638, -4096
  393. };
  394. int angle = intra_pred_angle[mode - 2];
  395. pixel ref_array[3 * MAX_TB_SIZE + 1];
  396. pixel *ref_tmp = ref_array + size;
  397. const pixel *ref;
  398. int last = (size * angle) >> 5;
  399. if (mode >= 18) {
  400. ref = top - 1;
  401. if (angle < 0 && last < -1) {
  402. for (x = 0; x <= size; x++)
  403. ref_tmp[x] = top[x - 1];
  404. for (x = last; x <= -1; x++)
  405. ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
  406. ref = ref_tmp;
  407. }
  408. for (y = 0; y < size; y++) {
  409. int idx = ((y + 1) * angle) >> 5;
  410. int fact = ((y + 1) * angle) & 31;
  411. if (fact) {
  412. for (x = 0; x < size; x++) {
  413. POS(x, y) = ((32 - fact) * ref[x + idx + 1] +
  414. fact * ref[x + idx + 2] + 16) >> 5;
  415. }
  416. } else {
  417. for (x = 0; x < size; x++)
  418. POS(x, y) = ref[x + idx + 1];
  419. }
  420. }
  421. if (mode == 26 && c_idx == 0 && size < 32) {
  422. for (y = 0; y < size; y++)
  423. POS(0, y) = av_clip_pixel(top[0] + ((left[y] - left[-1]) >> 1));
  424. }
  425. } else {
  426. ref = left - 1;
  427. if (angle < 0 && last < -1) {
  428. for (x = 0; x <= size; x++)
  429. ref_tmp[x] = left[x - 1];
  430. for (x = last; x <= -1; x++)
  431. ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
  432. ref = ref_tmp;
  433. }
  434. for (x = 0; x < size; x++) {
  435. int idx = ((x + 1) * angle) >> 5;
  436. int fact = ((x + 1) * angle) & 31;
  437. if (fact) {
  438. for (y = 0; y < size; y++) {
  439. POS(x, y) = ((32 - fact) * ref[y + idx + 1] +
  440. fact * ref[y + idx + 2] + 16) >> 5;
  441. }
  442. } else {
  443. for (y = 0; y < size; y++)
  444. POS(x, y) = ref[y + idx + 1];
  445. }
  446. }
  447. if (mode == 10 && c_idx == 0 && size < 32) {
  448. for (x = 0; x < size; x++)
  449. POS(x, 0) = av_clip_pixel(left[0] + ((top[x] - top[-1]) >> 1));
  450. }
  451. }
  452. }
  453. static void FUNC(pred_angular_0)(uint8_t *src, const uint8_t *top,
  454. const uint8_t *left,
  455. ptrdiff_t stride, int c_idx, int mode)
  456. {
  457. FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 2);
  458. }
  459. static void FUNC(pred_angular_1)(uint8_t *src, const uint8_t *top,
  460. const uint8_t *left,
  461. ptrdiff_t stride, int c_idx, int mode)
  462. {
  463. FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 3);
  464. }
  465. static void FUNC(pred_angular_2)(uint8_t *src, const uint8_t *top,
  466. const uint8_t *left,
  467. ptrdiff_t stride, int c_idx, int mode)
  468. {
  469. FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 4);
  470. }
  471. static void FUNC(pred_angular_3)(uint8_t *src, const uint8_t *top,
  472. const uint8_t *left,
  473. ptrdiff_t stride, int c_idx, int mode)
  474. {
  475. FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 5);
  476. }
  /*
   * Remove every helper macro this template defined so that none of them
   * leaks into whatever includes it (presumably the template is included once
   * per bit depth -- confirm against the including file).  EXTEND_UP_CIP_0 is
   * defined alongside the other EXTEND_*_CIP helpers and is dropped here with
   * them.
   */
  #undef EXTEND_LEFT_CIP
  #undef EXTEND_RIGHT_CIP
  #undef EXTEND_UP_CIP
  #undef EXTEND_UP_CIP_0
  #undef EXTEND_DOWN_CIP
  #undef IS_INTRA
  #undef MVF_PU
  #undef MVF
  #undef PU
  #undef EXTEND
  #undef MIN_TB_ADDR_ZS
  #undef POS