You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

329 lines
15KB

  1. /*
  2. * Copyright (c) 2015 Anton Khirnov
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License along
  17. * with Libav; if not, write to the Free Software Foundation, Inc.,
  18. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19. */
  20. #include <string.h>
  21. #include "checkasm.h"
  22. #include "libavcodec/avcodec.h"
  23. #include "libavcodec/hevcdsp.h"
  24. #include "libavutil/common.h"
  25. #include "libavutil/intreadwrite.h"
  /*
   * Element count of every scratch buffer used by these tests: sized for the
   * maximum PU size (64) plus the interpolation stencil (7), with the row
   * width rounded up to a multiple of 16.
   */
  #define BUF_SIZE (FFALIGN(64 + 7, 16) * (64 + 7) * 2)

  /*
   * Number of bytes occupied by one sample of the given bit depth
   * (1 for 8 bits, 2 for 9..16 bits).
   * The argument is parenthesized so that any expression may be passed.
   */
  #define PIXEL_SIZE(depth) (((depth) + 7) / 8)
  /*
   * Fill a buffer with masked pseudo-random data.
   *
   *   buf   - pointer to the buffer; written through AV_WN32A, so it is
   *           assumed to be suitably aligned for a 32-bit store
   *   size  - loop bound, counted in elements of buf's pointed-to type
   *   depth - bit depth (8..10); selects the entry of pixel_mask[] that is
   *           ANDed with every random 32-bit word
   *
   * The index advances by 4 elements per iteration while each iteration
   * stores one 32-bit word at (buf + index). For byte buffers this covers
   * the buffer contiguously. NOTE(review): for int16_t buffers only every
   * other pair of elements is written - confirm this is intended.
   *
   * All arguments are parenthesized and the internal locals carry an rb_
   * prefix so they cannot capture identifiers used in the arguments.
   */
  #define randomize_buffers(buf, size, depth) \
      do { \
          const uint32_t rb_mask = pixel_mask[(depth) - 8]; \
          int rb_pos; \
          for (rb_pos = 0; rb_pos < (size); rb_pos += 4) { \
              uint32_t rb_word = rnd() & rb_mask; \
              AV_WN32A((buf) + rb_pos, rb_word); \
          } \
      } while (0)
  /*
   * Masks ANDed with each random 32-bit word when filling sample buffers,
   * indexed by (bit depth - 8).
   */
  static const uint32_t pixel_mask[3] = {
      [0] = 0xffffffff,
      [1] = 0x01ff01ff,
      [2] = 0x03ff03ff,
  };

  /*
   * Block heights to test for a given block width; the row index is the
   * width itself. Each row is a zero-terminated list, and rows for widths
   * that are not listed are all zeros.
   */
  static const int pred_heights[][7] = {
      [2]  = {  8,  4,  2,  0 },
      [4]  = { 16,  8,  4,  2,  0 },
      [6]  = {  8,  0 },
      [8]  = { 32, 16,  8,  4,  2,  0 },
      [12] = { 16,  0 },
      [16] = { 64, 32, 16, 12,  8,  4,  0 },
      [24] = { 32,  0 },
      [32] = { 64, 32, 24, 16,  8,  0 },
      [48] = { 64,  0 },
      [64] = { 64, 48, 32, 16,  0 },
  };

  /* Block widths iterated over by the test loops. */
  static const int pred_widths[] = {
      4, 8, 12, 16, 24, 32, 48, 64
  };

  /*
   * Name fragments used in the reported function names, indexed as
   * [vertical filtering active][horizontal filtering active].
   */
  static const char *interp_names[2][2] = {
      [0] = { [0] = "pixels", [1] = "h"  },
      [1] = { [0] = "v",      [1] = "hv" },
  };
  /*
   * Run the reference and the tested implementation of an unweighted
   * prediction function for every height listed in pred_heights[width],
   * compare the outputs, and benchmark the tested implementation.
   *
   *   dst0, dst1 - output buffers for the reference / tested version
   *   src0       - input buffer
   *   width      - block width, used as the row index into pred_heights
   *   bit_depth  - unused; kept so all invocations share one shape
   *
   * Relies on `dststride` and `srcstride` being defined in the invoking
   * scope, and on a preceding declare_func()/check_func() pair.
   * The loop bound is taken from row 0 of pred_heights so that no
   * uninitialized index appears in the expression; the loop variable has
   * a distinct name so it does not shadow the caller's `i`.
   */
  #define UNWEIGHTED_PRED(dst0, dst1, src0, width, bit_depth) \
      do { \
          int ht_idx; \
          for (ht_idx = 0; ht_idx < FF_ARRAY_ELEMS(pred_heights[0]); ht_idx++) { \
              int height = pred_heights[(width)][ht_idx]; \
              if (!height) \
                  break; \
              call_ref((dst0), dststride, (src0), srcstride, height); \
              call_new((dst1), dststride, (src0), srcstride, height); \
              if (memcmp((dst0), (dst1), dststride * height)) \
                  fail(); \
              bench_new((dst1), dststride, (src0), srcstride, height); \
          } \
      } while (0)
  /*
   * Two-source variant of the unweighted prediction check: run the
   * reference and the tested implementation for every height listed in
   * pred_heights[width], compare the outputs, and benchmark the tested
   * implementation.
   *
   *   dst0, dst1 - output buffers for the reference / tested version
   *   src0, src1 - the two input buffers
   *   width      - block width, used as the row index into pred_heights
   *   bit_depth  - unused; kept so all invocations share one shape
   *
   * Relies on `dststride` and `srcstride` being defined in the invoking
   * scope, and on a preceding declare_func()/check_func() pair.
   * The loop bound is taken from row 0 of pred_heights so that no
   * uninitialized index appears in the expression; the loop variable has
   * a distinct name so it does not shadow the caller's `i`.
   */
  #define UNWEIGHTED_PRED_AVG(dst0, dst1, src0, src1, width, bit_depth) \
      do { \
          int ht_idx; \
          for (ht_idx = 0; ht_idx < FF_ARRAY_ELEMS(pred_heights[0]); ht_idx++) { \
              int height = pred_heights[(width)][ht_idx]; \
              if (!height) \
                  break; \
              call_ref((dst0), dststride, (src0), (src1), srcstride, height); \
              call_new((dst1), dststride, (src0), (src1), srcstride, height); \
              if (memcmp((dst0), (dst1), dststride * height)) \
                  fail(); \
              bench_new((dst1), dststride, (src0), (src1), srcstride, height); \
          } \
      } while (0)
  81. static void check_unweighted_pred(HEVCDSPContext *h, uint8_t *dst0, uint8_t *dst1,
  82. int16_t *src0, int16_t *src1, int bit_depth)
  83. {
  84. int i;
  85. randomize_buffers(src0, BUF_SIZE, 8);
  86. randomize_buffers(src1, BUF_SIZE, 8);
  87. memset(dst0, 0, BUF_SIZE * sizeof(*dst0));
  88. memset(dst1, 0, BUF_SIZE * sizeof(*dst1));
  89. for (i = 0; i < FF_ARRAY_ELEMS(pred_widths); i++) {
  90. const int width = pred_widths[i];
  91. const ptrdiff_t srcstride = FFALIGN(width, 16) * sizeof(*src0);
  92. const ptrdiff_t dststride = FFALIGN(width, 16) * PIXEL_SIZE(bit_depth);
  93. {
  94. declare_func(void, uint8_t *dst, ptrdiff_t dststride, int16_t *src, ptrdiff_t srcstride, int height);
  95. if (check_func(h->put_unweighted_pred[i], "put_unweighted_pred_%d_%d", width, bit_depth))
  96. UNWEIGHTED_PRED(dst0, dst1, src0, width, bit_depth);
  97. if (check_func(h->put_unweighted_pred_chroma[i], "put_unweighted_pred_%d_%d", width / 2, bit_depth))
  98. UNWEIGHTED_PRED(dst0, dst1, src0, width, bit_depth);
  99. }
  100. {
  101. declare_func(void, uint8_t *dst, ptrdiff_t dststride,
  102. int16_t *src0, int16_t *src1, ptrdiff_t srcstride, int height);
  103. if (check_func(h->put_unweighted_pred_avg[i], "put_unweighted_pred_avg_%d_%d", width, bit_depth))
  104. UNWEIGHTED_PRED_AVG(dst0, dst1, src0, src1, width, bit_depth);
  105. if (check_func(h->put_unweighted_pred_avg_chroma[i], "put_unweighted_pred_avg_%d_%d", width / 2, bit_depth))
  106. UNWEIGHTED_PRED_AVG(dst0, dst1, src0, src1, width, bit_depth);
  107. }
  108. }
  109. }
  /*
   * Run the reference and the tested implementation of a weighted
   * prediction function for every height listed in pred_heights[width],
   * compare the outputs, and benchmark the tested implementation.
   *
   *   dst0, dst1 - output buffers for the reference / tested version
   *   src0       - input buffer
   *   width      - block width, used as the row index into pred_heights
   *   bit_depth  - unused; kept so all invocations share one shape
   *
   * Relies on `denom`, `weight0`, `offset0`, `dststride` and `srcstride`
   * being defined in the invoking scope, and on a preceding
   * declare_func()/check_func() pair.
   * The loop bound is taken from row 0 of pred_heights so that no
   * uninitialized index appears in the expression; the loop variable has
   * a distinct name so it does not shadow the caller's `i`.
   */
  #define WEIGHTED_PRED(dst0, dst1, src0, width, bit_depth) \
      do { \
          int ht_idx; \
          for (ht_idx = 0; ht_idx < FF_ARRAY_ELEMS(pred_heights[0]); ht_idx++) { \
              int height = pred_heights[(width)][ht_idx]; \
              if (!height) \
                  break; \
              call_ref(denom, weight0, offset0, (dst0), dststride, (src0), srcstride, height); \
              call_new(denom, weight0, offset0, (dst1), dststride, (src0), srcstride, height); \
              if (memcmp((dst0), (dst1), dststride * height)) \
                  fail(); \
              bench_new(denom, weight0, offset0, (dst1), dststride, (src0), srcstride, height); \
          } \
      } while (0)
  /*
   * Two-source variant of the weighted prediction check: run the reference
   * and the tested implementation for every height listed in
   * pred_heights[width], compare the outputs, and benchmark the tested
   * implementation.
   *
   *   dst0, dst1 - output buffers for the reference / tested version
   *   src0, src1 - the two input buffers
   *   width      - block width, used as the row index into pred_heights
   *   bit_depth  - unused; kept so all invocations share one shape
   *
   * Relies on `denom`, `weight0`, `weight1`, `offset0`, `offset1`,
   * `dststride` and `srcstride` being defined in the invoking scope, and
   * on a preceding declare_func()/check_func() pair.
   * The loop bound is taken from row 0 of pred_heights so that no
   * uninitialized index appears in the expression; the loop variable has
   * a distinct name so it does not shadow the caller's `i`.
   */
  #define WEIGHTED_PRED_AVG(dst0, dst1, src0, src1, width, bit_depth) \
      do { \
          int ht_idx; \
          for (ht_idx = 0; ht_idx < FF_ARRAY_ELEMS(pred_heights[0]); ht_idx++) { \
              int height = pred_heights[(width)][ht_idx]; \
              if (!height) \
                  break; \
              call_ref(denom, weight0, weight1, offset0, offset1, (dst0), dststride, (src0), (src1), srcstride, height); \
              call_new(denom, weight0, weight1, offset0, offset1, (dst1), dststride, (src0), (src1), srcstride, height); \
              if (memcmp((dst0), (dst1), dststride * height)) \
                  fail(); \
              bench_new(denom, weight0, weight1, offset0, offset1, (dst1), dststride, (src0), (src1), srcstride, height); \
          } \
      } while (0)
  138. static void check_weighted_pred(HEVCDSPContext *h, uint8_t *dst0, uint8_t *dst1,
  139. int16_t *src0, int16_t *src1, int bit_depth)
  140. {
  141. uint8_t denom;
  142. int16_t weight0, weight1, offset0, offset1;
  143. int i;
  144. randomize_buffers(src0, BUF_SIZE, 8);
  145. randomize_buffers(src1, BUF_SIZE, 8);
  146. denom = rnd() & 7;
  147. weight0 = denom + ((rnd() & 255) - 128);
  148. weight1 = denom + ((rnd() & 255) - 128);
  149. offset0 = (rnd() & 255) - 128;
  150. offset1 = (rnd() & 255) - 128;
  151. memset(dst0, 0, BUF_SIZE * sizeof(*dst0));
  152. memset(dst1, 0, BUF_SIZE * sizeof(*dst1));
  153. for (i = 0; i < FF_ARRAY_ELEMS(pred_widths); i++) {
  154. const int width = pred_widths[i];
  155. const ptrdiff_t srcstride = FFALIGN(width, 16) * sizeof(*src0);
  156. const ptrdiff_t dststride = FFALIGN(width, 16) * PIXEL_SIZE(bit_depth);
  157. {
  158. declare_func(void, uint8_t denom, int16_t weight, int16_t offset,
  159. uint8_t *dst, ptrdiff_t dststride, int16_t *src, ptrdiff_t srcstride, int height);
  160. if (check_func(h->weighted_pred[i], "weighted_pred_%d_%d", width, bit_depth))
  161. WEIGHTED_PRED(dst0, dst1, src0, width, bit_depth);
  162. if (check_func(h->weighted_pred_chroma[i], "weighted_pred_%d_%d", width / 2, bit_depth))
  163. WEIGHTED_PRED(dst0, dst1, src0, width, bit_depth);
  164. }
  165. {
  166. declare_func(void, uint8_t denom, int16_t weight0, int16_t weight1, int16_t offset0, int16_t offset1,
  167. uint8_t *dst, ptrdiff_t dststride, int16_t *src0, int16_t *src1, ptrdiff_t srcstride, int height);
  168. if (check_func(h->weighted_pred_avg[i], "weighted_pred_avg_%d_%d", width, bit_depth))
  169. WEIGHTED_PRED_AVG(dst0, dst1, src0, src1, width, bit_depth);
  170. if (check_func(h->weighted_pred_avg_chroma[i], "weighted_pred_avg_%d_%d", width / 2, bit_depth))
  171. WEIGHTED_PRED_AVG(dst0, dst1, src0, src1, width, bit_depth);
  172. }
  173. }
  174. }
  175. static void check_epel(HEVCDSPContext *h, int16_t *dst0, int16_t *dst1,
  176. uint8_t *src, int16_t *mcbuffer, int bit_depth)
  177. {
  178. int i, j, k, l, mx, my;
  179. declare_func(void, int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
  180. int height, int mx, int my, int16_t *mcbuffer);
  181. randomize_buffers(src, BUF_SIZE, bit_depth);
  182. memset(dst0, 0, BUF_SIZE * sizeof(*dst0));
  183. memset(dst1, 0, BUF_SIZE * sizeof(*dst1));
  184. for (i = 0; i < 2; i++) {
  185. for (j = 0; j < 2; j++) {
  186. for (k = 0; k < FF_ARRAY_ELEMS(h->put_hevc_epel[i][j]); k++) {
  187. int width = pred_widths[k] / 2;
  188. ptrdiff_t dststride = FFALIGN(width, 16) * sizeof(*dst0);
  189. ptrdiff_t srcstride = FFALIGN(width + 3, 8) * PIXEL_SIZE(bit_depth);
  190. if (!check_func(h->put_hevc_epel[i][j][k], "epel_%s_%d_%d", interp_names[i][j], width, bit_depth))
  191. continue;
  192. for (l = 0; l < FF_ARRAY_ELEMS(pred_heights[0]); l++) {
  193. int height = pred_heights[width][l];
  194. if (!height)
  195. continue;
  196. for (my = i; my < (i ? 8 : 1); my++)
  197. for (mx = j; mx < (j ? 8 : 1); mx++) {
  198. call_ref(dst0, dststride, src + srcstride + PIXEL_SIZE(bit_depth), srcstride, height, mx, my, mcbuffer);
  199. call_new(dst1, dststride, src + srcstride + PIXEL_SIZE(bit_depth), srcstride, height, mx, my, mcbuffer);
  200. if (memcmp(dst0, dst1, dststride * height * sizeof(*dst0)))
  201. fail();
  202. bench_new(dst1, dststride, src + srcstride + PIXEL_SIZE(bit_depth), srcstride, height, mx, my, mcbuffer);
  203. }
  204. }
  205. }
  206. }
  207. }
  208. }
  209. static void check_qpel(HEVCDSPContext *h, int16_t *dst0, int16_t *dst1,
  210. uint8_t *src, int16_t *mcbuffer, int bit_depth)
  211. {
  212. int i, j, k, l, mx, my;
  213. declare_func(void, int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
  214. int height, int mx, int my, int16_t *mcbuffer);
  215. randomize_buffers(src, BUF_SIZE, bit_depth);
  216. memset(dst0, 0, BUF_SIZE * sizeof(*dst0));
  217. memset(dst1, 0, BUF_SIZE * sizeof(*dst1));
  218. for (i = 0; i < 2; i++) {
  219. for (j = 0; j < 2; j++) {
  220. for (k = 0; k < FF_ARRAY_ELEMS(h->put_hevc_qpel[i][j]); k++) {
  221. int width = pred_widths[k];
  222. ptrdiff_t dststride = FFALIGN(width, 16) * sizeof(*dst0);
  223. ptrdiff_t srcstride = FFALIGN(width + 7, 8) * PIXEL_SIZE(bit_depth);
  224. if (!check_func(h->put_hevc_qpel[i][j][k], "qpel_%s_%d_%d", interp_names[i][j], width, bit_depth))
  225. continue;
  226. for (l = 0; l < FF_ARRAY_ELEMS(pred_heights[0]); l++) {
  227. int height = pred_heights[width][l];
  228. if (!height)
  229. continue;
  230. for (my = i; my < (i ? 2 : 1); my++)
  231. for (mx = j; mx < (j ? 2 : 1); mx++) {
  232. call_ref(dst0, dststride, src + 3 * srcstride + 3 * PIXEL_SIZE(bit_depth), srcstride, height, mx, my, mcbuffer);
  233. call_new(dst1, dststride, src + 3 * srcstride + 3 * PIXEL_SIZE(bit_depth), srcstride, height, mx, my, mcbuffer);
  234. if (memcmp(dst0, dst1, dststride * height * sizeof(*dst0)))
  235. fail();
  236. bench_new(dst1, dststride, src + 3 * srcstride + 3 * PIXEL_SIZE(bit_depth), srcstride, height, mx, my, mcbuffer);
  237. }
  238. }
  239. }
  240. }
  241. }
  242. }
  243. void checkasm_check_hevc_mc(void)
  244. {
  245. DECLARE_ALIGNED(16, uint8_t, buf8_0)[BUF_SIZE];
  246. DECLARE_ALIGNED(16, uint8_t, buf8_1)[BUF_SIZE];
  247. DECLARE_ALIGNED(16, int16_t, buf16_0)[BUF_SIZE];
  248. DECLARE_ALIGNED(16, int16_t, buf16_1)[BUF_SIZE];
  249. DECLARE_ALIGNED(16, int16_t, mcbuffer)[BUF_SIZE];
  250. HEVCDSPContext h;
  251. int bit_depth;
  252. for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
  253. ff_hevc_dsp_init(&h, bit_depth);
  254. check_qpel(&h, buf16_0, buf16_1, buf8_0, mcbuffer, bit_depth);
  255. }
  256. report("qpel");
  257. for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
  258. ff_hevc_dsp_init(&h, bit_depth);
  259. check_epel(&h, buf16_0, buf16_1, buf8_0, mcbuffer, bit_depth);
  260. }
  261. report("epel");
  262. for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
  263. ff_hevc_dsp_init(&h, bit_depth);
  264. check_unweighted_pred(&h, buf8_0, buf8_1, buf16_0, buf16_1, bit_depth);
  265. }
  266. report("unweighted_pred");
  267. for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
  268. ff_hevc_dsp_init(&h, bit_depth);
  269. check_weighted_pred(&h, buf8_0, buf8_1, buf16_0, buf16_1, bit_depth);
  270. }
  271. report("weighted_pred");
  272. }