You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

372 lines
11KB

  1. /*
  2. * Simple IDCT
  3. *
  4. * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. /**
  23. * @file
  24. * simpleidct in C.
  25. */
  26. /* Based upon some commented-out C code from mpeg2dec (idct_mmx.c
  27. * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */
  28. #include "simple_idct.h"
  29. #include "bit_depth_template.c"
  30. #undef W1
  31. #undef W2
  32. #undef W3
  33. #undef W4
  34. #undef W5
  35. #undef W6
  36. #undef W7
  37. #undef ROW_SHIFT
  38. #undef COL_SHIFT
  39. #undef DC_SHIFT
  40. #undef MUL
  41. #undef MAC
  42. #if BIT_DEPTH == 8
  43. #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
  44. #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
  45. #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
  46. #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
  47. #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
  48. #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
  49. #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
  50. #define ROW_SHIFT 11
  51. #define COL_SHIFT 20
  52. #define DC_SHIFT 3
  53. #define MUL(a, b) MUL16(a, b)
  54. #define MAC(a, b, c) MAC16(a, b, c)
  55. #elif BIT_DEPTH == 10 || BIT_DEPTH == 12
  56. # if BIT_DEPTH == 10
  57. #define W1 22725 // 90901
  58. #define W2 21407 // 85627
  59. #define W3 19265 // 77062
  60. #define W4 16384 // 65535
  61. #define W5 12873 // 51491
  62. #define W6 8867 // 35468
  63. #define W7 4520 // 18081
  64. # ifdef EXTRA_SHIFT
  65. #define ROW_SHIFT 13
  66. #define COL_SHIFT 18
  67. #define DC_SHIFT 1
  68. # elif IN_IDCT_DEPTH == 32
  69. #define ROW_SHIFT 13
  70. #define COL_SHIFT 21
  71. #define DC_SHIFT 2
  72. # else
  73. #define ROW_SHIFT 12
  74. #define COL_SHIFT 19
  75. #define DC_SHIFT 2
  76. # endif
  77. # else
  78. #define W1 45451
  79. #define W2 42813
  80. #define W3 38531
  81. #define W4 32767
  82. #define W5 25746
  83. #define W6 17734
  84. #define W7 9041
  85. #define ROW_SHIFT 16
  86. #define COL_SHIFT 17
  87. #define DC_SHIFT -1
  88. # endif
  89. #define MUL(a, b) ((a) * (b))
  90. #define MAC(a, b, c) ((a) += (b) * (c))
  91. #else
  92. #error "Unsupported bitdepth"
  93. #endif
  94. #ifdef EXTRA_SHIFT
  95. static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
  96. #else
  97. static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
  98. #endif
  99. {
  100. SUINT a0, a1, a2, a3, b0, b1, b2, b3;
  101. // TODO: Add DC-only support for int32_t input
  102. #if IN_IDCT_DEPTH == 16
  103. #if HAVE_FAST_64BIT
  104. #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
  105. if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
  106. uint64_t temp;
  107. if (DC_SHIFT - extra_shift >= 0) {
  108. temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
  109. } else {
  110. temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
  111. }
  112. temp += temp * (1 << 16);
  113. temp += temp * ((uint64_t) 1 << 32);
  114. AV_WN64A(row, temp);
  115. AV_WN64A(row + 4, temp);
  116. return;
  117. }
  118. #else
  119. if (!(AV_RN32A(row+2) |
  120. AV_RN32A(row+4) |
  121. AV_RN32A(row+6) |
  122. row[1])) {
  123. uint32_t temp;
  124. if (DC_SHIFT - extra_shift >= 0) {
  125. temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
  126. } else {
  127. temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
  128. }
  129. temp += temp * (1 << 16);
  130. AV_WN32A(row, temp);
  131. AV_WN32A(row+2, temp);
  132. AV_WN32A(row+4, temp);
  133. AV_WN32A(row+6, temp);
  134. return;
  135. }
  136. #endif
  137. #endif
  138. a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
  139. a1 = a0;
  140. a2 = a0;
  141. a3 = a0;
  142. a0 += W2 * row[2];
  143. a1 += W6 * row[2];
  144. a2 -= W6 * row[2];
  145. a3 -= W2 * row[2];
  146. b0 = MUL(W1, row[1]);
  147. MAC(b0, W3, row[3]);
  148. b1 = MUL(W3, row[1]);
  149. MAC(b1, -W7, row[3]);
  150. b2 = MUL(W5, row[1]);
  151. MAC(b2, -W1, row[3]);
  152. b3 = MUL(W7, row[1]);
  153. MAC(b3, -W5, row[3]);
  154. #if IN_IDCT_DEPTH == 32
  155. if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
  156. #else
  157. if (AV_RN64A(row + 4)) {
  158. #endif
  159. a0 += W4*row[4] + W6*row[6];
  160. a1 += - W4*row[4] - W2*row[6];
  161. a2 += - W4*row[4] + W2*row[6];
  162. a3 += W4*row[4] - W6*row[6];
  163. MAC(b0, W5, row[5]);
  164. MAC(b0, W7, row[7]);
  165. MAC(b1, -W1, row[5]);
  166. MAC(b1, -W5, row[7]);
  167. MAC(b2, W7, row[5]);
  168. MAC(b2, W3, row[7]);
  169. MAC(b3, W3, row[5]);
  170. MAC(b3, -W1, row[7]);
  171. }
  172. row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
  173. row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
  174. row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
  175. row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
  176. row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
  177. row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
  178. row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
  179. row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
  180. }
  181. #define IDCT_COLS do { \
  182. a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
  183. a1 = a0; \
  184. a2 = a0; \
  185. a3 = a0; \
  186. \
  187. a0 += W2*col[8*2]; \
  188. a1 += W6*col[8*2]; \
  189. a2 += -W6*col[8*2]; \
  190. a3 += -W2*col[8*2]; \
  191. \
  192. b0 = MUL(W1, col[8*1]); \
  193. b1 = MUL(W3, col[8*1]); \
  194. b2 = MUL(W5, col[8*1]); \
  195. b3 = MUL(W7, col[8*1]); \
  196. \
  197. MAC(b0, W3, col[8*3]); \
  198. MAC(b1, -W7, col[8*3]); \
  199. MAC(b2, -W1, col[8*3]); \
  200. MAC(b3, -W5, col[8*3]); \
  201. \
  202. if (col[8*4]) { \
  203. a0 += W4*col[8*4]; \
  204. a1 += -W4*col[8*4]; \
  205. a2 += -W4*col[8*4]; \
  206. a3 += W4*col[8*4]; \
  207. } \
  208. \
  209. if (col[8*5]) { \
  210. MAC(b0, W5, col[8*5]); \
  211. MAC(b1, -W1, col[8*5]); \
  212. MAC(b2, W7, col[8*5]); \
  213. MAC(b3, W3, col[8*5]); \
  214. } \
  215. \
  216. if (col[8*6]) { \
  217. a0 += W6*col[8*6]; \
  218. a1 += -W2*col[8*6]; \
  219. a2 += W2*col[8*6]; \
  220. a3 += -W6*col[8*6]; \
  221. } \
  222. \
  223. if (col[8*7]) { \
  224. MAC(b0, W7, col[8*7]); \
  225. MAC(b1, -W5, col[8*7]); \
  226. MAC(b2, W3, col[8*7]); \
  227. MAC(b3, -W1, col[8*7]); \
  228. } \
  229. } while (0)
  230. #ifdef EXTRA_SHIFT
  231. static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
  232. #else
  233. static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
  234. idctin *col)
  235. {
  236. SUINT a0, a1, a2, a3, b0, b1, b2, b3;
  237. IDCT_COLS;
  238. dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
  239. dest += line_size;
  240. dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
  241. dest += line_size;
  242. dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
  243. dest += line_size;
  244. dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
  245. dest += line_size;
  246. dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
  247. dest += line_size;
  248. dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
  249. dest += line_size;
  250. dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
  251. dest += line_size;
  252. dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
  253. }
  254. static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
  255. idctin *col)
  256. {
  257. int a0, a1, a2, a3, b0, b1, b2, b3;
  258. IDCT_COLS;
  259. dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT));
  260. dest += line_size;
  261. dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT));
  262. dest += line_size;
  263. dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT));
  264. dest += line_size;
  265. dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT));
  266. dest += line_size;
  267. dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT));
  268. dest += line_size;
  269. dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT));
  270. dest += line_size;
  271. dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT));
  272. dest += line_size;
  273. dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
  274. }
  275. static inline void FUNC6(idctSparseCol)(idctin *col)
  276. #endif
  277. {
  278. int a0, a1, a2, a3, b0, b1, b2, b3;
  279. IDCT_COLS;
  280. col[0 ] = ((a0 + b0) >> COL_SHIFT);
  281. col[8 ] = ((a1 + b1) >> COL_SHIFT);
  282. col[16] = ((a2 + b2) >> COL_SHIFT);
  283. col[24] = ((a3 + b3) >> COL_SHIFT);
  284. col[32] = ((a3 - b3) >> COL_SHIFT);
  285. col[40] = ((a2 - b2) >> COL_SHIFT);
  286. col[48] = ((a1 - b1) >> COL_SHIFT);
  287. col[56] = ((a0 - b0) >> COL_SHIFT);
  288. }
  289. #ifndef EXTRA_SHIFT
  290. void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
  291. {
  292. idctin *block = (idctin *)block_;
  293. pixel *dest = (pixel *)dest_;
  294. int i;
  295. line_size /= sizeof(pixel);
  296. for (i = 0; i < 8; i++)
  297. FUNC6(idctRowCondDC)(block + i*8, 0);
  298. for (i = 0; i < 8; i++)
  299. FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
  300. }
  301. #if IN_IDCT_DEPTH == 16
  302. void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
  303. {
  304. pixel *dest = (pixel *)dest_;
  305. int i;
  306. line_size /= sizeof(pixel);
  307. for (i = 0; i < 8; i++)
  308. FUNC6(idctRowCondDC)(block + i*8, 0);
  309. for (i = 0; i < 8; i++)
  310. FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
  311. }
  312. void FUNC6(ff_simple_idct)(int16_t *block)
  313. {
  314. int i;
  315. for (i = 0; i < 8; i++)
  316. FUNC6(idctRowCondDC)(block + i*8, 0);
  317. for (i = 0; i < 8; i++)
  318. FUNC6(idctSparseCol)(block + i);
  319. }
  320. #endif
  321. #endif