You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1170 lines
33KB

  1. /*
  2. * jrevdct.c
  3. *
  4. * Copyright (C) 1991, 1992, Thomas G. Lane.
  5. * This file is part of the Independent JPEG Group's software.
  6. * For conditions of distribution and use, see the accompanying README file.
  7. *
  8. * This file contains the basic inverse-DCT transformation subroutine.
  9. *
  10. * This implementation is based on an algorithm described in
  11. * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
  12. * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
  13. * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
  14. * The primary algorithm described there uses 11 multiplies and 29 adds.
  15. * We use their alternate method with 12 multiplies and 32 adds.
  16. * The advantage of this method is that no data path contains more than one
  17. * multiplication; this allows a very simple and accurate implementation in
  18. * scaled fixed-point arithmetic, with a minimal number of shifts.
  19. *
  20. * I've made lots of modifications to attempt to take advantage of the
  21. * sparse nature of the DCT matrices we're getting. Although the logic
  22. * is cumbersome, it's straightforward and the resulting code is much
  23. * faster.
  24. *
  25. * A better way to do this would be to pass in the DCT block as a sparse
  26. * matrix, perhaps with the different cases encoded.
  27. */
  28. #include "common.h"
  29. #include "dsputil.h"
  30. #define EIGHT_BIT_SAMPLES /* selects PASS1_BITS 2 and lets the 16-bit MULTIPLY variants be considered */
  31. #define DCTSIZE 8 /* edge length of a DCT block; this file only copes with 8 */
  32. #define DCTSIZE2 64 /* number of elements in a DCTBLOCK (8 * 8) */
  33. #define GLOBAL /* expands to nothing; no use is visible in this part of the file */
  34. #define RIGHT_SHIFT(x, n) ((x) >> (n)) /* plain >>; DESCALE assumes it rounds negative x towards minus infinity */
  35. typedef DCTELEM DCTBLOCK[DCTSIZE2]; /* one block of coefficients; DCTELEM is not defined in this file (presumably in an included header) */
  36. #define CONST_BITS 13 /* fractional bits kept in the fixed-point constants (CONST_SCALE is 1 << CONST_BITS) */
  37. /*
  38. * This routine is specialized to the case DCTSIZE = 8.
  39. */
  40. #if DCTSIZE != 8
  41. Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
  42. #endif
  43. /*
  44. * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
  45. * on each column. Direct algorithms are also available, but they are
  46. * much more complex and seem not to be any faster when reduced to code.
  47. *
  48. * The poop on this scaling stuff is as follows:
  49. *
  50. * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
  51. * larger than the true IDCT outputs. The final outputs are therefore
  52. * a factor of N larger than desired; since N=8 this can be cured by
  53. * a simple right shift at the end of the algorithm. The advantage of
  54. * this arrangement is that we save two multiplications per 1-D IDCT,
  55. * because the y0 and y4 inputs need not be divided by sqrt(N).
  56. *
  57. * We have to do addition and subtraction of the integer inputs, which
  58. * is no problem, and multiplication by fractional constants, which is
  59. * a problem to do in integer arithmetic. We multiply all the constants
  60. * by CONST_SCALE and convert them to integer constants (thus retaining
  61. * CONST_BITS bits of precision in the constants). After doing a
  62. * multiplication we have to divide the product by CONST_SCALE, with proper
  63. * rounding, to produce the correct output. This division can be done
  64. * cheaply as a right shift of CONST_BITS bits. We postpone shifting
  65. * as long as possible so that partial sums can be added together with
  66. * full fractional precision.
  67. *
  68. * The outputs of the first pass are scaled up by PASS1_BITS bits so that
  69. * they are represented to better-than-integral precision. These outputs
  70. * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
  71. * with the recommended scaling. (To scale up 12-bit sample data further, an
  72. * intermediate int32 array would be needed.)
  73. *
  74. * To avoid overflow of the 32-bit intermediate results in pass 2, we must
  75. * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
  76. * shows that the values given below are the most effective.
  77. */
  78. #ifdef EIGHT_BIT_SAMPLES
  79. #define PASS1_BITS 2 /* extra fractional bits carried by the pass-1 (row) outputs */
  80. #else
  81. #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
  82. #endif
  83. #define ONE ((INT32) 1) /* the value 1 as an INT32; left operand of the shifts in CONST_SCALE and DESCALE */
  84. #define CONST_SCALE (ONE << CONST_BITS) /* fixed-point scale factor 2**CONST_BITS (8192 for CONST_BITS 13) */
  85. /* Convert a positive real constant to an integer scaled by CONST_SCALE.
  86. * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
  87. * you will pay a significant penalty in run time. In that case, figure
  88. * the correct integer constant values and insert them by hand.
  89. */
  90. /* FIX is not used by the code visible in this file: the FIX_* constants are precomputed by hand. The + 0.5 before the INT32 cast rounds a positive x to the nearest integer. */
  91. #define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5))
  92. /* Descale and correctly round an INT32 value X that is scaled by N bits:
  93. * add the fudge factor ONE << (N-1), i.e. half of 2**N, then RIGHT_SHIFT by N (so N must be at least 1; N is expanded twice).
  94. * We assume RIGHT_SHIFT rounds towards minus infinity, so adding the fudge factor is correct for either sign of X.
  95. */
  96. #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
  97. /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
  98. * For 8-bit samples with the recommended scaling, all the variable
  99. * and constant values involved are no more than 16 bits wide, so a
  100. * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
  101. * this provides a useful speedup on many machines.
  102. * There is no way to specify a 16x16->32 multiply in portable C, but
  103. * some C compilers will do the right thing if you provide the correct
  104. * combination of casts.
  105. * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
  106. * Selection: define at most one of SHORTxSHORT_32 / SHORTxLCONST_32 to pick a cast variant (defining both would redefine MULTIPLY); with neither, the plain default is used. */
  107. #ifdef EIGHT_BIT_SAMPLES /* the cast variants are only considered for 8-bit samples */
  108. #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
  109. #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const)))
  110. #endif
  111. #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
  112. #define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const)))
  113. #endif
  114. #endif
  115. #ifndef MULTIPLY /* default definition: used when no cast variant was selected above */
  116. #define MULTIPLY(var,const) ((var) * (const))
  117. #endif
  118. /*
  119. Precomputed FIX() values: each FIX_a_bbbbbbbbb is a.bbbbbbbbb * CONST_SCALE (2**13), rounded to the nearest integer.
  120. Unlike our decoder, where the FIXes are approximated, exact values are needed here, or successive P-frames will drift too much with reference frame coding.
  121. */
  122. #define FIX_0_211164243 1730
  123. #define FIX_0_275899380 2260
  124. #define FIX_0_298631336 2446
  125. #define FIX_0_390180644 3196
  126. #define FIX_0_509795579 4176
  127. #define FIX_0_541196100 4433
  128. #define FIX_0_601344887 4926
  129. #define FIX_0_765366865 6270
  130. #define FIX_0_785694958 6436
  131. #define FIX_0_899976223 7373
  132. #define FIX_1_061594337 8697
  133. #define FIX_1_111140466 9102
  134. #define FIX_1_175875602 9633
  135. #define FIX_1_306562965 10703
  136. #define FIX_1_387039845 11363
  137. #define FIX_1_451774981 11893
  138. #define FIX_1_501321110 12299
  139. #define FIX_1_662939225 13623
  140. #define FIX_1_847759065 15137
  141. #define FIX_1_961570560 16069
  142. #define FIX_2_053119869 16819
  143. #define FIX_2_172734803 17799
  144. #define FIX_2_562915447 20995
  145. #define FIX_3_072711026 25172
  146. /*
  147. * Perform the inverse DCT on one block of coefficients.
  148. */
  149. void j_rev_dct(DCTBLOCK data)
  150. {
  151. INT32 tmp0, tmp1, tmp2, tmp3;
  152. INT32 tmp10, tmp11, tmp12, tmp13;
  153. INT32 z1, z2, z3, z4, z5;
  154. INT32 d0, d1, d2, d3, d4, d5, d6, d7;
  155. register DCTELEM *dataptr;
  156. int rowctr;
  157. /* Pass 1: process rows. */
  158. /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
  159. /* furthermore, we scale the results by 2**PASS1_BITS. */
  160. dataptr = data;
  161. for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
  162. /* Due to quantization, we will usually find that many of the input
  163. * coefficients are zero, especially the AC terms. We can exploit this
  164. * by short-circuiting the IDCT calculation for any row in which all
  165. * the AC terms are zero. In that case each output is equal to the
  166. * DC coefficient (with scale factor as needed).
  167. * With typical images and quantization tables, half or more of the
  168. * row DCT calculations can be simplified this way.
  169. */
  170. register int *idataptr = (int*)dataptr;
  171. /* WARNING: we do the same permutation as MMX idct to simplify the
  172. video core */
  173. d0 = dataptr[0];
  174. d2 = dataptr[1];
  175. d4 = dataptr[2];
  176. d6 = dataptr[3];
  177. d1 = dataptr[4];
  178. d3 = dataptr[5];
  179. d5 = dataptr[6];
  180. d7 = dataptr[7];
  181. if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
  182. /* AC terms all zero */
  183. if (d0) {
  184. /* Compute a 32 bit value to assign. */
  185. DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
  186. register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
  187. idataptr[0] = v;
  188. idataptr[1] = v;
  189. idataptr[2] = v;
  190. idataptr[3] = v;
  191. }
  192. dataptr += DCTSIZE; /* advance pointer to next row */
  193. continue;
  194. }
  195. /* Even part: reverse the even part of the forward DCT. */
  196. /* The rotator is sqrt(2)*c(-6). */
  197. {
  198. if (d6) {
  199. if (d4) {
  200. if (d2) {
  201. if (d0) {
  202. /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
  203. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  204. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  205. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  206. tmp0 = (d0 + d4) << CONST_BITS;
  207. tmp1 = (d0 - d4) << CONST_BITS;
  208. tmp10 = tmp0 + tmp3;
  209. tmp13 = tmp0 - tmp3;
  210. tmp11 = tmp1 + tmp2;
  211. tmp12 = tmp1 - tmp2;
  212. } else {
  213. /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
  214. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  215. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  216. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  217. tmp0 = d4 << CONST_BITS;
  218. tmp10 = tmp0 + tmp3;
  219. tmp13 = tmp0 - tmp3;
  220. tmp11 = tmp2 - tmp0;
  221. tmp12 = -(tmp0 + tmp2);
  222. }
  223. } else {
  224. if (d0) {
  225. /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
  226. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  227. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  228. tmp0 = (d0 + d4) << CONST_BITS;
  229. tmp1 = (d0 - d4) << CONST_BITS;
  230. tmp10 = tmp0 + tmp3;
  231. tmp13 = tmp0 - tmp3;
  232. tmp11 = tmp1 + tmp2;
  233. tmp12 = tmp1 - tmp2;
  234. } else {
  235. /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
  236. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  237. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  238. tmp0 = d4 << CONST_BITS;
  239. tmp10 = tmp0 + tmp3;
  240. tmp13 = tmp0 - tmp3;
  241. tmp11 = tmp2 - tmp0;
  242. tmp12 = -(tmp0 + tmp2);
  243. }
  244. }
  245. } else {
  246. if (d2) {
  247. if (d0) {
  248. /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
  249. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  250. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  251. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  252. tmp0 = d0 << CONST_BITS;
  253. tmp10 = tmp0 + tmp3;
  254. tmp13 = tmp0 - tmp3;
  255. tmp11 = tmp0 + tmp2;
  256. tmp12 = tmp0 - tmp2;
  257. } else {
  258. /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
  259. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  260. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  261. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  262. tmp10 = tmp3;
  263. tmp13 = -tmp3;
  264. tmp11 = tmp2;
  265. tmp12 = -tmp2;
  266. }
  267. } else {
  268. if (d0) {
  269. /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
  270. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  271. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  272. tmp0 = d0 << CONST_BITS;
  273. tmp10 = tmp0 + tmp3;
  274. tmp13 = tmp0 - tmp3;
  275. tmp11 = tmp0 + tmp2;
  276. tmp12 = tmp0 - tmp2;
  277. } else {
  278. /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
  279. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  280. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  281. tmp10 = tmp3;
  282. tmp13 = -tmp3;
  283. tmp11 = tmp2;
  284. tmp12 = -tmp2;
  285. }
  286. }
  287. }
  288. } else {
  289. if (d4) {
  290. if (d2) {
  291. if (d0) {
  292. /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
  293. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  294. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  295. tmp0 = (d0 + d4) << CONST_BITS;
  296. tmp1 = (d0 - d4) << CONST_BITS;
  297. tmp10 = tmp0 + tmp3;
  298. tmp13 = tmp0 - tmp3;
  299. tmp11 = tmp1 + tmp2;
  300. tmp12 = tmp1 - tmp2;
  301. } else {
  302. /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
  303. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  304. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  305. tmp0 = d4 << CONST_BITS;
  306. tmp10 = tmp0 + tmp3;
  307. tmp13 = tmp0 - tmp3;
  308. tmp11 = tmp2 - tmp0;
  309. tmp12 = -(tmp0 + tmp2);
  310. }
  311. } else {
  312. if (d0) {
  313. /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
  314. tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
  315. tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
  316. } else {
  317. /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
  318. tmp10 = tmp13 = d4 << CONST_BITS;
  319. tmp11 = tmp12 = -tmp10;
  320. }
  321. }
  322. } else {
  323. if (d2) {
  324. if (d0) {
  325. /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
  326. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  327. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  328. tmp0 = d0 << CONST_BITS;
  329. tmp10 = tmp0 + tmp3;
  330. tmp13 = tmp0 - tmp3;
  331. tmp11 = tmp0 + tmp2;
  332. tmp12 = tmp0 - tmp2;
  333. } else {
  334. /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
  335. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  336. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  337. tmp10 = tmp3;
  338. tmp13 = -tmp3;
  339. tmp11 = tmp2;
  340. tmp12 = -tmp2;
  341. }
  342. } else {
  343. if (d0) {
  344. /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
  345. tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
  346. } else {
  347. /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
  348. tmp10 = tmp13 = tmp11 = tmp12 = 0;
  349. }
  350. }
  351. }
  352. }
  353. /* Odd part per figure 8; the matrix is unitary and hence its
  354. * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
  355. */
  356. if (d7) {
  357. if (d5) {
  358. if (d3) {
  359. if (d1) {
  360. /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
  361. z1 = d7 + d1;
  362. z2 = d5 + d3;
  363. z3 = d7 + d3;
  364. z4 = d5 + d1;
  365. z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
  366. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  367. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  368. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  369. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  370. z1 = MULTIPLY(-z1, FIX_0_899976223);
  371. z2 = MULTIPLY(-z2, FIX_2_562915447);
  372. z3 = MULTIPLY(-z3, FIX_1_961570560);
  373. z4 = MULTIPLY(-z4, FIX_0_390180644);
  374. z3 += z5;
  375. z4 += z5;
  376. tmp0 += z1 + z3;
  377. tmp1 += z2 + z4;
  378. tmp2 += z2 + z3;
  379. tmp3 += z1 + z4;
  380. } else {
  381. /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
  382. z2 = d5 + d3;
  383. z3 = d7 + d3;
  384. z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
  385. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  386. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  387. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  388. z1 = MULTIPLY(-d7, FIX_0_899976223);
  389. z2 = MULTIPLY(-z2, FIX_2_562915447);
  390. z3 = MULTIPLY(-z3, FIX_1_961570560);
  391. z4 = MULTIPLY(-d5, FIX_0_390180644);
  392. z3 += z5;
  393. z4 += z5;
  394. tmp0 += z1 + z3;
  395. tmp1 += z2 + z4;
  396. tmp2 += z2 + z3;
  397. tmp3 = z1 + z4;
  398. }
  399. } else {
  400. if (d1) {
  401. /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
  402. z1 = d7 + d1;
  403. z4 = d5 + d1;
  404. z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
  405. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  406. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  407. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  408. z1 = MULTIPLY(-z1, FIX_0_899976223);
  409. z2 = MULTIPLY(-d5, FIX_2_562915447);
  410. z3 = MULTIPLY(-d7, FIX_1_961570560);
  411. z4 = MULTIPLY(-z4, FIX_0_390180644);
  412. z3 += z5;
  413. z4 += z5;
  414. tmp0 += z1 + z3;
  415. tmp1 += z2 + z4;
  416. tmp2 = z2 + z3;
  417. tmp3 += z1 + z4;
  418. } else {
  419. /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
  420. tmp0 = MULTIPLY(-d7, FIX_0_601344887);
  421. z1 = MULTIPLY(-d7, FIX_0_899976223);
  422. z3 = MULTIPLY(-d7, FIX_1_961570560);
  423. tmp1 = MULTIPLY(-d5, FIX_0_509795579);
  424. z2 = MULTIPLY(-d5, FIX_2_562915447);
  425. z4 = MULTIPLY(-d5, FIX_0_390180644);
  426. z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
  427. z3 += z5;
  428. z4 += z5;
  429. tmp0 += z3;
  430. tmp1 += z4;
  431. tmp2 = z2 + z3;
  432. tmp3 = z1 + z4;
  433. }
  434. }
  435. } else {
  436. if (d3) {
  437. if (d1) {
  438. /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
  439. z1 = d7 + d1;
  440. z3 = d7 + d3;
  441. z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
  442. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  443. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  444. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  445. z1 = MULTIPLY(-z1, FIX_0_899976223);
  446. z2 = MULTIPLY(-d3, FIX_2_562915447);
  447. z3 = MULTIPLY(-z3, FIX_1_961570560);
  448. z4 = MULTIPLY(-d1, FIX_0_390180644);
  449. z3 += z5;
  450. z4 += z5;
  451. tmp0 += z1 + z3;
  452. tmp1 = z2 + z4;
  453. tmp2 += z2 + z3;
  454. tmp3 += z1 + z4;
  455. } else {
  456. /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
  457. z3 = d7 + d3;
  458. tmp0 = MULTIPLY(-d7, FIX_0_601344887);
  459. z1 = MULTIPLY(-d7, FIX_0_899976223);
  460. tmp2 = MULTIPLY(d3, FIX_0_509795579);
  461. z2 = MULTIPLY(-d3, FIX_2_562915447);
  462. z5 = MULTIPLY(z3, FIX_1_175875602);
  463. z3 = MULTIPLY(-z3, FIX_0_785694958);
  464. tmp0 += z3;
  465. tmp1 = z2 + z5;
  466. tmp2 += z3;
  467. tmp3 = z1 + z5;
  468. }
  469. } else {
  470. if (d1) {
  471. /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
  472. z1 = d7 + d1;
  473. z5 = MULTIPLY(z1, FIX_1_175875602);
  474. z1 = MULTIPLY(z1, FIX_0_275899380);
  475. z3 = MULTIPLY(-d7, FIX_1_961570560);
  476. tmp0 = MULTIPLY(-d7, FIX_1_662939225);
  477. z4 = MULTIPLY(-d1, FIX_0_390180644);
  478. tmp3 = MULTIPLY(d1, FIX_1_111140466);
  479. tmp0 += z1;
  480. tmp1 = z4 + z5;
  481. tmp2 = z3 + z5;
  482. tmp3 += z1;
  483. } else {
  484. /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
  485. tmp0 = MULTIPLY(-d7, FIX_1_387039845);
  486. tmp1 = MULTIPLY(d7, FIX_1_175875602);
  487. tmp2 = MULTIPLY(-d7, FIX_0_785694958);
  488. tmp3 = MULTIPLY(d7, FIX_0_275899380);
  489. }
  490. }
  491. }
  492. } else {
  493. if (d5) {
  494. if (d3) {
  495. if (d1) {
  496. /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
  497. z2 = d5 + d3;
  498. z4 = d5 + d1;
  499. z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
  500. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  501. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  502. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  503. z1 = MULTIPLY(-d1, FIX_0_899976223);
  504. z2 = MULTIPLY(-z2, FIX_2_562915447);
  505. z3 = MULTIPLY(-d3, FIX_1_961570560);
  506. z4 = MULTIPLY(-z4, FIX_0_390180644);
  507. z3 += z5;
  508. z4 += z5;
  509. tmp0 = z1 + z3;
  510. tmp1 += z2 + z4;
  511. tmp2 += z2 + z3;
  512. tmp3 += z1 + z4;
  513. } else {
  514. /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
  515. z2 = d5 + d3;
  516. z5 = MULTIPLY(z2, FIX_1_175875602);
  517. tmp1 = MULTIPLY(d5, FIX_1_662939225);
  518. z4 = MULTIPLY(-d5, FIX_0_390180644);
  519. z2 = MULTIPLY(-z2, FIX_1_387039845);
  520. tmp2 = MULTIPLY(d3, FIX_1_111140466);
  521. z3 = MULTIPLY(-d3, FIX_1_961570560);
  522. tmp0 = z3 + z5;
  523. tmp1 += z2;
  524. tmp2 += z2;
  525. tmp3 = z4 + z5;
  526. }
  527. } else {
  528. if (d1) {
  529. /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
  530. z4 = d5 + d1;
  531. z5 = MULTIPLY(z4, FIX_1_175875602);
  532. z1 = MULTIPLY(-d1, FIX_0_899976223);
  533. tmp3 = MULTIPLY(d1, FIX_0_601344887);
  534. tmp1 = MULTIPLY(-d5, FIX_0_509795579);
  535. z2 = MULTIPLY(-d5, FIX_2_562915447);
  536. z4 = MULTIPLY(z4, FIX_0_785694958);
  537. tmp0 = z1 + z5;
  538. tmp1 += z4;
  539. tmp2 = z2 + z5;
  540. tmp3 += z4;
  541. } else {
  542. /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
  543. tmp0 = MULTIPLY(d5, FIX_1_175875602);
  544. tmp1 = MULTIPLY(d5, FIX_0_275899380);
  545. tmp2 = MULTIPLY(-d5, FIX_1_387039845);
  546. tmp3 = MULTIPLY(d5, FIX_0_785694958);
  547. }
  548. }
  549. } else {
  550. if (d3) {
  551. if (d1) {
  552. /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
  553. z5 = d1 + d3;
  554. tmp3 = MULTIPLY(d1, FIX_0_211164243);
  555. tmp2 = MULTIPLY(-d3, FIX_1_451774981);
  556. z1 = MULTIPLY(d1, FIX_1_061594337);
  557. z2 = MULTIPLY(-d3, FIX_2_172734803);
  558. z4 = MULTIPLY(z5, FIX_0_785694958);
  559. z5 = MULTIPLY(z5, FIX_1_175875602);
  560. tmp0 = z1 - z4;
  561. tmp1 = z2 + z4;
  562. tmp2 += z5;
  563. tmp3 += z5;
  564. } else {
  565. /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
  566. tmp0 = MULTIPLY(-d3, FIX_0_785694958);
  567. tmp1 = MULTIPLY(-d3, FIX_1_387039845);
  568. tmp2 = MULTIPLY(-d3, FIX_0_275899380);
  569. tmp3 = MULTIPLY(d3, FIX_1_175875602);
  570. }
  571. } else {
  572. if (d1) {
  573. /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
  574. tmp0 = MULTIPLY(d1, FIX_0_275899380);
  575. tmp1 = MULTIPLY(d1, FIX_0_785694958);
  576. tmp2 = MULTIPLY(d1, FIX_1_175875602);
  577. tmp3 = MULTIPLY(d1, FIX_1_387039845);
  578. } else {
  579. /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
  580. tmp0 = tmp1 = tmp2 = tmp3 = 0;
  581. }
  582. }
  583. }
  584. }
  585. }
  586. /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
  587. dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
  588. dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
  589. dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
  590. dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
  591. dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
  592. dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
  593. dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
  594. dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
  595. dataptr += DCTSIZE; /* advance pointer to next row */
  596. }
  597. /* Pass 2: process columns. */
  598. /* Note that we must descale the results by a factor of 8 == 2**3, */
  599. /* and also undo the PASS1_BITS scaling. */
  600. dataptr = data;
  601. for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
  602. /* Columns of zeroes can be exploited in the same way as we did with rows.
  603. * However, the row calculation has created many nonzero AC terms, so the
  604. * simplification applies less often (typically 5% to 10% of the time).
  605. * On machines with very fast multiplication, it's possible that the
  606. * test takes more time than it's worth. In that case this section
  607. * may be commented out.
  608. */
  609. d0 = dataptr[DCTSIZE*0];
  610. d1 = dataptr[DCTSIZE*1];
  611. d2 = dataptr[DCTSIZE*2];
  612. d3 = dataptr[DCTSIZE*3];
  613. d4 = dataptr[DCTSIZE*4];
  614. d5 = dataptr[DCTSIZE*5];
  615. d6 = dataptr[DCTSIZE*6];
  616. d7 = dataptr[DCTSIZE*7];
  617. /* Even part: reverse the even part of the forward DCT. */
  618. /* The rotator is sqrt(2)*c(-6). */
  619. if (d6) {
  620. if (d4) {
  621. if (d2) {
  622. if (d0) {
  623. /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
  624. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  625. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  626. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  627. tmp0 = (d0 + d4) << CONST_BITS;
  628. tmp1 = (d0 - d4) << CONST_BITS;
  629. tmp10 = tmp0 + tmp3;
  630. tmp13 = tmp0 - tmp3;
  631. tmp11 = tmp1 + tmp2;
  632. tmp12 = tmp1 - tmp2;
  633. } else {
  634. /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
  635. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  636. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  637. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  638. tmp0 = d4 << CONST_BITS;
  639. tmp10 = tmp0 + tmp3;
  640. tmp13 = tmp0 - tmp3;
  641. tmp11 = tmp2 - tmp0;
  642. tmp12 = -(tmp0 + tmp2);
  643. }
  644. } else {
  645. if (d0) {
  646. /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
  647. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  648. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  649. tmp0 = (d0 + d4) << CONST_BITS;
  650. tmp1 = (d0 - d4) << CONST_BITS;
  651. tmp10 = tmp0 + tmp3;
  652. tmp13 = tmp0 - tmp3;
  653. tmp11 = tmp1 + tmp2;
  654. tmp12 = tmp1 - tmp2;
  655. } else {
  656. /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
  657. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  658. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  659. tmp0 = d4 << CONST_BITS;
  660. tmp10 = tmp0 + tmp3;
  661. tmp13 = tmp0 - tmp3;
  662. tmp11 = tmp2 - tmp0;
  663. tmp12 = -(tmp0 + tmp2);
  664. }
  665. }
  666. } else {
  667. if (d2) {
  668. if (d0) {
  669. /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
  670. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  671. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  672. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  673. tmp0 = d0 << CONST_BITS;
  674. tmp10 = tmp0 + tmp3;
  675. tmp13 = tmp0 - tmp3;
  676. tmp11 = tmp0 + tmp2;
  677. tmp12 = tmp0 - tmp2;
  678. } else {
  679. /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
  680. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  681. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  682. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  683. tmp10 = tmp3;
  684. tmp13 = -tmp3;
  685. tmp11 = tmp2;
  686. tmp12 = -tmp2;
  687. }
  688. } else {
  689. if (d0) {
  690. /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
  691. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  692. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  693. tmp0 = d0 << CONST_BITS;
  694. tmp10 = tmp0 + tmp3;
  695. tmp13 = tmp0 - tmp3;
  696. tmp11 = tmp0 + tmp2;
  697. tmp12 = tmp0 - tmp2;
  698. } else {
  699. /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
  700. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  701. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  702. tmp10 = tmp3;
  703. tmp13 = -tmp3;
  704. tmp11 = tmp2;
  705. tmp12 = -tmp2;
  706. }
  707. }
  708. }
  709. } else {
  710. if (d4) {
  711. if (d2) {
  712. if (d0) {
  713. /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
  714. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  715. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  716. tmp0 = (d0 + d4) << CONST_BITS;
  717. tmp1 = (d0 - d4) << CONST_BITS;
  718. tmp10 = tmp0 + tmp3;
  719. tmp13 = tmp0 - tmp3;
  720. tmp11 = tmp1 + tmp2;
  721. tmp12 = tmp1 - tmp2;
  722. } else {
  723. /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
  724. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  725. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  726. tmp0 = d4 << CONST_BITS;
  727. tmp10 = tmp0 + tmp3;
  728. tmp13 = tmp0 - tmp3;
  729. tmp11 = tmp2 - tmp0;
  730. tmp12 = -(tmp0 + tmp2);
  731. }
  732. } else {
  733. if (d0) {
  734. /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
  735. tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
  736. tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
  737. } else {
  738. /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
  739. tmp10 = tmp13 = d4 << CONST_BITS;
  740. tmp11 = tmp12 = -tmp10;
  741. }
  742. }
  743. } else {
  744. if (d2) {
  745. if (d0) {
  746. /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
  747. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  748. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  749. tmp0 = d0 << CONST_BITS;
  750. tmp10 = tmp0 + tmp3;
  751. tmp13 = tmp0 - tmp3;
  752. tmp11 = tmp0 + tmp2;
  753. tmp12 = tmp0 - tmp2;
  754. } else {
  755. /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
  756. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  757. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  758. tmp10 = tmp3;
  759. tmp13 = -tmp3;
  760. tmp11 = tmp2;
  761. tmp12 = -tmp2;
  762. }
  763. } else {
  764. if (d0) {
  765. /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
  766. tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
  767. } else {
  768. /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
  769. tmp10 = tmp13 = tmp11 = tmp12 = 0;
  770. }
  771. }
  772. }
  773. }
  774. /* Odd part per figure 8; the matrix is unitary and hence its
  775. * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
  776. */
  777. if (d7) {
  778. if (d5) {
  779. if (d3) {
  780. if (d1) {
  781. /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
  782. z1 = d7 + d1;
  783. z2 = d5 + d3;
  784. z3 = d7 + d3;
  785. z4 = d5 + d1;
  786. z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
  787. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  788. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  789. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  790. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  791. z1 = MULTIPLY(-z1, FIX_0_899976223);
  792. z2 = MULTIPLY(-z2, FIX_2_562915447);
  793. z3 = MULTIPLY(-z3, FIX_1_961570560);
  794. z4 = MULTIPLY(-z4, FIX_0_390180644);
  795. z3 += z5;
  796. z4 += z5;
  797. tmp0 += z1 + z3;
  798. tmp1 += z2 + z4;
  799. tmp2 += z2 + z3;
  800. tmp3 += z1 + z4;
  801. } else {
  802. /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
  803. z1 = d7;
  804. z2 = d5 + d3;
  805. z3 = d7 + d3;
  806. z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
  807. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  808. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  809. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  810. z1 = MULTIPLY(-d7, FIX_0_899976223);
  811. z2 = MULTIPLY(-z2, FIX_2_562915447);
  812. z3 = MULTIPLY(-z3, FIX_1_961570560);
  813. z4 = MULTIPLY(-d5, FIX_0_390180644);
  814. z3 += z5;
  815. z4 += z5;
  816. tmp0 += z1 + z3;
  817. tmp1 += z2 + z4;
  818. tmp2 += z2 + z3;
  819. tmp3 = z1 + z4;
  820. }
  821. } else {
  822. if (d1) {
  823. /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
  824. z1 = d7 + d1;
  825. z2 = d5;
  826. z3 = d7;
  827. z4 = d5 + d1;
  828. z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
  829. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  830. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  831. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  832. z1 = MULTIPLY(-z1, FIX_0_899976223);
  833. z2 = MULTIPLY(-d5, FIX_2_562915447);
  834. z3 = MULTIPLY(-d7, FIX_1_961570560);
  835. z4 = MULTIPLY(-z4, FIX_0_390180644);
  836. z3 += z5;
  837. z4 += z5;
  838. tmp0 += z1 + z3;
  839. tmp1 += z2 + z4;
  840. tmp2 = z2 + z3;
  841. tmp3 += z1 + z4;
  842. } else {
  843. /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
  844. tmp0 = MULTIPLY(-d7, FIX_0_601344887);
  845. z1 = MULTIPLY(-d7, FIX_0_899976223);
  846. z3 = MULTIPLY(-d7, FIX_1_961570560);
  847. tmp1 = MULTIPLY(-d5, FIX_0_509795579);
  848. z2 = MULTIPLY(-d5, FIX_2_562915447);
  849. z4 = MULTIPLY(-d5, FIX_0_390180644);
  850. z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
  851. z3 += z5;
  852. z4 += z5;
  853. tmp0 += z3;
  854. tmp1 += z4;
  855. tmp2 = z2 + z3;
  856. tmp3 = z1 + z4;
  857. }
  858. }
  859. } else {
  860. if (d3) {
  861. if (d1) {
  862. /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
  863. z1 = d7 + d1;
  864. z3 = d7 + d3;
  865. z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
  866. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  867. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  868. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  869. z1 = MULTIPLY(-z1, FIX_0_899976223);
  870. z2 = MULTIPLY(-d3, FIX_2_562915447);
  871. z3 = MULTIPLY(-z3, FIX_1_961570560);
  872. z4 = MULTIPLY(-d1, FIX_0_390180644);
  873. z3 += z5;
  874. z4 += z5;
  875. tmp0 += z1 + z3;
  876. tmp1 = z2 + z4;
  877. tmp2 += z2 + z3;
  878. tmp3 += z1 + z4;
  879. } else {
  880. /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
  881. z3 = d7 + d3;
  882. tmp0 = MULTIPLY(-d7, FIX_0_601344887);
  883. z1 = MULTIPLY(-d7, FIX_0_899976223);
  884. tmp2 = MULTIPLY(d3, FIX_0_509795579);
  885. z2 = MULTIPLY(-d3, FIX_2_562915447);
  886. z5 = MULTIPLY(z3, FIX_1_175875602);
  887. z3 = MULTIPLY(-z3, FIX_0_785694958);
  888. tmp0 += z3;
  889. tmp1 = z2 + z5;
  890. tmp2 += z3;
  891. tmp3 = z1 + z5;
  892. }
  893. } else {
  894. if (d1) {
  895. /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
  896. z1 = d7 + d1;
  897. z5 = MULTIPLY(z1, FIX_1_175875602);
  898. z1 = MULTIPLY(z1, FIX_0_275899380);
  899. z3 = MULTIPLY(-d7, FIX_1_961570560);
  900. tmp0 = MULTIPLY(-d7, FIX_1_662939225);
  901. z4 = MULTIPLY(-d1, FIX_0_390180644);
  902. tmp3 = MULTIPLY(d1, FIX_1_111140466);
  903. tmp0 += z1;
  904. tmp1 = z4 + z5;
  905. tmp2 = z3 + z5;
  906. tmp3 += z1;
  907. } else {
  908. /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
  909. tmp0 = MULTIPLY(-d7, FIX_1_387039845);
  910. tmp1 = MULTIPLY(d7, FIX_1_175875602);
  911. tmp2 = MULTIPLY(-d7, FIX_0_785694958);
  912. tmp3 = MULTIPLY(d7, FIX_0_275899380);
  913. }
  914. }
  915. }
  916. } else {
  917. if (d5) {
  918. if (d3) {
  919. if (d1) {
  920. /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
  921. z2 = d5 + d3;
  922. z4 = d5 + d1;
  923. z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
  924. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  925. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  926. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  927. z1 = MULTIPLY(-d1, FIX_0_899976223);
  928. z2 = MULTIPLY(-z2, FIX_2_562915447);
  929. z3 = MULTIPLY(-d3, FIX_1_961570560);
  930. z4 = MULTIPLY(-z4, FIX_0_390180644);
  931. z3 += z5;
  932. z4 += z5;
  933. tmp0 = z1 + z3;
  934. tmp1 += z2 + z4;
  935. tmp2 += z2 + z3;
  936. tmp3 += z1 + z4;
  937. } else {
  938. /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
  939. z2 = d5 + d3;
  940. z5 = MULTIPLY(z2, FIX_1_175875602);
  941. tmp1 = MULTIPLY(d5, FIX_1_662939225);
  942. z4 = MULTIPLY(-d5, FIX_0_390180644);
  943. z2 = MULTIPLY(-z2, FIX_1_387039845);
  944. tmp2 = MULTIPLY(d3, FIX_1_111140466);
  945. z3 = MULTIPLY(-d3, FIX_1_961570560);
  946. tmp0 = z3 + z5;
  947. tmp1 += z2;
  948. tmp2 += z2;
  949. tmp3 = z4 + z5;
  950. }
  951. } else {
  952. if (d1) {
  953. /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
  954. z4 = d5 + d1;
  955. z5 = MULTIPLY(z4, FIX_1_175875602);
  956. z1 = MULTIPLY(-d1, FIX_0_899976223);
  957. tmp3 = MULTIPLY(d1, FIX_0_601344887);
  958. tmp1 = MULTIPLY(-d5, FIX_0_509795579);
  959. z2 = MULTIPLY(-d5, FIX_2_562915447);
  960. z4 = MULTIPLY(z4, FIX_0_785694958);
  961. tmp0 = z1 + z5;
  962. tmp1 += z4;
  963. tmp2 = z2 + z5;
  964. tmp3 += z4;
  965. } else {
  966. /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
  967. tmp0 = MULTIPLY(d5, FIX_1_175875602);
  968. tmp1 = MULTIPLY(d5, FIX_0_275899380);
  969. tmp2 = MULTIPLY(-d5, FIX_1_387039845);
  970. tmp3 = MULTIPLY(d5, FIX_0_785694958);
  971. }
  972. }
  973. } else {
  974. if (d3) {
  975. if (d1) {
  976. /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
  977. z5 = d1 + d3;
  978. tmp3 = MULTIPLY(d1, FIX_0_211164243);
  979. tmp2 = MULTIPLY(-d3, FIX_1_451774981);
  980. z1 = MULTIPLY(d1, FIX_1_061594337);
  981. z2 = MULTIPLY(-d3, FIX_2_172734803);
  982. z4 = MULTIPLY(z5, FIX_0_785694958);
  983. z5 = MULTIPLY(z5, FIX_1_175875602);
  984. tmp0 = z1 - z4;
  985. tmp1 = z2 + z4;
  986. tmp2 += z5;
  987. tmp3 += z5;
  988. } else {
  989. /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
  990. tmp0 = MULTIPLY(-d3, FIX_0_785694958);
  991. tmp1 = MULTIPLY(-d3, FIX_1_387039845);
  992. tmp2 = MULTIPLY(-d3, FIX_0_275899380);
  993. tmp3 = MULTIPLY(d3, FIX_1_175875602);
  994. }
  995. } else {
  996. if (d1) {
  997. /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
  998. tmp0 = MULTIPLY(d1, FIX_0_275899380);
  999. tmp1 = MULTIPLY(d1, FIX_0_785694958);
  1000. tmp2 = MULTIPLY(d1, FIX_1_175875602);
  1001. tmp3 = MULTIPLY(d1, FIX_1_387039845);
  1002. } else {
  1003. /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
  1004. tmp0 = tmp1 = tmp2 = tmp3 = 0;
  1005. }
  1006. }
  1007. }
  1008. }
  1009. /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
  1010. dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
  1011. CONST_BITS+PASS1_BITS+3);
  1012. dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
  1013. CONST_BITS+PASS1_BITS+3);
  1014. dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
  1015. CONST_BITS+PASS1_BITS+3);
  1016. dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
  1017. CONST_BITS+PASS1_BITS+3);
  1018. dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
  1019. CONST_BITS+PASS1_BITS+3);
  1020. dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
  1021. CONST_BITS+PASS1_BITS+3);
  1022. dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
  1023. CONST_BITS+PASS1_BITS+3);
  1024. dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
  1025. CONST_BITS+PASS1_BITS+3);
  1026. dataptr++; /* advance pointer to next column */
  1027. }
  1028. }