You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1168 lines
33KB

  1. /*
  2. * jrevdct.c
  3. *
  4. * Copyright (C) 1991, 1992, Thomas G. Lane.
  5. * This file is part of the Independent JPEG Group's software.
  6. * For conditions of distribution and use, see the accompanying README file.
  7. *
  8. * This file contains the basic inverse-DCT transformation subroutine.
  9. *
  10. * This implementation is based on an algorithm described in
  11. * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
  12. * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
  13. * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
  14. * The primary algorithm described there uses 11 multiplies and 29 adds.
  15. * We use their alternate method with 12 multiplies and 32 adds.
  16. * The advantage of this method is that no data path contains more than one
  17. * multiplication; this allows a very simple and accurate implementation in
  18. * scaled fixed-point arithmetic, with a minimal number of shifts.
  19. *
  20. * I've made lots of modifications to attempt to take advantage of the
  21. * sparse nature of the DCT matrices we're getting. Although the logic
  22. * is cumbersome, it's straightforward and the resulting code is much
  23. * faster.
  24. *
  25. * A better way to do this would be to pass in the DCT block as a sparse
  26. * matrix, perhaps with the difference cases encoded.
  27. */
  28. #include "common.h"
  29. #include "dsputil.h"
  30. #define EIGHT_BIT_SAMPLES
  31. #define DCTSIZE 8
  32. #define DCTSIZE2 64
  33. #define GLOBAL
  34. #define RIGHT_SHIFT(x, n) ((x) >> (n))
  35. typedef DCTELEM DCTBLOCK[DCTSIZE2];
  36. #define CONST_BITS 13
  37. /*
  38. * This routine is specialized to the case DCTSIZE = 8.
  39. */
  40. #if DCTSIZE != 8
  41. Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
  42. #endif
  43. /*
  44. * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
  45. * on each column. Direct algorithms are also available, but they are
  46. * much more complex and seem not to be any faster when reduced to code.
  47. *
  48. * The poop on this scaling stuff is as follows:
  49. *
  50. * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
  51. * larger than the true IDCT outputs. The final outputs are therefore
  52. * a factor of N larger than desired; since N=8 this can be cured by
  53. * a simple right shift at the end of the algorithm. The advantage of
  54. * this arrangement is that we save two multiplications per 1-D IDCT,
  55. * because the y0 and y4 inputs need not be divided by sqrt(N).
  56. *
  57. * We have to do addition and subtraction of the integer inputs, which
  58. * is no problem, and multiplication by fractional constants, which is
  59. * a problem to do in integer arithmetic. We multiply all the constants
  60. * by CONST_SCALE and convert them to integer constants (thus retaining
  61. * CONST_BITS bits of precision in the constants). After doing a
  62. * multiplication we have to divide the product by CONST_SCALE, with proper
  63. * rounding, to produce the correct output. This division can be done
  64. * cheaply as a right shift of CONST_BITS bits. We postpone shifting
  65. * as long as possible so that partial sums can be added together with
  66. * full fractional precision.
  67. *
  68. * The outputs of the first pass are scaled up by PASS1_BITS bits so that
  69. * they are represented to better-than-integral precision. These outputs
  70. * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
  71. * with the recommended scaling. (To scale up 12-bit sample data further, an
  72. * intermediate int32 array would be needed.)
  73. *
  74. * To avoid overflow of the 32-bit intermediate results in pass 2, we must
  75. * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
  76. * shows that the values given below are the most effective.
  77. */
  78. #ifdef EIGHT_BIT_SAMPLES
  79. #define PASS1_BITS 2
  80. #else
  81. #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
  82. #endif
  83. #define ONE ((INT32) 1)
  84. #define CONST_SCALE (ONE << CONST_BITS)
  85. /* Convert a positive real constant to an integer scaled by CONST_SCALE.
  86. * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
  87. * you will pay a significant penalty in run time. In that case, figure
  88. * the correct integer constant values and insert them by hand.
  89. */
  90. /* Actually FIX is no longer used, we precomputed them all */
  91. #define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5))
  92. /* Descale and correctly round an INT32 value that's scaled by N bits.
  93. * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
  94. * the fudge factor is correct for either sign of X.
  95. */
  96. #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
  97. /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
  98. * For 8-bit samples with the recommended scaling, all the variable
  99. * and constant values involved are no more than 16 bits wide, so a
  100. * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
  101. * this provides a useful speedup on many machines.
  102. * There is no way to specify a 16x16->32 multiply in portable C, but
  103. * some C compilers will do the right thing if you provide the correct
  104. * combination of casts.
  105. * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
  106. */
  107. #ifdef EIGHT_BIT_SAMPLES
  108. #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
  109. #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const)))
  110. #endif
  111. #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
  112. #define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const)))
  113. #endif
  114. #endif
  115. #ifndef MULTIPLY /* default definition */
  116. #define MULTIPLY(var,const) ((var) * (const))
  117. #endif
  118. /*
  119. Unlike our decoder where we approximate the FIXes, we need to use exact
  120. ones here or successive P-frames will drift too much with Reference frame coding
  121. */
  122. #define FIX_0_211164243 1730
  123. #define FIX_0_275899380 2260
  124. #define FIX_0_298631336 2446
  125. #define FIX_0_390180644 3196
  126. #define FIX_0_509795579 4176
  127. #define FIX_0_541196100 4433
  128. #define FIX_0_601344887 4926
  129. #define FIX_0_765366865 6270
  130. #define FIX_0_785694958 6436
  131. #define FIX_0_899976223 7373
  132. #define FIX_1_061594337 8697
  133. #define FIX_1_111140466 9102
  134. #define FIX_1_175875602 9633
  135. #define FIX_1_306562965 10703
  136. #define FIX_1_387039845 11363
  137. #define FIX_1_451774981 11893
  138. #define FIX_1_501321110 12299
  139. #define FIX_1_662939225 13623
  140. #define FIX_1_847759065 15137
  141. #define FIX_1_961570560 16069
  142. #define FIX_2_053119869 16819
  143. #define FIX_2_172734803 17799
  144. #define FIX_2_562915447 20995
  145. #define FIX_3_072711026 25172
  146. /*
  147. * Perform the inverse DCT on one block of coefficients.
  148. */
  149. void j_rev_dct(DCTBLOCK data)
  150. {
  151. INT32 tmp0, tmp1, tmp2, tmp3;
  152. INT32 tmp10, tmp11, tmp12, tmp13;
  153. INT32 z1, z2, z3, z4, z5;
  154. INT32 d0, d1, d2, d3, d4, d5, d6, d7;
  155. register DCTELEM *dataptr;
  156. int rowctr;
  157. /* Pass 1: process rows. */
  158. /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
  159. /* furthermore, we scale the results by 2**PASS1_BITS. */
  160. dataptr = data;
  161. for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
  162. /* Due to quantization, we will usually find that many of the input
  163. * coefficients are zero, especially the AC terms. We can exploit this
  164. * by short-circuiting the IDCT calculation for any row in which all
  165. * the AC terms are zero. In that case each output is equal to the
  166. * DC coefficient (with scale factor as needed).
  167. * With typical images and quantization tables, half or more of the
  168. * row DCT calculations can be simplified this way.
  169. */
  170. register int *idataptr = (int*)dataptr;
  171. d0 = dataptr[0];
  172. d1 = dataptr[1];
  173. d2 = dataptr[2];
  174. d3 = dataptr[3];
  175. d4 = dataptr[4];
  176. d5 = dataptr[5];
  177. d6 = dataptr[6];
  178. d7 = dataptr[7];
  179. if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) {
  180. /* AC terms all zero */
  181. if (d0) {
  182. /* Compute a 32 bit value to assign. */
  183. DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
  184. register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
  185. idataptr[0] = v;
  186. idataptr[1] = v;
  187. idataptr[2] = v;
  188. idataptr[3] = v;
  189. }
  190. dataptr += DCTSIZE; /* advance pointer to next row */
  191. continue;
  192. }
  193. /* Even part: reverse the even part of the forward DCT. */
  194. /* The rotator is sqrt(2)*c(-6). */
  195. {
  196. if (d6) {
  197. if (d4) {
  198. if (d2) {
  199. if (d0) {
  200. /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
  201. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  202. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  203. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  204. tmp0 = (d0 + d4) << CONST_BITS;
  205. tmp1 = (d0 - d4) << CONST_BITS;
  206. tmp10 = tmp0 + tmp3;
  207. tmp13 = tmp0 - tmp3;
  208. tmp11 = tmp1 + tmp2;
  209. tmp12 = tmp1 - tmp2;
  210. } else {
  211. /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
  212. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  213. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  214. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  215. tmp0 = d4 << CONST_BITS;
  216. tmp10 = tmp0 + tmp3;
  217. tmp13 = tmp0 - tmp3;
  218. tmp11 = tmp2 - tmp0;
  219. tmp12 = -(tmp0 + tmp2);
  220. }
  221. } else {
  222. if (d0) {
  223. /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
  224. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  225. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  226. tmp0 = (d0 + d4) << CONST_BITS;
  227. tmp1 = (d0 - d4) << CONST_BITS;
  228. tmp10 = tmp0 + tmp3;
  229. tmp13 = tmp0 - tmp3;
  230. tmp11 = tmp1 + tmp2;
  231. tmp12 = tmp1 - tmp2;
  232. } else {
  233. /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
  234. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  235. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  236. tmp0 = d4 << CONST_BITS;
  237. tmp10 = tmp0 + tmp3;
  238. tmp13 = tmp0 - tmp3;
  239. tmp11 = tmp2 - tmp0;
  240. tmp12 = -(tmp0 + tmp2);
  241. }
  242. }
  243. } else {
  244. if (d2) {
  245. if (d0) {
  246. /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
  247. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  248. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  249. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  250. tmp0 = d0 << CONST_BITS;
  251. tmp10 = tmp0 + tmp3;
  252. tmp13 = tmp0 - tmp3;
  253. tmp11 = tmp0 + tmp2;
  254. tmp12 = tmp0 - tmp2;
  255. } else {
  256. /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
  257. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  258. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  259. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  260. tmp10 = tmp3;
  261. tmp13 = -tmp3;
  262. tmp11 = tmp2;
  263. tmp12 = -tmp2;
  264. }
  265. } else {
  266. if (d0) {
  267. /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
  268. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  269. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  270. tmp0 = d0 << CONST_BITS;
  271. tmp10 = tmp0 + tmp3;
  272. tmp13 = tmp0 - tmp3;
  273. tmp11 = tmp0 + tmp2;
  274. tmp12 = tmp0 - tmp2;
  275. } else {
  276. /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
  277. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  278. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  279. tmp10 = tmp3;
  280. tmp13 = -tmp3;
  281. tmp11 = tmp2;
  282. tmp12 = -tmp2;
  283. }
  284. }
  285. }
  286. } else {
  287. if (d4) {
  288. if (d2) {
  289. if (d0) {
  290. /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
  291. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  292. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  293. tmp0 = (d0 + d4) << CONST_BITS;
  294. tmp1 = (d0 - d4) << CONST_BITS;
  295. tmp10 = tmp0 + tmp3;
  296. tmp13 = tmp0 - tmp3;
  297. tmp11 = tmp1 + tmp2;
  298. tmp12 = tmp1 - tmp2;
  299. } else {
  300. /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
  301. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  302. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  303. tmp0 = d4 << CONST_BITS;
  304. tmp10 = tmp0 + tmp3;
  305. tmp13 = tmp0 - tmp3;
  306. tmp11 = tmp2 - tmp0;
  307. tmp12 = -(tmp0 + tmp2);
  308. }
  309. } else {
  310. if (d0) {
  311. /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
  312. tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
  313. tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
  314. } else {
  315. /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
  316. tmp10 = tmp13 = d4 << CONST_BITS;
  317. tmp11 = tmp12 = -tmp10;
  318. }
  319. }
  320. } else {
  321. if (d2) {
  322. if (d0) {
  323. /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
  324. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  325. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  326. tmp0 = d0 << CONST_BITS;
  327. tmp10 = tmp0 + tmp3;
  328. tmp13 = tmp0 - tmp3;
  329. tmp11 = tmp0 + tmp2;
  330. tmp12 = tmp0 - tmp2;
  331. } else {
  332. /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
  333. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  334. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  335. tmp10 = tmp3;
  336. tmp13 = -tmp3;
  337. tmp11 = tmp2;
  338. tmp12 = -tmp2;
  339. }
  340. } else {
  341. if (d0) {
  342. /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
  343. tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
  344. } else {
  345. /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
  346. tmp10 = tmp13 = tmp11 = tmp12 = 0;
  347. }
  348. }
  349. }
  350. }
  351. /* Odd part per figure 8; the matrix is unitary and hence its
  352. * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
  353. */
  354. if (d7) {
  355. if (d5) {
  356. if (d3) {
  357. if (d1) {
  358. /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
  359. z1 = d7 + d1;
  360. z2 = d5 + d3;
  361. z3 = d7 + d3;
  362. z4 = d5 + d1;
  363. z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
  364. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  365. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  366. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  367. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  368. z1 = MULTIPLY(-z1, FIX_0_899976223);
  369. z2 = MULTIPLY(-z2, FIX_2_562915447);
  370. z3 = MULTIPLY(-z3, FIX_1_961570560);
  371. z4 = MULTIPLY(-z4, FIX_0_390180644);
  372. z3 += z5;
  373. z4 += z5;
  374. tmp0 += z1 + z3;
  375. tmp1 += z2 + z4;
  376. tmp2 += z2 + z3;
  377. tmp3 += z1 + z4;
  378. } else {
  379. /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
  380. z2 = d5 + d3;
  381. z3 = d7 + d3;
  382. z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
  383. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  384. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  385. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  386. z1 = MULTIPLY(-d7, FIX_0_899976223);
  387. z2 = MULTIPLY(-z2, FIX_2_562915447);
  388. z3 = MULTIPLY(-z3, FIX_1_961570560);
  389. z4 = MULTIPLY(-d5, FIX_0_390180644);
  390. z3 += z5;
  391. z4 += z5;
  392. tmp0 += z1 + z3;
  393. tmp1 += z2 + z4;
  394. tmp2 += z2 + z3;
  395. tmp3 = z1 + z4;
  396. }
  397. } else {
  398. if (d1) {
  399. /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
  400. z1 = d7 + d1;
  401. z4 = d5 + d1;
  402. z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
  403. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  404. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  405. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  406. z1 = MULTIPLY(-z1, FIX_0_899976223);
  407. z2 = MULTIPLY(-d5, FIX_2_562915447);
  408. z3 = MULTIPLY(-d7, FIX_1_961570560);
  409. z4 = MULTIPLY(-z4, FIX_0_390180644);
  410. z3 += z5;
  411. z4 += z5;
  412. tmp0 += z1 + z3;
  413. tmp1 += z2 + z4;
  414. tmp2 = z2 + z3;
  415. tmp3 += z1 + z4;
  416. } else {
  417. /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
  418. tmp0 = MULTIPLY(-d7, FIX_0_601344887);
  419. z1 = MULTIPLY(-d7, FIX_0_899976223);
  420. z3 = MULTIPLY(-d7, FIX_1_961570560);
  421. tmp1 = MULTIPLY(-d5, FIX_0_509795579);
  422. z2 = MULTIPLY(-d5, FIX_2_562915447);
  423. z4 = MULTIPLY(-d5, FIX_0_390180644);
  424. z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
  425. z3 += z5;
  426. z4 += z5;
  427. tmp0 += z3;
  428. tmp1 += z4;
  429. tmp2 = z2 + z3;
  430. tmp3 = z1 + z4;
  431. }
  432. }
  433. } else {
  434. if (d3) {
  435. if (d1) {
  436. /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
  437. z1 = d7 + d1;
  438. z3 = d7 + d3;
  439. z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
  440. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  441. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  442. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  443. z1 = MULTIPLY(-z1, FIX_0_899976223);
  444. z2 = MULTIPLY(-d3, FIX_2_562915447);
  445. z3 = MULTIPLY(-z3, FIX_1_961570560);
  446. z4 = MULTIPLY(-d1, FIX_0_390180644);
  447. z3 += z5;
  448. z4 += z5;
  449. tmp0 += z1 + z3;
  450. tmp1 = z2 + z4;
  451. tmp2 += z2 + z3;
  452. tmp3 += z1 + z4;
  453. } else {
  454. /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
  455. z3 = d7 + d3;
  456. tmp0 = MULTIPLY(-d7, FIX_0_601344887);
  457. z1 = MULTIPLY(-d7, FIX_0_899976223);
  458. tmp2 = MULTIPLY(d3, FIX_0_509795579);
  459. z2 = MULTIPLY(-d3, FIX_2_562915447);
  460. z5 = MULTIPLY(z3, FIX_1_175875602);
  461. z3 = MULTIPLY(-z3, FIX_0_785694958);
  462. tmp0 += z3;
  463. tmp1 = z2 + z5;
  464. tmp2 += z3;
  465. tmp3 = z1 + z5;
  466. }
  467. } else {
  468. if (d1) {
  469. /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
  470. z1 = d7 + d1;
  471. z5 = MULTIPLY(z1, FIX_1_175875602);
  472. z1 = MULTIPLY(z1, FIX_0_275899380);
  473. z3 = MULTIPLY(-d7, FIX_1_961570560);
  474. tmp0 = MULTIPLY(-d7, FIX_1_662939225);
  475. z4 = MULTIPLY(-d1, FIX_0_390180644);
  476. tmp3 = MULTIPLY(d1, FIX_1_111140466);
  477. tmp0 += z1;
  478. tmp1 = z4 + z5;
  479. tmp2 = z3 + z5;
  480. tmp3 += z1;
  481. } else {
  482. /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
  483. tmp0 = MULTIPLY(-d7, FIX_1_387039845);
  484. tmp1 = MULTIPLY(d7, FIX_1_175875602);
  485. tmp2 = MULTIPLY(-d7, FIX_0_785694958);
  486. tmp3 = MULTIPLY(d7, FIX_0_275899380);
  487. }
  488. }
  489. }
  490. } else {
  491. if (d5) {
  492. if (d3) {
  493. if (d1) {
  494. /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
  495. z2 = d5 + d3;
  496. z4 = d5 + d1;
  497. z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
  498. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  499. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  500. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  501. z1 = MULTIPLY(-d1, FIX_0_899976223);
  502. z2 = MULTIPLY(-z2, FIX_2_562915447);
  503. z3 = MULTIPLY(-d3, FIX_1_961570560);
  504. z4 = MULTIPLY(-z4, FIX_0_390180644);
  505. z3 += z5;
  506. z4 += z5;
  507. tmp0 = z1 + z3;
  508. tmp1 += z2 + z4;
  509. tmp2 += z2 + z3;
  510. tmp3 += z1 + z4;
  511. } else {
  512. /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
  513. z2 = d5 + d3;
  514. z5 = MULTIPLY(z2, FIX_1_175875602);
  515. tmp1 = MULTIPLY(d5, FIX_1_662939225);
  516. z4 = MULTIPLY(-d5, FIX_0_390180644);
  517. z2 = MULTIPLY(-z2, FIX_1_387039845);
  518. tmp2 = MULTIPLY(d3, FIX_1_111140466);
  519. z3 = MULTIPLY(-d3, FIX_1_961570560);
  520. tmp0 = z3 + z5;
  521. tmp1 += z2;
  522. tmp2 += z2;
  523. tmp3 = z4 + z5;
  524. }
  525. } else {
  526. if (d1) {
  527. /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
  528. z4 = d5 + d1;
  529. z5 = MULTIPLY(z4, FIX_1_175875602);
  530. z1 = MULTIPLY(-d1, FIX_0_899976223);
  531. tmp3 = MULTIPLY(d1, FIX_0_601344887);
  532. tmp1 = MULTIPLY(-d5, FIX_0_509795579);
  533. z2 = MULTIPLY(-d5, FIX_2_562915447);
  534. z4 = MULTIPLY(z4, FIX_0_785694958);
  535. tmp0 = z1 + z5;
  536. tmp1 += z4;
  537. tmp2 = z2 + z5;
  538. tmp3 += z4;
  539. } else {
  540. /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
  541. tmp0 = MULTIPLY(d5, FIX_1_175875602);
  542. tmp1 = MULTIPLY(d5, FIX_0_275899380);
  543. tmp2 = MULTIPLY(-d5, FIX_1_387039845);
  544. tmp3 = MULTIPLY(d5, FIX_0_785694958);
  545. }
  546. }
  547. } else {
  548. if (d3) {
  549. if (d1) {
  550. /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
  551. z5 = d1 + d3;
  552. tmp3 = MULTIPLY(d1, FIX_0_211164243);
  553. tmp2 = MULTIPLY(-d3, FIX_1_451774981);
  554. z1 = MULTIPLY(d1, FIX_1_061594337);
  555. z2 = MULTIPLY(-d3, FIX_2_172734803);
  556. z4 = MULTIPLY(z5, FIX_0_785694958);
  557. z5 = MULTIPLY(z5, FIX_1_175875602);
  558. tmp0 = z1 - z4;
  559. tmp1 = z2 + z4;
  560. tmp2 += z5;
  561. tmp3 += z5;
  562. } else {
  563. /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
  564. tmp0 = MULTIPLY(-d3, FIX_0_785694958);
  565. tmp1 = MULTIPLY(-d3, FIX_1_387039845);
  566. tmp2 = MULTIPLY(-d3, FIX_0_275899380);
  567. tmp3 = MULTIPLY(d3, FIX_1_175875602);
  568. }
  569. } else {
  570. if (d1) {
  571. /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
  572. tmp0 = MULTIPLY(d1, FIX_0_275899380);
  573. tmp1 = MULTIPLY(d1, FIX_0_785694958);
  574. tmp2 = MULTIPLY(d1, FIX_1_175875602);
  575. tmp3 = MULTIPLY(d1, FIX_1_387039845);
  576. } else {
  577. /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
  578. tmp0 = tmp1 = tmp2 = tmp3 = 0;
  579. }
  580. }
  581. }
  582. }
  583. }
  584. /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
  585. dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
  586. dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
  587. dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
  588. dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
  589. dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
  590. dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
  591. dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
  592. dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
  593. dataptr += DCTSIZE; /* advance pointer to next row */
  594. }
  595. /* Pass 2: process columns. */
  596. /* Note that we must descale the results by a factor of 8 == 2**3, */
  597. /* and also undo the PASS1_BITS scaling. */
  598. dataptr = data;
  599. for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
  600. /* Columns of zeroes can be exploited in the same way as we did with rows.
  601. * However, the row calculation has created many nonzero AC terms, so the
  602. * simplification applies less often (typically 5% to 10% of the time).
  603. * On machines with very fast multiplication, it's possible that the
  604. * test takes more time than it's worth. In that case this section
  605. * may be commented out.
  606. */
  607. d0 = dataptr[DCTSIZE*0];
  608. d1 = dataptr[DCTSIZE*1];
  609. d2 = dataptr[DCTSIZE*2];
  610. d3 = dataptr[DCTSIZE*3];
  611. d4 = dataptr[DCTSIZE*4];
  612. d5 = dataptr[DCTSIZE*5];
  613. d6 = dataptr[DCTSIZE*6];
  614. d7 = dataptr[DCTSIZE*7];
  615. /* Even part: reverse the even part of the forward DCT. */
  616. /* The rotator is sqrt(2)*c(-6). */
  617. if (d6) {
  618. if (d4) {
  619. if (d2) {
  620. if (d0) {
  621. /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
  622. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  623. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  624. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  625. tmp0 = (d0 + d4) << CONST_BITS;
  626. tmp1 = (d0 - d4) << CONST_BITS;
  627. tmp10 = tmp0 + tmp3;
  628. tmp13 = tmp0 - tmp3;
  629. tmp11 = tmp1 + tmp2;
  630. tmp12 = tmp1 - tmp2;
  631. } else {
  632. /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
  633. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  634. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  635. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  636. tmp0 = d4 << CONST_BITS;
  637. tmp10 = tmp0 + tmp3;
  638. tmp13 = tmp0 - tmp3;
  639. tmp11 = tmp2 - tmp0;
  640. tmp12 = -(tmp0 + tmp2);
  641. }
  642. } else {
  643. if (d0) {
  644. /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
  645. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  646. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  647. tmp0 = (d0 + d4) << CONST_BITS;
  648. tmp1 = (d0 - d4) << CONST_BITS;
  649. tmp10 = tmp0 + tmp3;
  650. tmp13 = tmp0 - tmp3;
  651. tmp11 = tmp1 + tmp2;
  652. tmp12 = tmp1 - tmp2;
  653. } else {
  654. /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
  655. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  656. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  657. tmp0 = d4 << CONST_BITS;
  658. tmp10 = tmp0 + tmp3;
  659. tmp13 = tmp0 - tmp3;
  660. tmp11 = tmp2 - tmp0;
  661. tmp12 = -(tmp0 + tmp2);
  662. }
  663. }
  664. } else {
  665. if (d2) {
  666. if (d0) {
  667. /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
  668. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  669. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  670. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  671. tmp0 = d0 << CONST_BITS;
  672. tmp10 = tmp0 + tmp3;
  673. tmp13 = tmp0 - tmp3;
  674. tmp11 = tmp0 + tmp2;
  675. tmp12 = tmp0 - tmp2;
  676. } else {
  677. /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
  678. z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
  679. tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
  680. tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
  681. tmp10 = tmp3;
  682. tmp13 = -tmp3;
  683. tmp11 = tmp2;
  684. tmp12 = -tmp2;
  685. }
  686. } else {
  687. if (d0) {
  688. /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
  689. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  690. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  691. tmp0 = d0 << CONST_BITS;
  692. tmp10 = tmp0 + tmp3;
  693. tmp13 = tmp0 - tmp3;
  694. tmp11 = tmp0 + tmp2;
  695. tmp12 = tmp0 - tmp2;
  696. } else {
  697. /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
  698. tmp2 = MULTIPLY(-d6, FIX_1_306562965);
  699. tmp3 = MULTIPLY(d6, FIX_0_541196100);
  700. tmp10 = tmp3;
  701. tmp13 = -tmp3;
  702. tmp11 = tmp2;
  703. tmp12 = -tmp2;
  704. }
  705. }
  706. }
  707. } else {
  708. if (d4) {
  709. if (d2) {
  710. if (d0) {
  711. /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
  712. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  713. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  714. tmp0 = (d0 + d4) << CONST_BITS;
  715. tmp1 = (d0 - d4) << CONST_BITS;
  716. tmp10 = tmp0 + tmp3;
  717. tmp13 = tmp0 - tmp3;
  718. tmp11 = tmp1 + tmp2;
  719. tmp12 = tmp1 - tmp2;
  720. } else {
  721. /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
  722. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  723. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  724. tmp0 = d4 << CONST_BITS;
  725. tmp10 = tmp0 + tmp3;
  726. tmp13 = tmp0 - tmp3;
  727. tmp11 = tmp2 - tmp0;
  728. tmp12 = -(tmp0 + tmp2);
  729. }
  730. } else {
  731. if (d0) {
  732. /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
  733. tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
  734. tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
  735. } else {
  736. /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
  737. tmp10 = tmp13 = d4 << CONST_BITS;
  738. tmp11 = tmp12 = -tmp10;
  739. }
  740. }
  741. } else {
  742. if (d2) {
  743. if (d0) {
  744. /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
  745. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  746. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  747. tmp0 = d0 << CONST_BITS;
  748. tmp10 = tmp0 + tmp3;
  749. tmp13 = tmp0 - tmp3;
  750. tmp11 = tmp0 + tmp2;
  751. tmp12 = tmp0 - tmp2;
  752. } else {
  753. /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
  754. tmp2 = MULTIPLY(d2, FIX_0_541196100);
  755. tmp3 = MULTIPLY(d2, FIX_1_306562965);
  756. tmp10 = tmp3;
  757. tmp13 = -tmp3;
  758. tmp11 = tmp2;
  759. tmp12 = -tmp2;
  760. }
  761. } else {
  762. if (d0) {
  763. /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
  764. tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
  765. } else {
  766. /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
  767. tmp10 = tmp13 = tmp11 = tmp12 = 0;
  768. }
  769. }
  770. }
  771. }
  772. /* Odd part per figure 8; the matrix is unitary and hence its
  773. * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
  774. */
  775. if (d7) {
  776. if (d5) {
  777. if (d3) {
  778. if (d1) {
  779. /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
  780. z1 = d7 + d1;
  781. z2 = d5 + d3;
  782. z3 = d7 + d3;
  783. z4 = d5 + d1;
  784. z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
  785. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  786. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  787. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  788. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  789. z1 = MULTIPLY(-z1, FIX_0_899976223);
  790. z2 = MULTIPLY(-z2, FIX_2_562915447);
  791. z3 = MULTIPLY(-z3, FIX_1_961570560);
  792. z4 = MULTIPLY(-z4, FIX_0_390180644);
  793. z3 += z5;
  794. z4 += z5;
  795. tmp0 += z1 + z3;
  796. tmp1 += z2 + z4;
  797. tmp2 += z2 + z3;
  798. tmp3 += z1 + z4;
  799. } else {
  800. /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
  801. z1 = d7;
  802. z2 = d5 + d3;
  803. z3 = d7 + d3;
  804. z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
  805. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  806. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  807. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  808. z1 = MULTIPLY(-d7, FIX_0_899976223);
  809. z2 = MULTIPLY(-z2, FIX_2_562915447);
  810. z3 = MULTIPLY(-z3, FIX_1_961570560);
  811. z4 = MULTIPLY(-d5, FIX_0_390180644);
  812. z3 += z5;
  813. z4 += z5;
  814. tmp0 += z1 + z3;
  815. tmp1 += z2 + z4;
  816. tmp2 += z2 + z3;
  817. tmp3 = z1 + z4;
  818. }
  819. } else {
  820. if (d1) {
  821. /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
  822. z1 = d7 + d1;
  823. z2 = d5;
  824. z3 = d7;
  825. z4 = d5 + d1;
  826. z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
  827. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  828. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  829. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  830. z1 = MULTIPLY(-z1, FIX_0_899976223);
  831. z2 = MULTIPLY(-d5, FIX_2_562915447);
  832. z3 = MULTIPLY(-d7, FIX_1_961570560);
  833. z4 = MULTIPLY(-z4, FIX_0_390180644);
  834. z3 += z5;
  835. z4 += z5;
  836. tmp0 += z1 + z3;
  837. tmp1 += z2 + z4;
  838. tmp2 = z2 + z3;
  839. tmp3 += z1 + z4;
  840. } else {
  841. /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
  842. tmp0 = MULTIPLY(-d7, FIX_0_601344887);
  843. z1 = MULTIPLY(-d7, FIX_0_899976223);
  844. z3 = MULTIPLY(-d7, FIX_1_961570560);
  845. tmp1 = MULTIPLY(-d5, FIX_0_509795579);
  846. z2 = MULTIPLY(-d5, FIX_2_562915447);
  847. z4 = MULTIPLY(-d5, FIX_0_390180644);
  848. z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
  849. z3 += z5;
  850. z4 += z5;
  851. tmp0 += z3;
  852. tmp1 += z4;
  853. tmp2 = z2 + z3;
  854. tmp3 = z1 + z4;
  855. }
  856. }
  857. } else {
  858. if (d3) {
  859. if (d1) {
  860. /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
  861. z1 = d7 + d1;
  862. z3 = d7 + d3;
  863. z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
  864. tmp0 = MULTIPLY(d7, FIX_0_298631336);
  865. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  866. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  867. z1 = MULTIPLY(-z1, FIX_0_899976223);
  868. z2 = MULTIPLY(-d3, FIX_2_562915447);
  869. z3 = MULTIPLY(-z3, FIX_1_961570560);
  870. z4 = MULTIPLY(-d1, FIX_0_390180644);
  871. z3 += z5;
  872. z4 += z5;
  873. tmp0 += z1 + z3;
  874. tmp1 = z2 + z4;
  875. tmp2 += z2 + z3;
  876. tmp3 += z1 + z4;
  877. } else {
  878. /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
  879. z3 = d7 + d3;
  880. tmp0 = MULTIPLY(-d7, FIX_0_601344887);
  881. z1 = MULTIPLY(-d7, FIX_0_899976223);
  882. tmp2 = MULTIPLY(d3, FIX_0_509795579);
  883. z2 = MULTIPLY(-d3, FIX_2_562915447);
  884. z5 = MULTIPLY(z3, FIX_1_175875602);
  885. z3 = MULTIPLY(-z3, FIX_0_785694958);
  886. tmp0 += z3;
  887. tmp1 = z2 + z5;
  888. tmp2 += z3;
  889. tmp3 = z1 + z5;
  890. }
  891. } else {
  892. if (d1) {
  893. /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
  894. z1 = d7 + d1;
  895. z5 = MULTIPLY(z1, FIX_1_175875602);
  896. z1 = MULTIPLY(z1, FIX_0_275899380);
  897. z3 = MULTIPLY(-d7, FIX_1_961570560);
  898. tmp0 = MULTIPLY(-d7, FIX_1_662939225);
  899. z4 = MULTIPLY(-d1, FIX_0_390180644);
  900. tmp3 = MULTIPLY(d1, FIX_1_111140466);
  901. tmp0 += z1;
  902. tmp1 = z4 + z5;
  903. tmp2 = z3 + z5;
  904. tmp3 += z1;
  905. } else {
  906. /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
  907. tmp0 = MULTIPLY(-d7, FIX_1_387039845);
  908. tmp1 = MULTIPLY(d7, FIX_1_175875602);
  909. tmp2 = MULTIPLY(-d7, FIX_0_785694958);
  910. tmp3 = MULTIPLY(d7, FIX_0_275899380);
  911. }
  912. }
  913. }
  914. } else {
  915. if (d5) {
  916. if (d3) {
  917. if (d1) {
  918. /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
  919. z2 = d5 + d3;
  920. z4 = d5 + d1;
  921. z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
  922. tmp1 = MULTIPLY(d5, FIX_2_053119869);
  923. tmp2 = MULTIPLY(d3, FIX_3_072711026);
  924. tmp3 = MULTIPLY(d1, FIX_1_501321110);
  925. z1 = MULTIPLY(-d1, FIX_0_899976223);
  926. z2 = MULTIPLY(-z2, FIX_2_562915447);
  927. z3 = MULTIPLY(-d3, FIX_1_961570560);
  928. z4 = MULTIPLY(-z4, FIX_0_390180644);
  929. z3 += z5;
  930. z4 += z5;
  931. tmp0 = z1 + z3;
  932. tmp1 += z2 + z4;
  933. tmp2 += z2 + z3;
  934. tmp3 += z1 + z4;
  935. } else {
  936. /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
  937. z2 = d5 + d3;
  938. z5 = MULTIPLY(z2, FIX_1_175875602);
  939. tmp1 = MULTIPLY(d5, FIX_1_662939225);
  940. z4 = MULTIPLY(-d5, FIX_0_390180644);
  941. z2 = MULTIPLY(-z2, FIX_1_387039845);
  942. tmp2 = MULTIPLY(d3, FIX_1_111140466);
  943. z3 = MULTIPLY(-d3, FIX_1_961570560);
  944. tmp0 = z3 + z5;
  945. tmp1 += z2;
  946. tmp2 += z2;
  947. tmp3 = z4 + z5;
  948. }
  949. } else {
  950. if (d1) {
  951. /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
  952. z4 = d5 + d1;
  953. z5 = MULTIPLY(z4, FIX_1_175875602);
  954. z1 = MULTIPLY(-d1, FIX_0_899976223);
  955. tmp3 = MULTIPLY(d1, FIX_0_601344887);
  956. tmp1 = MULTIPLY(-d5, FIX_0_509795579);
  957. z2 = MULTIPLY(-d5, FIX_2_562915447);
  958. z4 = MULTIPLY(z4, FIX_0_785694958);
  959. tmp0 = z1 + z5;
  960. tmp1 += z4;
  961. tmp2 = z2 + z5;
  962. tmp3 += z4;
  963. } else {
  964. /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
  965. tmp0 = MULTIPLY(d5, FIX_1_175875602);
  966. tmp1 = MULTIPLY(d5, FIX_0_275899380);
  967. tmp2 = MULTIPLY(-d5, FIX_1_387039845);
  968. tmp3 = MULTIPLY(d5, FIX_0_785694958);
  969. }
  970. }
  971. } else {
  972. if (d3) {
  973. if (d1) {
  974. /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
  975. z5 = d1 + d3;
  976. tmp3 = MULTIPLY(d1, FIX_0_211164243);
  977. tmp2 = MULTIPLY(-d3, FIX_1_451774981);
  978. z1 = MULTIPLY(d1, FIX_1_061594337);
  979. z2 = MULTIPLY(-d3, FIX_2_172734803);
  980. z4 = MULTIPLY(z5, FIX_0_785694958);
  981. z5 = MULTIPLY(z5, FIX_1_175875602);
  982. tmp0 = z1 - z4;
  983. tmp1 = z2 + z4;
  984. tmp2 += z5;
  985. tmp3 += z5;
  986. } else {
  987. /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
  988. tmp0 = MULTIPLY(-d3, FIX_0_785694958);
  989. tmp1 = MULTIPLY(-d3, FIX_1_387039845);
  990. tmp2 = MULTIPLY(-d3, FIX_0_275899380);
  991. tmp3 = MULTIPLY(d3, FIX_1_175875602);
  992. }
  993. } else {
  994. if (d1) {
  995. /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
  996. tmp0 = MULTIPLY(d1, FIX_0_275899380);
  997. tmp1 = MULTIPLY(d1, FIX_0_785694958);
  998. tmp2 = MULTIPLY(d1, FIX_1_175875602);
  999. tmp3 = MULTIPLY(d1, FIX_1_387039845);
  1000. } else {
  1001. /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
  1002. tmp0 = tmp1 = tmp2 = tmp3 = 0;
  1003. }
  1004. }
  1005. }
  1006. }
  1007. /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
  1008. dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
  1009. CONST_BITS+PASS1_BITS+3);
  1010. dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
  1011. CONST_BITS+PASS1_BITS+3);
  1012. dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
  1013. CONST_BITS+PASS1_BITS+3);
  1014. dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
  1015. CONST_BITS+PASS1_BITS+3);
  1016. dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
  1017. CONST_BITS+PASS1_BITS+3);
  1018. dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
  1019. CONST_BITS+PASS1_BITS+3);
  1020. dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
  1021. CONST_BITS+PASS1_BITS+3);
  1022. dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
  1023. CONST_BITS+PASS1_BITS+3);
  1024. dataptr++; /* advance pointer to next column */
  1025. }
  1026. }