You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1112 lines
34KB

  1. /*
  2. * Misc image conversion routines
  3. * Copyright (c) 2001, 2002 Fabrice Bellard.
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. */
  19. #include "avcodec.h"
  20. #include "dsputil.h"
  21. #ifdef USE_FASTMEMCPY
  22. #include "fastmemcpy.h"
  23. #endif
  24. #ifdef HAVE_MMX
  25. #include "i386/mmx.h"
  26. #endif
  27. /* XXX: totally non optimized */
  28. static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  29. UINT8 *src, int width, int height)
  30. {
  31. int x, y;
  32. UINT8 *p = src;
  33. for(y=0;y<height;y+=2) {
  34. for(x=0;x<width;x+=2) {
  35. lum[0] = p[0];
  36. cb[0] = p[1];
  37. lum[1] = p[2];
  38. cr[0] = p[3];
  39. p += 4;
  40. lum += 2;
  41. cb++;
  42. cr++;
  43. }
  44. for(x=0;x<width;x+=2) {
  45. lum[0] = p[0];
  46. lum[1] = p[2];
  47. p += 4;
  48. lum += 2;
  49. }
  50. }
  51. }
/* Fixed-point helpers used by the RGB -> YUV converters in this file. */
/* number of fractional bits */
#define SCALEBITS 8
/* one half in fixed point; added before shifting right to round */
#define ONE_HALF (1 << (SCALEBITS - 1))
/* turn a real constant into a rounded fixed-point integer */
#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5))
  55. static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  56. UINT8 *src, int width, int height)
  57. {
  58. int wrap, wrap3, x, y;
  59. int r, g, b, r1, g1, b1;
  60. UINT8 *p;
  61. wrap = width;
  62. wrap3 = width * 3;
  63. p = src;
  64. for(y=0;y<height;y+=2) {
  65. for(x=0;x<width;x+=2) {
  66. r = p[0];
  67. g = p[1];
  68. b = p[2];
  69. r1 = r;
  70. g1 = g;
  71. b1 = b;
  72. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  73. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  74. r = p[3];
  75. g = p[4];
  76. b = p[5];
  77. r1 += r;
  78. g1 += g;
  79. b1 += b;
  80. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  81. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  82. p += wrap3;
  83. lum += wrap;
  84. r = p[0];
  85. g = p[1];
  86. b = p[2];
  87. r1 += r;
  88. g1 += g;
  89. b1 += b;
  90. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  91. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  92. r = p[3];
  93. g = p[4];
  94. b = p[5];
  95. r1 += r;
  96. g1 += g;
  97. b1 += b;
  98. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  99. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  100. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  101. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  102. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  103. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  104. cb++;
  105. cr++;
  106. p += -wrap3 + 2 * 3;
  107. lum += -wrap + 2;
  108. }
  109. p += wrap3;
  110. lum += wrap;
  111. }
  112. }
  113. static void rgba32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  114. UINT8 *src, int width, int height)
  115. {
  116. int wrap, wrap4, x, y;
  117. int r, g, b, r1, g1, b1;
  118. UINT8 *p;
  119. wrap = width;
  120. wrap4 = width * 4;
  121. p = src;
  122. for(y=0;y<height;y+=2) {
  123. for(x=0;x<width;x+=2) {
  124. r = p[0];
  125. g = p[1];
  126. b = p[2];
  127. r1 = r;
  128. g1 = g;
  129. b1 = b;
  130. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  131. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  132. r = p[4];
  133. g = p[5];
  134. b = p[6];
  135. r1 += r;
  136. g1 += g;
  137. b1 += b;
  138. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  139. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  140. p += wrap4;
  141. lum += wrap;
  142. r = p[0];
  143. g = p[1];
  144. b = p[2];
  145. r1 += r;
  146. g1 += g;
  147. b1 += b;
  148. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  149. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  150. r = p[4];
  151. g = p[5];
  152. b = p[6];
  153. r1 += r;
  154. g1 += g;
  155. b1 += b;
  156. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  157. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  158. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  159. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  160. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  161. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  162. cb++;
  163. cr++;
  164. p += -wrap4 + 2 * 4;
  165. lum += -wrap + 2;
  166. }
  167. p += wrap4;
  168. lum += wrap;
  169. }
  170. }
/*
 * 16-bit packed RGB -> YUV 4:2:0 wrappers around rgbmisc_to_yuv420p().
 *
 * The six trailing arguments are (R_LOWMASK, R_MAX, G_LOWMASK, G_MAX,
 * B_LOWMASK, B_MAX): for each component, the value of its lowest bit
 * within the 16-bit pixel (the pixel is divided by it) and the largest
 * value the component can hold (used as a mask after the division).
 *
 * The rgb5551 and gbr variants are only referenced from commented-out
 * cases in img_convert().
 * NOTE(review): gbr565 passes a 5-bit field at 0x0800 and a 6-bit field
 * at 0x0040, which overlap at bit 11 - confirm the intended layout before
 * enabling it.
 */
#define rgb565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0800,31, 0x0020,63,0x0001,31)
#define rgb555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0400,31, 0x0020,31,0x0001,31)
#define rgb5551_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0800,31, 0x0040,31,0x0002,31)
#define bgr565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0020,63,0x0800,31)
#define bgr555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0020,31,0x0400,31)
#define gbr565_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0800,31,0x0040,63)
#define gbr555_to_yuv420p(lum,cb,cr,src,width,height) rgbmisc_to_yuv420p((lum),(cb),(cr),(src),(width),(height),0x0001,31, 0x0400,31,0x0020,31)
  178. static void rgbmisc_to_yuv420p
  179. (UINT8 *lum, UINT8 *cb, UINT8 *cr,
  180. UINT8 *src, int width, int height,
  181. UINT16 R_LOWMASK, UINT16 R_MAX,
  182. UINT16 G_LOWMASK, UINT16 G_MAX,
  183. UINT16 B_LOWMASK, UINT16 B_MAX
  184. )
  185. {
  186. int wrap, wrap2, x, y;
  187. int r, g, b, r1, g1, b1;
  188. UINT8 *p;
  189. UINT16 pixel;
  190. wrap = width;
  191. wrap2 = width * 2;
  192. p = src;
  193. for(y=0;y<height;y+=2) {
  194. for(x=0;x<width;x+=2) {
  195. pixel = p[0] | (p[1]<<8);
  196. r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
  197. g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
  198. b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
  199. r1 = r;
  200. g1 = g;
  201. b1 = b;
  202. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  203. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  204. pixel = p[2] | (p[3]<<8);
  205. r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
  206. g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
  207. b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
  208. r1 += r;
  209. g1 += g;
  210. b1 += b;
  211. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  212. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  213. p += wrap2;
  214. lum += wrap;
  215. pixel = p[0] | (p[1]<<8);
  216. r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
  217. g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
  218. b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
  219. r1 += r;
  220. g1 += g;
  221. b1 += b;
  222. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  223. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  224. pixel = p[2] | (p[3]<<8);
  225. r = (((pixel/R_LOWMASK) & R_MAX) * (0x100 / (R_MAX+1)));
  226. g = (((pixel/G_LOWMASK) & G_MAX) * (0x100 / (G_MAX+1)));
  227. b = (((pixel/B_LOWMASK) & B_MAX) * (0x100 / (B_MAX+1)));
  228. r1 += r;
  229. g1 += g;
  230. b1 += b;
  231. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  232. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  233. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  234. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  235. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  236. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  237. cb++;
  238. cr++;
  239. p += -wrap2 + 2 * 2;
  240. lum += -wrap + 2;
  241. }
  242. p += wrap2;
  243. lum += wrap;
  244. }
  245. }
  246. static void bgr24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  247. UINT8 *src, int width, int height)
  248. {
  249. int wrap, wrap3, x, y;
  250. int r, g, b, r1, g1, b1;
  251. UINT8 *p;
  252. wrap = width;
  253. wrap3 = width * 3;
  254. p = src;
  255. for(y=0;y<height;y+=2) {
  256. for(x=0;x<width;x+=2) {
  257. b = p[0];
  258. g = p[1];
  259. r = p[2];
  260. r1 = r;
  261. g1 = g;
  262. b1 = b;
  263. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  264. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  265. b = p[3];
  266. g = p[4];
  267. r = p[5];
  268. r1 += r;
  269. g1 += g;
  270. b1 += b;
  271. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  272. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  273. p += wrap3;
  274. lum += wrap;
  275. b = p[0];
  276. g = p[1];
  277. r = p[2];
  278. r1 += r;
  279. g1 += g;
  280. b1 += b;
  281. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  282. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  283. b = p[3];
  284. g = p[4];
  285. r = p[5];
  286. r1 += r;
  287. g1 += g;
  288. b1 += b;
  289. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  290. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  291. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  292. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  293. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  294. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  295. cb++;
  296. cr++;
  297. p += -wrap3 + 2 * 3;
  298. lum += -wrap + 2;
  299. }
  300. p += wrap3;
  301. lum += wrap;
  302. }
  303. }
  304. static void bgra32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  305. UINT8 *src, int width, int height)
  306. {
  307. int wrap, wrap4, x, y;
  308. int r, g, b, r1, g1, b1;
  309. UINT8 *p;
  310. wrap = width;
  311. wrap4 = width * 4;
  312. p = src;
  313. for(y=0;y<height;y+=2) {
  314. for(x=0;x<width;x+=2) {
  315. b = p[0];
  316. g = p[1];
  317. r = p[2];
  318. r1 = r;
  319. g1 = g;
  320. b1 = b;
  321. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  322. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  323. b = p[4];
  324. g = p[5];
  325. r = p[6];
  326. r1 += r;
  327. g1 += g;
  328. b1 += b;
  329. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  330. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  331. p += wrap4;
  332. lum += wrap;
  333. b = p[0];
  334. g = p[1];
  335. r = p[2];
  336. r1 += r;
  337. g1 += g;
  338. b1 += b;
  339. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  340. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  341. b = p[4];
  342. g = p[5];
  343. r = p[6];
  344. r1 += r;
  345. g1 += g;
  346. b1 += b;
  347. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  348. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  349. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  350. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  351. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  352. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  353. cb++;
  354. cr++;
  355. p += -wrap4 + 2 * 4;
  356. lum += -wrap + 2;
  357. }
  358. p += wrap4;
  359. lum += wrap;
  360. }
  361. }
  362. /* XXX: use generic filter ? */
  363. /* 1x2 -> 1x1 */
  364. static void shrink2(UINT8 *dst, int dst_wrap,
  365. UINT8 *src, int src_wrap,
  366. int width, int height)
  367. {
  368. int w;
  369. UINT8 *s1, *s2, *d;
  370. for(;height > 0; height--) {
  371. s1 = src;
  372. s2 = s1 + src_wrap;
  373. d = dst;
  374. for(w = width;w >= 4; w-=4) {
  375. d[0] = (s1[0] + s2[0]) >> 1;
  376. d[1] = (s1[1] + s2[1]) >> 1;
  377. d[2] = (s1[2] + s2[2]) >> 1;
  378. d[3] = (s1[3] + s2[3]) >> 1;
  379. s1 += 4;
  380. s2 += 4;
  381. d += 4;
  382. }
  383. for(;w > 0; w--) {
  384. d[0] = (s1[0] + s2[0]) >> 1;
  385. s1++;
  386. s2++;
  387. d++;
  388. }
  389. src += 2 * src_wrap;
  390. dst += dst_wrap;
  391. }
  392. }
  393. /* 2x2 -> 1x1 */
  394. static void shrink22(UINT8 *dst, int dst_wrap,
  395. UINT8 *src, int src_wrap,
  396. int width, int height)
  397. {
  398. int w;
  399. UINT8 *s1, *s2, *d;
  400. for(;height > 0; height--) {
  401. s1 = src;
  402. s2 = s1 + src_wrap;
  403. d = dst;
  404. for(w = width;w >= 4; w-=4) {
  405. d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 1;
  406. d[1] = (s1[2] + s1[3] + s2[2] + s2[3] + 2) >> 1;
  407. d[2] = (s1[4] + s1[5] + s2[4] + s2[5] + 2) >> 1;
  408. d[3] = (s1[6] + s1[7] + s2[6] + s2[7] + 2) >> 1;
  409. s1 += 8;
  410. s2 += 8;
  411. d += 4;
  412. }
  413. for(;w > 0; w--) {
  414. d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 1;
  415. s1 += 2;
  416. s2 += 2;
  417. d++;
  418. }
  419. src += 2 * src_wrap;
  420. dst += dst_wrap;
  421. }
  422. }
  423. /* 1x1 -> 2x2 */
  424. static void grow22(UINT8 *dst, int dst_wrap,
  425. UINT8 *src, int src_wrap,
  426. int width, int height)
  427. {
  428. int w;
  429. UINT8 *s1, *d;
  430. for(;height > 0; height--) {
  431. s1 = src;
  432. d = dst;
  433. for(w = width;w >= 4; w-=4) {
  434. d[1] = d[0] = s1[0];
  435. d[3] = d[2] = s1[1];
  436. s1 += 2;
  437. d += 4;
  438. }
  439. for(;w > 0; w--) {
  440. d[0] = s1[0];
  441. s1 ++;
  442. d++;
  443. }
  444. if (height%2)
  445. src += src_wrap;
  446. dst += dst_wrap;
  447. }
  448. }
  449. /* 1x2 -> 2x1. width and height are given for the source picture */
  450. static void conv411(UINT8 *dst, int dst_wrap,
  451. UINT8 *src, int src_wrap,
  452. int width, int height)
  453. {
  454. int w, c;
  455. UINT8 *s1, *s2, *d;
  456. for(;height > 0; height -= 2) {
  457. s1 = src;
  458. s2 = src + src_wrap;
  459. d = dst;
  460. for(w = width;w > 0; w--) {
  461. c = (s1[0] + s2[0]) >> 1;
  462. d[0] = c;
  463. d[1] = c;
  464. s1++;
  465. s2++;
  466. d += 2;
  467. }
  468. src += src_wrap * 2;
  469. dst += dst_wrap;
  470. }
  471. }
  472. static void img_copy(UINT8 *dst, int dst_wrap,
  473. UINT8 *src, int src_wrap,
  474. int width, int height)
  475. {
  476. for(;height > 0; height--) {
  477. memcpy(dst, src, width);
  478. dst += dst_wrap;
  479. src += src_wrap;
  480. }
  481. }
/*
 * Fixed-point coefficients for the YUV -> RGB converters below.  The
 * literals are 16.16 fixed-point values reduced to SCALE_BITS fractional
 * bits: 76309, 117504, 138453, 13954 and 34903 divided by 65536 are about
 * 1.164, 1.793, 2.113, 0.213 and 0.533.
 * NOTE(review): these do not look like the inverse of the 0.299 / 0.587 /
 * 0.114 matrix used by the RGB -> YUV converters in this file - confirm
 * which colour standard is intended.
 */
#define SCALE_BITS 10
/* luma gain */
#define C_Y (76309 >> (16 - SCALE_BITS))
/* Cr contribution to red */
#define C_RV (117504 >> (16 - SCALE_BITS))
/* Cb contribution to blue */
#define C_BU (138453 >> (16 - SCALE_BITS))
/* Cb contribution to green (subtracted) */
#define C_GU (13954 >> (16 - SCALE_BITS))
/* Cr contribution to green (subtracted) */
#define C_GV (34903 >> (16 - SCALE_BITS))
/*
 * Store one pixel: remove the luma offset of 16 from y1, scale it by C_Y,
 * add the per-channel chroma terms, drop the fractional bits and map the
 * result through the cm table.
 * Relies on these names in the calling scope: y (scratch int), cm, r_add,
 * g_add and b_add (the *_add values already include the rounding term).
 * The expansion is a bare { } block, so it must not be used as the
 * unbraced body of an if that has an else.
 */
#define RGBOUT(r, g, b, y1)\
{\
    y = (y1 - 16) * C_Y;\
    r = cm[(y + r_add) >> SCALE_BITS];\
    g = cm[(y + g_add) >> SCALE_BITS];\
    b = cm[(y + b_add) >> SCALE_BITS];\
}
  495. /* XXX: no chroma interpolating is done */
  496. static void yuv420p_to_bgra32(AVPicture *dst, AVPicture *src,
  497. int width, int height)
  498. {
  499. UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
  500. int w, y, cb, cr, r_add, g_add, b_add, width2;
  501. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  502. d = dst->data[0];
  503. y1_ptr = src->data[0];
  504. cb_ptr = src->data[1];
  505. cr_ptr = src->data[2];
  506. width2 = width >> 1;
  507. for(;height > 0; height -= 2) {
  508. d1 = d;
  509. d2 = d + dst->linesize[0];
  510. y2_ptr = y1_ptr + src->linesize[0];
  511. for(w = width2; w > 0; w --) {
  512. cb = cb_ptr[0] - 128;
  513. cr = cr_ptr[0] - 128;
  514. r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
  515. g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
  516. b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
  517. /* output 4 pixels */
  518. RGBOUT(d1[2], d1[1], d1[0], y1_ptr[0]);
  519. RGBOUT(d1[6], d1[5], d1[4], y1_ptr[1]);
  520. RGBOUT(d2[2], d2[1], d2[0], y2_ptr[0]);
  521. RGBOUT(d2[6], d2[5], d2[4], y2_ptr[1]);
  522. d1[3] = d1[7] = d2[3] = d2[7] = 255;
  523. d1 += 8;
  524. d2 += 8;
  525. y1_ptr += 2;
  526. y2_ptr += 2;
  527. cb_ptr++;
  528. cr_ptr++;
  529. }
  530. d += 2 * dst->linesize[0];
  531. y1_ptr += 2 * src->linesize[0] - width;
  532. cb_ptr += src->linesize[1] - width2;
  533. cr_ptr += src->linesize[2] - width2;
  534. }
  535. }
  536. /* XXX: no chroma interpolating is done */
  537. static void yuv420p_to_rgba32(AVPicture *dst, AVPicture *src,
  538. int width, int height)
  539. {
  540. UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
  541. int w, y, cb, cr, r_add, g_add, b_add, width2;
  542. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  543. d = dst->data[0];
  544. y1_ptr = src->data[0];
  545. cb_ptr = src->data[1];
  546. cr_ptr = src->data[2];
  547. width2 = width >> 1;
  548. for(;height > 0; height -= 2) {
  549. d1 = d;
  550. d2 = d + dst->linesize[0];
  551. y2_ptr = y1_ptr + src->linesize[0];
  552. for(w = width2; w > 0; w --) {
  553. cb = cb_ptr[0] - 128;
  554. cr = cr_ptr[0] - 128;
  555. r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
  556. g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
  557. b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
  558. /* output 4 pixels */
  559. RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
  560. RGBOUT(d1[4], d1[5], d1[6], y1_ptr[1]);
  561. RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
  562. RGBOUT(d2[4], d2[5], d2[6], y2_ptr[1]);
  563. d1[3] = d1[7] = d2[3] = d2[7] = 255;
  564. d1 += 8;
  565. d2 += 8;
  566. y1_ptr += 2;
  567. y2_ptr += 2;
  568. cb_ptr++;
  569. cr_ptr++;
  570. }
  571. d += 2 * dst->linesize[0];
  572. y1_ptr += 2 * src->linesize[0] - width;
  573. cb_ptr += src->linesize[1] - width2;
  574. cr_ptr += src->linesize[2] - width2;
  575. }
  576. }
  577. /* XXX: no chroma interpolating is done */
  578. static void yuv420p_to_rgb24(AVPicture *dst, AVPicture *src,
  579. int width, int height)
  580. {
  581. UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
  582. int w, y, cb, cr, r_add, g_add, b_add, width2;
  583. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  584. d = dst->data[0];
  585. y1_ptr = src->data[0];
  586. cb_ptr = src->data[1];
  587. cr_ptr = src->data[2];
  588. width2 = width >> 1;
  589. for(;height > 0; height -= 2) {
  590. d1 = d;
  591. d2 = d + dst->linesize[0];
  592. y2_ptr = y1_ptr + src->linesize[0];
  593. for(w = width2; w > 0; w --) {
  594. cb = cb_ptr[0] - 128;
  595. cr = cr_ptr[0] - 128;
  596. r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
  597. g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
  598. b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
  599. /* output 4 pixels */
  600. RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
  601. RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
  602. RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
  603. RGBOUT(d2[3], d2[4], d2[5], y2_ptr[1]);
  604. d1 += 6;
  605. d2 += 6;
  606. y1_ptr += 2;
  607. y2_ptr += 2;
  608. cb_ptr++;
  609. cr_ptr++;
  610. }
  611. d += 2 * dst->linesize[0];
  612. y1_ptr += 2 * src->linesize[0] - width;
  613. cb_ptr += src->linesize[1] - width2;
  614. cr_ptr += src->linesize[2] - width2;
  615. }
  616. }
  617. /* XXX: no chroma interpolating is done */
  618. static void yuv422p_to_rgb24(AVPicture *dst, AVPicture *src,
  619. int width, int height)
  620. {
  621. UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1;
  622. int w, y, cb, cr, r_add, g_add, b_add, width2;
  623. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  624. d = dst->data[0];
  625. y1_ptr = src->data[0];
  626. cb_ptr = src->data[1];
  627. cr_ptr = src->data[2];
  628. width2 = width >> 1;
  629. for(;height > 0; height --) {
  630. d1 = d;
  631. for(w = width2; w > 0; w --) {
  632. cb = cb_ptr[0] - 128;
  633. cr = cr_ptr[0] - 128;
  634. r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
  635. g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
  636. b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
  637. /* output 2 pixels */
  638. RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
  639. RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
  640. d1 += 6;
  641. y1_ptr += 2;
  642. cb_ptr++;
  643. cr_ptr++;
  644. }
  645. d += dst->linesize[0];
  646. y1_ptr += src->linesize[0] - width;
  647. cb_ptr += src->linesize[1] - width2;
  648. cr_ptr += src->linesize[2] - width2;
  649. }
  650. }
  651. /* XXX: always use linesize. Return -1 if not supported */
  652. int img_convert(AVPicture *dst, int dst_pix_fmt,
  653. AVPicture *src, int pix_fmt,
  654. int width, int height)
  655. {
  656. int i;
  657. assert(pix_fmt != PIX_FMT_ANY && dst_pix_fmt != PIX_FMT_ANY);
  658. if (dst_pix_fmt == pix_fmt) {
  659. switch(pix_fmt) {
  660. case PIX_FMT_YUV420P:
  661. for(i=0;i<3;i++) {
  662. if (i == 1) {
  663. width >>= 1;
  664. height >>= 1;
  665. }
  666. img_copy(dst->data[i], dst->linesize[i],
  667. src->data[i], src->linesize[i],
  668. width, height);
  669. }
  670. break;
  671. default:
  672. return -1;
  673. }
  674. } else if (dst_pix_fmt == PIX_FMT_YUV420P) {
  675. switch(pix_fmt) {
  676. case PIX_FMT_YUV411P:
  677. img_copy(dst->data[0], dst->linesize[0],
  678. src->data[0], src->linesize[0],
  679. width, height);
  680. conv411(dst->data[1], dst->linesize[1],
  681. src->data[1], src->linesize[1],
  682. width / 4, height);
  683. conv411(dst->data[2], dst->linesize[2],
  684. src->data[2], src->linesize[2],
  685. width / 4, height);
  686. break;
  687. case PIX_FMT_YUV410P:
  688. img_copy(dst->data[0], dst->linesize[0],
  689. src->data[0], src->linesize[0],
  690. width, height);
  691. grow22(dst->data[1], dst->linesize[1],
  692. src->data[1], src->linesize[1],
  693. width/2, height/2);
  694. grow22(dst->data[2], dst->linesize[2],
  695. src->data[2], src->linesize[2],
  696. width/2, height/2);
  697. break;
  698. case PIX_FMT_YUV420P:
  699. for(i=0;i<3;i++) {
  700. img_copy(dst->data[i], dst->linesize[i],
  701. src->data[i], src->linesize[i],
  702. width, height);
  703. }
  704. break;
  705. case PIX_FMT_YUV422P:
  706. img_copy(dst->data[0], dst->linesize[0],
  707. src->data[0], src->linesize[0],
  708. width, height);
  709. width >>= 1;
  710. height >>= 1;
  711. for(i=1;i<3;i++) {
  712. shrink2(dst->data[i], dst->linesize[i],
  713. src->data[i], src->linesize[i],
  714. width, height);
  715. }
  716. break;
  717. case PIX_FMT_YUV444P:
  718. img_copy(dst->data[0], dst->linesize[0],
  719. src->data[0], src->linesize[0],
  720. width, height);
  721. width >>= 1;
  722. height >>= 1;
  723. for(i=1;i<3;i++) {
  724. shrink22(dst->data[i], dst->linesize[i],
  725. src->data[i], src->linesize[i],
  726. width, height);
  727. }
  728. break;
  729. case PIX_FMT_YUV422:
  730. yuv422_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  731. src->data[0], width, height);
  732. break;
  733. case PIX_FMT_RGB24:
  734. rgb24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  735. src->data[0], width, height);
  736. break;
  737. case PIX_FMT_RGBA32:
  738. rgba32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  739. src->data[0], width, height);
  740. break;
  741. case PIX_FMT_BGR24:
  742. bgr24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  743. src->data[0], width, height);
  744. break;
  745. case PIX_FMT_BGRA32:
  746. bgra32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  747. src->data[0], width, height);
  748. break;
  749. case PIX_FMT_RGB565:
  750. rgb565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  751. src->data[0], width, height);
  752. break;
  753. case PIX_FMT_RGB555:
  754. rgb555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  755. src->data[0], width, height);
  756. break;
  757. /* case PIX_FMT_RGB5551:
  758. rgb5551_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  759. src->data[0], width, height);
  760. break;*/
  761. case PIX_FMT_BGR565:
  762. bgr565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  763. src->data[0], width, height);
  764. break;
  765. case PIX_FMT_BGR555:
  766. bgr555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  767. src->data[0], width, height);
  768. break;
  769. /* case PIX_FMT_GBR565:
  770. gbr565_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  771. src->data[0], width, height);
  772. break;
  773. case PIX_FMT_GBR555:
  774. gbr555_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  775. src->data[0], width, height);
  776. break;*/
  777. default:
  778. return -1;
  779. }
  780. } else if (dst_pix_fmt == PIX_FMT_RGB24) {
  781. switch(pix_fmt) {
  782. case PIX_FMT_YUV420P:
  783. yuv420p_to_rgb24(dst, src, width, height);
  784. break;
  785. case PIX_FMT_YUV422P:
  786. yuv422p_to_rgb24(dst, src, width, height);
  787. break;
  788. default:
  789. return -1;
  790. }
  791. } else if (dst_pix_fmt == PIX_FMT_RGBA32) {
  792. switch(pix_fmt) {
  793. case PIX_FMT_YUV420P:
  794. yuv420p_to_rgba32(dst, src, width, height);
  795. break;
  796. default:
  797. return -1;
  798. }
  799. } else if (dst_pix_fmt == PIX_FMT_BGRA32) {
  800. switch(pix_fmt) {
  801. case PIX_FMT_YUV420P:
  802. yuv420p_to_bgra32(dst, src, width, height);
  803. break;
  804. default:
  805. return -1;
  806. }
  807. } else {
  808. return -1;
  809. }
  810. return 0;
  811. }
#ifdef HAVE_MMX
/*
 * MMX bodies for the deinterlacing filter [-1 4 2 4 -1] / 8, handling four
 * samples per expansion.
 *
 * Both macros expect the pointers lum_m4, lum_m3, lum_m2, lum_m1 and lum
 * to be in scope, mm7 to be zero and mm6 to hold the rounding constant 4
 * in each 16-bit word (avpicture_deinterlace() sets both up).
 * Operand order appears to be (source, destination).
 *
 * Steps: load 32 bits from each of the five lines, widen the bytes to
 * words by unpacking against mm7, form 4 * (lum_m3 + lum_m1) +
 * 2 * lum_m2 + rounder in mm1, subtract lum_m4 + lum with unsigned
 * saturation, shift right by 3, pack back to bytes and store 32 bits.
 */
/*
 * In-place variant: the untouched lum_m2 samples are first saved into
 * lum_m4[0], then the filtered result overwrites lum_m2[0].
 */
#define DEINT_INPLACE_LINE_LUM \
    movd_m2r(lum_m4[0],mm0);\
    movd_m2r(lum_m3[0],mm1);\
    movd_m2r(lum_m2[0],mm2);\
    movd_m2r(lum_m1[0],mm3);\
    movd_m2r(lum[0],mm4);\
    punpcklbw_r2r(mm7,mm0);\
    movd_r2m(mm2,lum_m4[0]);\
    punpcklbw_r2r(mm7,mm1);\
    punpcklbw_r2r(mm7,mm2);\
    punpcklbw_r2r(mm7,mm3);\
    punpcklbw_r2r(mm7,mm4);\
    paddw_r2r(mm3,mm1);\
    psllw_i2r(1,mm2);\
    paddw_r2r(mm4,mm0);\
    psllw_i2r(2,mm1);\
    paddw_r2r(mm6,mm2);\
    paddw_r2r(mm2,mm1);\
    psubusw_r2r(mm0,mm1);\
    psrlw_i2r(3,mm1);\
    packuswb_r2r(mm7,mm1);\
    movd_r2m(mm1,lum_m2[0]);
/*
 * Out-of-place variant: the source lines are left untouched and the
 * filtered result is stored to dst[0] (dst must be in scope as well).
 */
#define DEINT_LINE_LUM \
    movd_m2r(lum_m4[0],mm0);\
    movd_m2r(lum_m3[0],mm1);\
    movd_m2r(lum_m2[0],mm2);\
    movd_m2r(lum_m1[0],mm3);\
    movd_m2r(lum[0],mm4);\
    punpcklbw_r2r(mm7,mm0);\
    punpcklbw_r2r(mm7,mm1);\
    punpcklbw_r2r(mm7,mm2);\
    punpcklbw_r2r(mm7,mm3);\
    punpcklbw_r2r(mm7,mm4);\
    paddw_r2r(mm3,mm1);\
    psllw_i2r(1,mm2);\
    paddw_r2r(mm4,mm0);\
    psllw_i2r(2,mm1);\
    paddw_r2r(mm6,mm2);\
    paddw_r2r(mm2,mm1);\
    psubusw_r2r(mm0,mm1);\
    psrlw_i2r(3,mm1);\
    packuswb_r2r(mm7,mm1);\
    movd_r2m(mm1,dst[0]);
#endif
  857. /* filter parameters: [-1 4 2 4 -1] // 8 */
  858. static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
  859. int size)
  860. {
  861. #ifndef HAVE_MMX
  862. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  863. int sum;
  864. for(;size > 0;size--) {
  865. sum = -lum_m4[0];
  866. sum += lum_m3[0] << 2;
  867. sum += lum_m2[0] << 1;
  868. sum += lum_m1[0] << 2;
  869. sum += -lum[0];
  870. dst[0] = cm[(sum + 4) >> 3];
  871. lum_m4++;
  872. lum_m3++;
  873. lum_m2++;
  874. lum_m1++;
  875. lum++;
  876. dst++;
  877. }
  878. #else
  879. for (;size > 3; size-=4) {
  880. DEINT_LINE_LUM
  881. lum_m4+=4;
  882. lum_m3+=4;
  883. lum_m2+=4;
  884. lum_m1+=4;
  885. lum+=4;
  886. dst+=4;
  887. }
  888. #endif
  889. }
  890. static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
  891. int size)
  892. {
  893. #ifndef HAVE_MMX
  894. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  895. int sum;
  896. for(;size > 0;size--) {
  897. sum = -lum_m4[0];
  898. sum += lum_m3[0] << 2;
  899. sum += lum_m2[0] << 1;
  900. lum_m4[0]=lum_m2[0];
  901. sum += lum_m1[0] << 2;
  902. sum += -lum[0];
  903. lum_m2[0] = cm[(sum + 4) >> 3];
  904. lum_m4++;
  905. lum_m3++;
  906. lum_m2++;
  907. lum_m1++;
  908. lum++;
  909. }
  910. #else
  911. for (;size > 3; size-=4) {
  912. DEINT_INPLACE_LINE_LUM
  913. lum_m4+=4;
  914. lum_m3+=4;
  915. lum_m2+=4;
  916. lum_m1+=4;
  917. lum+=4;
  918. }
  919. #endif
  920. }
  921. /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
  922. top field is copied as is, but the bottom field is deinterlaced
  923. against the top field. */
  924. static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
  925. UINT8 *src1, int src_wrap,
  926. int width, int height)
  927. {
  928. UINT8 *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
  929. int y;
  930. src_m2 = src1;
  931. src_m1 = src1;
  932. src_0=&src_m1[src_wrap];
  933. src_p1=&src_0[src_wrap];
  934. src_p2=&src_p1[src_wrap];
  935. for(y=0;y<(height-2);y+=2) {
  936. memcpy(dst,src_m1,width);
  937. dst += dst_wrap;
  938. deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width);
  939. src_m2 = src_0;
  940. src_m1 = src_p1;
  941. src_0 = src_p2;
  942. src_p1 += 2*src_wrap;
  943. src_p2 += 2*src_wrap;
  944. dst += dst_wrap;
  945. }
  946. memcpy(dst,src_m1,width);
  947. dst += dst_wrap;
  948. /* do last line */
  949. deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width);
  950. }
  951. static void deinterlace_bottom_field_inplace(UINT8 *src1, int src_wrap,
  952. int width, int height)
  953. {
  954. UINT8 *src_m1, *src_0, *src_p1, *src_p2;
  955. int y;
  956. UINT8 *buf;
  957. buf = (UINT8*)av_malloc(width);
  958. src_m1 = src1;
  959. memcpy(buf,src_m1,width);
  960. src_0=&src_m1[src_wrap];
  961. src_p1=&src_0[src_wrap];
  962. src_p2=&src_p1[src_wrap];
  963. for(y=0;y<(height-2);y+=2) {
  964. deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width);
  965. src_m1 = src_p1;
  966. src_0 = src_p2;
  967. src_p1 += 2*src_wrap;
  968. src_p2 += 2*src_wrap;
  969. }
  970. /* do last line */
  971. deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width);
  972. av_free(buf);
  973. }
  974. /* deinterlace - if not supported return -1 */
  975. int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
  976. int pix_fmt, int width, int height)
  977. {
  978. int i;
  979. if (pix_fmt != PIX_FMT_YUV420P &&
  980. pix_fmt != PIX_FMT_YUV422P &&
  981. pix_fmt != PIX_FMT_YUV444P)
  982. return -1;
  983. if ((width & 3) != 0 || (height & 3) != 0)
  984. return -1;
  985. #ifdef HAVE_MMX
  986. {
  987. mmx_t rounder;
  988. rounder.uw[0]=4;
  989. rounder.uw[1]=4;
  990. rounder.uw[2]=4;
  991. rounder.uw[3]=4;
  992. pxor_r2r(mm7,mm7);
  993. movq_m2r(rounder,mm6);
  994. }
  995. #endif
  996. for(i=0;i<3;i++) {
  997. if (i == 1) {
  998. switch(pix_fmt) {
  999. case PIX_FMT_YUV420P:
  1000. width >>= 1;
  1001. height >>= 1;
  1002. break;
  1003. case PIX_FMT_YUV422P:
  1004. width >>= 1;
  1005. break;
  1006. default:
  1007. break;
  1008. }
  1009. }
  1010. if (src == dst) {
  1011. deinterlace_bottom_field_inplace(src->data[i], src->linesize[i],
  1012. width, height);
  1013. } else {
  1014. deinterlace_bottom_field(dst->data[i],dst->linesize[i],
  1015. src->data[i], src->linesize[i],
  1016. width, height);
  1017. }
  1018. }
  1019. #ifdef HAVE_MMX
  1020. emms();
  1021. #endif
  1022. return 0;
  1023. }
/* Remove the FIX() fixed-point helper defined near the top of this file. */
#undef FIX