/*
 * Misc image conversion routines
 * Copyright (c) 2001, 2002 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
  #include <assert.h>
  #include <string.h>

  #include "avcodec.h"
  #include "dsputil.h"

  #ifdef USE_FASTMEMCPY
  #include "fastmemcpy.h"
  #endif

  #ifdef HAVE_MMX
  #include "i386/mmx.h"
  #endif
  /*
   * Convert a packed YUV 4:2:2 buffer to planar YUV 4:2:0.
   *
   * src is consumed four bytes at a time as: luma, Cb, luma, Cr (two pixels).
   * Lines are processed in pairs: the first line of a pair provides luma and
   * chroma, the second line provides luma only (its chroma bytes are skipped,
   * not averaged with the line above).
   *
   * No line stride is taken: every buffer is walked as tightly packed.
   * NOTE(review): pixels and lines are consumed two at a time, so width and
   * height are presumably expected to be even - confirm against callers.
   *
   * XXX: totally non optimized
   */
  static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
                                UINT8 *src, int width, int height)
  {
      int x, y;
      UINT8 *p = src;

      for (y = 0; y < height; y += 2) {
          /* first line of the pair: luma + chroma */
          for (x = 0; x < width; x += 2) {
              lum[0] = p[0];
              cb[0]  = p[1];
              lum[1] = p[2];
              cr[0]  = p[3];
              p   += 4;
              lum += 2;
              cb++;
              cr++;
          }
          /* second line of the pair: luma only */
          for (x = 0; x < width; x += 2) {
              lum[0] = p[0];
              lum[1] = p[2];
              p   += 4;
              lum += 2;
          }
      }
  }
  /* Fixed-point helpers used by the RGB -> YUV conversions below. */

  /* number of fractional bits of the fixed-point coefficients */
  #define SCALEBITS 8
  /* 0.5 in fixed point; added before the right shift so that it rounds */
  #define ONE_HALF (1 << (SCALEBITS - 1))
  /* convert a real constant to a rounded fixed-point integer */
  #define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5))
  55. static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  56. UINT8 *src, int width, int height)
  57. {
  58. int wrap, wrap3, x, y;
  59. int r, g, b, r1, g1, b1;
  60. UINT8 *p;
  61. wrap = width;
  62. wrap3 = width * 3;
  63. p = src;
  64. for(y=0;y<height;y+=2) {
  65. for(x=0;x<width;x+=2) {
  66. r = p[0];
  67. g = p[1];
  68. b = p[2];
  69. r1 = r;
  70. g1 = g;
  71. b1 = b;
  72. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  73. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  74. r = p[3];
  75. g = p[4];
  76. b = p[5];
  77. r1 += r;
  78. g1 += g;
  79. b1 += b;
  80. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  81. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  82. p += wrap3;
  83. lum += wrap;
  84. r = p[0];
  85. g = p[1];
  86. b = p[2];
  87. r1 += r;
  88. g1 += g;
  89. b1 += b;
  90. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  91. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  92. r = p[3];
  93. g = p[4];
  94. b = p[5];
  95. r1 += r;
  96. g1 += g;
  97. b1 += b;
  98. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  99. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  100. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  101. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  102. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  103. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  104. cb++;
  105. cr++;
  106. p += -wrap3 + 2 * 3;
  107. lum += -wrap + 2;
  108. }
  109. p += wrap3;
  110. lum += wrap;
  111. }
  112. }
  113. static void rgba32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  114. UINT8 *src, int width, int height)
  115. {
  116. int wrap, wrap4, x, y;
  117. int r, g, b, r1, g1, b1;
  118. UINT8 *p;
  119. wrap = width;
  120. wrap4 = width * 4;
  121. p = src;
  122. for(y=0;y<height;y+=2) {
  123. for(x=0;x<width;x+=2) {
  124. r = p[0];
  125. g = p[1];
  126. b = p[2];
  127. r1 = r;
  128. g1 = g;
  129. b1 = b;
  130. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  131. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  132. r = p[4];
  133. g = p[5];
  134. b = p[6];
  135. r1 += r;
  136. g1 += g;
  137. b1 += b;
  138. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  139. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  140. p += wrap4;
  141. lum += wrap;
  142. r = p[0];
  143. g = p[1];
  144. b = p[2];
  145. r1 += r;
  146. g1 += g;
  147. b1 += b;
  148. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  149. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  150. r = p[4];
  151. g = p[5];
  152. b = p[6];
  153. r1 += r;
  154. g1 += g;
  155. b1 += b;
  156. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  157. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  158. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  159. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  160. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  161. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  162. cb++;
  163. cr++;
  164. p += -wrap4 + 2 * 4;
  165. lum += -wrap + 2;
  166. }
  167. p += wrap4;
  168. lum += wrap;
  169. }
  170. }
  171. static void bgr24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  172. UINT8 *src, int width, int height)
  173. {
  174. int wrap, wrap3, x, y;
  175. int r, g, b, r1, g1, b1;
  176. UINT8 *p;
  177. wrap = width;
  178. wrap3 = width * 3;
  179. p = src;
  180. for(y=0;y<height;y+=2) {
  181. for(x=0;x<width;x+=2) {
  182. b = p[0];
  183. g = p[1];
  184. r = p[2];
  185. r1 = r;
  186. g1 = g;
  187. b1 = b;
  188. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  189. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  190. b = p[3];
  191. g = p[4];
  192. r = p[5];
  193. r1 += r;
  194. g1 += g;
  195. b1 += b;
  196. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  197. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  198. p += wrap3;
  199. lum += wrap;
  200. b = p[0];
  201. g = p[1];
  202. r = p[2];
  203. r1 += r;
  204. g1 += g;
  205. b1 += b;
  206. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  207. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  208. b = p[3];
  209. g = p[4];
  210. r = p[5];
  211. r1 += r;
  212. g1 += g;
  213. b1 += b;
  214. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  215. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  216. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  217. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  218. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  219. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  220. cb++;
  221. cr++;
  222. p += -wrap3 + 2 * 3;
  223. lum += -wrap + 2;
  224. }
  225. p += wrap3;
  226. lum += wrap;
  227. }
  228. }
  229. static void bgra32_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr,
  230. UINT8 *src, int width, int height)
  231. {
  232. int wrap, wrap4, x, y;
  233. int r, g, b, r1, g1, b1;
  234. UINT8 *p;
  235. wrap = width;
  236. wrap4 = width * 4;
  237. p = src;
  238. for(y=0;y<height;y+=2) {
  239. for(x=0;x<width;x+=2) {
  240. b = p[0];
  241. g = p[1];
  242. r = p[2];
  243. r1 = r;
  244. g1 = g;
  245. b1 = b;
  246. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  247. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  248. b = p[4];
  249. g = p[5];
  250. r = p[6];
  251. r1 += r;
  252. g1 += g;
  253. b1 += b;
  254. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  255. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  256. p += wrap4;
  257. lum += wrap;
  258. b = p[0];
  259. g = p[1];
  260. r = p[2];
  261. r1 += r;
  262. g1 += g;
  263. b1 += b;
  264. lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g +
  265. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  266. b = p[4];
  267. g = p[5];
  268. r = p[6];
  269. r1 += r;
  270. g1 += g;
  271. b1 += b;
  272. lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g +
  273. FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
  274. cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +
  275. FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  276. cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 -
  277. FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128;
  278. cb++;
  279. cr++;
  280. p += -wrap4 + 2 * 4;
  281. lum += -wrap + 2;
  282. }
  283. p += wrap4;
  284. lum += wrap;
  285. }
  286. }
  /* XXX: use generic filter ? */
  /*
   * 1x2 -> 1x1: halve a plane vertically.
   *
   * Each output sample is the truncated mean of two vertically adjacent
   * source samples.  width and height are the dimensions of the destination;
   * two source lines (src_wrap bytes apart) are consumed per output line.
   */
  static void shrink2(UINT8 *dst, int dst_wrap,
                      UINT8 *src, int src_wrap,
                      int width, int height)
  {
      int x;
      UINT8 *s1, *s2;

      for (; height > 0; height--) {
          s1 = src;
          s2 = s1 + src_wrap;
          for (x = 0; x < width; x++)
              dst[x] = (s1[x] + s2[x]) >> 1;
          src += 2 * src_wrap;
          dst += dst_wrap;
      }
  }
  /*
   * 2x2 -> 1x1: halve a plane in both directions.
   *
   * Each output sample is the rounded mean of a 2x2 block of source samples:
   * the four values plus 2 are shifted right by 2 (i.e. divided by four).
   * width and height are the dimensions of the destination; two source lines
   * (src_wrap bytes apart) and two source columns are consumed per output
   * sample.
   */
  static void shrink22(UINT8 *dst, int dst_wrap,
                       UINT8 *src, int src_wrap,
                       int width, int height)
  {
      int x;
      UINT8 *s1, *s2;

      for (; height > 0; height--) {
          s1 = src;
          s2 = s1 + src_wrap;
          for (x = 0; x < width; x++) {
              dst[x] = (s1[2 * x] + s1[2 * x + 1] +
                        s2[2 * x] + s2[2 * x + 1] + 2) >> 2;
          }
          src += 2 * src_wrap;
          dst += dst_wrap;
      }
  }
  /*
   * 1x1 -> 2x2: double a plane in both directions by sample replication.
   *
   * width and height are the dimensions of the destination.  Output column x
   * is taken from source column x / 2 and output line y from source line
   * y / 2, so odd destination sizes simply end on a half-used source
   * sample/line.
   */
  static void grow22(UINT8 *dst, int dst_wrap,
                     UINT8 *src, int src_wrap,
                     int width, int height)
  {
      int x, y;

      for (y = 0; y < height; y++) {
          for (x = 0; x < width; x++)
              dst[x] = src[x >> 1];
          /* a source line is finished once two output lines used it */
          if (y & 1)
              src += src_wrap;
          dst += dst_wrap;
      }
  }
  /*
   * 1x2 -> 2x1. width and height are given for the source picture.
   *
   * Two vertically adjacent source samples are averaged (truncating) and the
   * result is written twice horizontally, so the output has twice the width
   * and half the height of the input plane.
   */
  static void conv411(UINT8 *dst, int dst_wrap,
                      UINT8 *src, int src_wrap,
                      int width, int height)
  {
      int x, c;
      UINT8 *s1, *s2;

      for (; height > 0; height -= 2) {
          s1 = src;
          s2 = src + src_wrap;
          for (x = 0; x < width; x++) {
              c = (s1[x] + s2[x]) >> 1;
              dst[2 * x]     = c;
              dst[2 * x + 1] = c;
          }
          src += src_wrap * 2;
          dst += dst_wrap;
      }
  }
  /*
   * Copy a plane line by line.
   *
   * width bytes are copied for each of the height lines; dst_wrap and
   * src_wrap are the distances in bytes between the starts of consecutive
   * lines in the destination and source.
   */
  static void img_copy(UINT8 *dst, int dst_wrap,
                       UINT8 *src, int src_wrap,
                       int width, int height)
  {
      int y;

      for (y = 0; y < height; y++) {
          memcpy(dst, src, width);
          dst += dst_wrap;
          src += src_wrap;
      }
  }
  /* Fixed-point constants for the YUV -> RGB conversions below.  Each
     coefficient is written with 16 fractional bits and reduced to SCALE_BITS
     fractional bits. */
  #define SCALE_BITS 10

  #define C_Y  (76309 >> (16 - SCALE_BITS))
  #define C_RV (117504 >> (16 - SCALE_BITS))
  #define C_BU (138453 >> (16 - SCALE_BITS))
  #define C_GU (13954 >> (16 - SCALE_BITS))
  #define C_GV (34903 >> (16 - SCALE_BITS))

  /*
   * Store the three colour components of one pixel with luma y1.
   *
   * Relies on names from the expanding scope: y (int scratch variable),
   * r_add / g_add / b_add (chroma contributions, rounding term included) and
   * cm (lookup table applied to the shifted result).  Wrapped in
   * do { } while (0) so that "RGBOUT(...);" is a single statement.
   */
  #define RGBOUT(r, g, b, y1)\
  do {\
      y = ((y1) - 16) * C_Y;\
      r = cm[(y + r_add) >> SCALE_BITS];\
      g = cm[(y + g_add) >> SCALE_BITS];\
      b = cm[(y + b_add) >> SCALE_BITS];\
  } while (0)
  420. /* XXX: no chroma interpolating is done */
  421. static void yuv420p_to_bgra32(AVPicture *dst, AVPicture *src,
  422. int width, int height)
  423. {
  424. UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
  425. int w, y, cb, cr, r_add, g_add, b_add, width2;
  426. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  427. d = dst->data[0];
  428. y1_ptr = src->data[0];
  429. cb_ptr = src->data[1];
  430. cr_ptr = src->data[2];
  431. width2 = width >> 1;
  432. for(;height > 0; height -= 2) {
  433. d1 = d;
  434. d2 = d + dst->linesize[0];
  435. y2_ptr = y1_ptr + src->linesize[0];
  436. for(w = width2; w > 0; w --) {
  437. cb = cb_ptr[0] - 128;
  438. cr = cr_ptr[0] - 128;
  439. r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
  440. g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
  441. b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
  442. /* output 4 pixels */
  443. RGBOUT(d1[2], d1[1], d1[0], y1_ptr[0]);
  444. RGBOUT(d1[6], d1[5], d1[4], y1_ptr[1]);
  445. RGBOUT(d2[2], d2[1], d2[0], y2_ptr[0]);
  446. RGBOUT(d2[6], d2[5], d2[4], y2_ptr[1]);
  447. d1[3] = d1[7] = d2[3] = d2[7] = 255;
  448. d1 += 8;
  449. d2 += 8;
  450. y1_ptr += 2;
  451. y2_ptr += 2;
  452. cb_ptr++;
  453. cr_ptr++;
  454. }
  455. d += 2 * dst->linesize[0];
  456. y1_ptr += 2 * src->linesize[0] - width;
  457. cb_ptr += src->linesize[1] - width2;
  458. cr_ptr += src->linesize[2] - width2;
  459. }
  460. }
  461. /* XXX: no chroma interpolating is done */
  462. static void yuv420p_to_rgba32(AVPicture *dst, AVPicture *src,
  463. int width, int height)
  464. {
  465. UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
  466. int w, y, cb, cr, r_add, g_add, b_add, width2;
  467. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  468. d = dst->data[0];
  469. y1_ptr = src->data[0];
  470. cb_ptr = src->data[1];
  471. cr_ptr = src->data[2];
  472. width2 = width >> 1;
  473. for(;height > 0; height -= 2) {
  474. d1 = d;
  475. d2 = d + dst->linesize[0];
  476. y2_ptr = y1_ptr + src->linesize[0];
  477. for(w = width2; w > 0; w --) {
  478. cb = cb_ptr[0] - 128;
  479. cr = cr_ptr[0] - 128;
  480. r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
  481. g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
  482. b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
  483. /* output 4 pixels */
  484. RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
  485. RGBOUT(d1[4], d1[5], d1[6], y1_ptr[1]);
  486. RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
  487. RGBOUT(d2[4], d2[5], d2[6], y2_ptr[1]);
  488. d1[3] = d1[7] = d2[3] = d2[7] = 255;
  489. d1 += 8;
  490. d2 += 8;
  491. y1_ptr += 2;
  492. y2_ptr += 2;
  493. cb_ptr++;
  494. cr_ptr++;
  495. }
  496. d += 2 * dst->linesize[0];
  497. y1_ptr += 2 * src->linesize[0] - width;
  498. cb_ptr += src->linesize[1] - width2;
  499. cr_ptr += src->linesize[2] - width2;
  500. }
  501. }
  502. /* XXX: no chroma interpolating is done */
  503. static void yuv420p_to_rgb24(AVPicture *dst, AVPicture *src,
  504. int width, int height)
  505. {
  506. UINT8 *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr, *d, *d1, *d2;
  507. int w, y, cb, cr, r_add, g_add, b_add, width2;
  508. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  509. d = dst->data[0];
  510. y1_ptr = src->data[0];
  511. cb_ptr = src->data[1];
  512. cr_ptr = src->data[2];
  513. width2 = width >> 1;
  514. for(;height > 0; height -= 2) {
  515. d1 = d;
  516. d2 = d + dst->linesize[0];
  517. y2_ptr = y1_ptr + src->linesize[0];
  518. for(w = width2; w > 0; w --) {
  519. cb = cb_ptr[0] - 128;
  520. cr = cr_ptr[0] - 128;
  521. r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
  522. g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
  523. b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
  524. /* output 4 pixels */
  525. RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
  526. RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
  527. RGBOUT(d2[0], d2[1], d2[2], y2_ptr[0]);
  528. RGBOUT(d2[3], d2[4], d2[5], y2_ptr[1]);
  529. d1 += 6;
  530. d2 += 6;
  531. y1_ptr += 2;
  532. y2_ptr += 2;
  533. cb_ptr++;
  534. cr_ptr++;
  535. }
  536. d += 2 * dst->linesize[0];
  537. y1_ptr += 2 * src->linesize[0] - width;
  538. cb_ptr += src->linesize[1] - width2;
  539. cr_ptr += src->linesize[2] - width2;
  540. }
  541. }
  542. /* XXX: no chroma interpolating is done */
  543. static void yuv422p_to_rgb24(AVPicture *dst, AVPicture *src,
  544. int width, int height)
  545. {
  546. UINT8 *y1_ptr, *cb_ptr, *cr_ptr, *d, *d1;
  547. int w, y, cb, cr, r_add, g_add, b_add, width2;
  548. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  549. d = dst->data[0];
  550. y1_ptr = src->data[0];
  551. cb_ptr = src->data[1];
  552. cr_ptr = src->data[2];
  553. width2 = width >> 1;
  554. for(;height > 0; height --) {
  555. d1 = d;
  556. for(w = width2; w > 0; w --) {
  557. cb = cb_ptr[0] - 128;
  558. cr = cr_ptr[0] - 128;
  559. r_add = C_RV * cr + (1 << (SCALE_BITS - 1));
  560. g_add = - C_GU * cb - C_GV * cr + (1 << (SCALE_BITS - 1));
  561. b_add = C_BU * cb + (1 << (SCALE_BITS - 1));
  562. /* output 2 pixels */
  563. RGBOUT(d1[0], d1[1], d1[2], y1_ptr[0]);
  564. RGBOUT(d1[3], d1[4], d1[5], y1_ptr[1]);
  565. d1 += 6;
  566. y1_ptr += 2;
  567. cb_ptr++;
  568. cr_ptr++;
  569. }
  570. d += dst->linesize[0];
  571. y1_ptr += src->linesize[0] - width;
  572. cb_ptr += src->linesize[1] - width2;
  573. cr_ptr += src->linesize[2] - width2;
  574. }
  575. }
  576. /* XXX: always use linesize. Return -1 if not supported */
  577. int img_convert(AVPicture *dst, int dst_pix_fmt,
  578. AVPicture *src, int pix_fmt,
  579. int width, int height)
  580. {
  581. int i;
  582. assert(pix_fmt != PIX_FMT_ANY && dst_pix_fmt != PIX_FMT_ANY);
  583. if (dst_pix_fmt == pix_fmt) {
  584. switch(pix_fmt) {
  585. case PIX_FMT_YUV420P:
  586. for(i=0;i<3;i++) {
  587. if (i == 1) {
  588. width >>= 1;
  589. height >>= 1;
  590. }
  591. img_copy(dst->data[i], dst->linesize[i],
  592. src->data[i], src->linesize[i],
  593. width, height);
  594. }
  595. break;
  596. default:
  597. return -1;
  598. }
  599. } else if (dst_pix_fmt == PIX_FMT_YUV420P) {
  600. switch(pix_fmt) {
  601. case PIX_FMT_YUV411P:
  602. img_copy(dst->data[0], dst->linesize[0],
  603. src->data[0], src->linesize[0],
  604. width, height);
  605. conv411(dst->data[1], dst->linesize[1],
  606. src->data[1], src->linesize[1],
  607. width / 4, height);
  608. conv411(dst->data[2], dst->linesize[2],
  609. src->data[2], src->linesize[2],
  610. width / 4, height);
  611. break;
  612. case PIX_FMT_YUV410P:
  613. img_copy(dst->data[0], dst->linesize[0],
  614. src->data[0], src->linesize[0],
  615. width, height);
  616. grow22(dst->data[1], dst->linesize[1],
  617. src->data[1], src->linesize[1],
  618. width/2, height/2);
  619. grow22(dst->data[2], dst->linesize[2],
  620. src->data[2], src->linesize[2],
  621. width/2, height/2);
  622. break;
  623. case PIX_FMT_YUV420P:
  624. for(i=0;i<3;i++) {
  625. img_copy(dst->data[i], dst->linesize[i],
  626. src->data[i], src->linesize[i],
  627. width, height);
  628. }
  629. break;
  630. case PIX_FMT_YUV422P:
  631. img_copy(dst->data[0], dst->linesize[0],
  632. src->data[0], src->linesize[0],
  633. width, height);
  634. width >>= 1;
  635. height >>= 1;
  636. for(i=1;i<3;i++) {
  637. shrink2(dst->data[i], dst->linesize[i],
  638. src->data[i], src->linesize[i],
  639. width, height);
  640. }
  641. break;
  642. case PIX_FMT_YUV444P:
  643. img_copy(dst->data[0], dst->linesize[0],
  644. src->data[0], src->linesize[0],
  645. width, height);
  646. width >>= 1;
  647. height >>= 1;
  648. for(i=1;i<3;i++) {
  649. shrink22(dst->data[i], dst->linesize[i],
  650. src->data[i], src->linesize[i],
  651. width, height);
  652. }
  653. break;
  654. case PIX_FMT_YUV422:
  655. yuv422_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  656. src->data[0], width, height);
  657. break;
  658. case PIX_FMT_RGB24:
  659. rgb24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  660. src->data[0], width, height);
  661. break;
  662. case PIX_FMT_RGBA32:
  663. rgba32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  664. src->data[0], width, height);
  665. break;
  666. case PIX_FMT_BGR24:
  667. bgr24_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  668. src->data[0], width, height);
  669. break;
  670. case PIX_FMT_BGRA32:
  671. bgra32_to_yuv420p(dst->data[0], dst->data[1], dst->data[2],
  672. src->data[0], width, height);
  673. break;
  674. default:
  675. return -1;
  676. }
  677. } else if (dst_pix_fmt == PIX_FMT_RGB24) {
  678. switch(pix_fmt) {
  679. case PIX_FMT_YUV420P:
  680. yuv420p_to_rgb24(dst, src, width, height);
  681. break;
  682. case PIX_FMT_YUV422P:
  683. yuv422p_to_rgb24(dst, src, width, height);
  684. break;
  685. default:
  686. return -1;
  687. }
  688. } else if (dst_pix_fmt == PIX_FMT_RGBA32) {
  689. switch(pix_fmt) {
  690. case PIX_FMT_YUV420P:
  691. yuv420p_to_rgba32(dst, src, width, height);
  692. break;
  693. default:
  694. return -1;
  695. }
  696. } else if (dst_pix_fmt == PIX_FMT_BGRA32) {
  697. switch(pix_fmt) {
  698. case PIX_FMT_YUV420P:
  699. yuv420p_to_bgra32(dst, src, width, height);
  700. break;
  701. default:
  702. return -1;
  703. }
  704. } else {
  705. return -1;
  706. }
  707. return 0;
  708. }
  #ifdef HAVE_MMX
  /*
   * MMX versions of the [-1 4 2 4 -1] / 8 deinterlacing filter, handling four
   * samples per expansion.  Both macros expect the pointers lum_m4, lum_m3,
   * lum_m2, lum_m1 and lum in scope, mm7 to hold zero (used to widen bytes to
   * words) and mm6 to hold the rounding constant added before the shift by 3.
   * Each expansion ends with its own ';'.
   */

  /* In-place variant: saves the unfiltered lum_m2 samples into lum_m4, then
     overwrites lum_m2 with the filtered result. */
  #define DEINT_INPLACE_LINE_LUM \
      movd_m2r(lum_m4[0],mm0);\
      movd_m2r(lum_m3[0],mm1);\
      movd_m2r(lum_m2[0],mm2);\
      movd_m2r(lum_m1[0],mm3);\
      movd_m2r(lum[0],mm4);\
      punpcklbw_r2r(mm7,mm0);\
      movd_r2m(mm2,lum_m4[0]);\
      punpcklbw_r2r(mm7,mm1);\
      punpcklbw_r2r(mm7,mm2);\
      punpcklbw_r2r(mm7,mm3);\
      punpcklbw_r2r(mm7,mm4);\
      paddw_r2r(mm3,mm1);\
      psllw_i2r(1,mm2);\
      paddw_r2r(mm4,mm0);\
      psllw_i2r(2,mm1);\
      paddw_r2r(mm6,mm2);\
      paddw_r2r(mm2,mm1);\
      psubusw_r2r(mm0,mm1);\
      psrlw_i2r(3,mm1);\
      packuswb_r2r(mm7,mm1);\
      movd_r2m(mm1,lum_m2[0]);

  /* Out-of-place variant: the inputs are left untouched and the filtered
     result is stored to dst. */
  #define DEINT_LINE_LUM \
      movd_m2r(lum_m4[0],mm0);\
      movd_m2r(lum_m3[0],mm1);\
      movd_m2r(lum_m2[0],mm2);\
      movd_m2r(lum_m1[0],mm3);\
      movd_m2r(lum[0],mm4);\
      punpcklbw_r2r(mm7,mm0);\
      punpcklbw_r2r(mm7,mm1);\
      punpcklbw_r2r(mm7,mm2);\
      punpcklbw_r2r(mm7,mm3);\
      punpcklbw_r2r(mm7,mm4);\
      paddw_r2r(mm3,mm1);\
      psllw_i2r(1,mm2);\
      paddw_r2r(mm4,mm0);\
      psllw_i2r(2,mm1);\
      paddw_r2r(mm6,mm2);\
      paddw_r2r(mm2,mm1);\
      psubusw_r2r(mm0,mm1);\
      psrlw_i2r(3,mm1);\
      packuswb_r2r(mm7,mm1);\
      movd_r2m(mm1,dst[0]);
  #endif
  754. /* filter parameters: [-1 4 2 4 -1] // 8 */
  755. static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
  756. int size)
  757. {
  758. #ifndef HAVE_MMX
  759. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  760. int sum;
  761. for(;size > 0;size--) {
  762. sum = -lum_m4[0];
  763. sum += lum_m3[0] << 2;
  764. sum += lum_m2[0] << 1;
  765. sum += lum_m1[0] << 2;
  766. sum += -lum[0];
  767. dst[0] = cm[(sum + 4) >> 3];
  768. lum_m4++;
  769. lum_m3++;
  770. lum_m2++;
  771. lum_m1++;
  772. lum++;
  773. dst++;
  774. }
  775. #else
  776. for (;size > 3; size-=4) {
  777. DEINT_LINE_LUM
  778. lum_m4+=4;
  779. lum_m3+=4;
  780. lum_m2+=4;
  781. lum_m1+=4;
  782. lum+=4;
  783. dst+=4;
  784. }
  785. #endif
  786. }
  787. static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum,
  788. int size)
  789. {
  790. #ifndef HAVE_MMX
  791. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  792. int sum;
  793. for(;size > 0;size--) {
  794. sum = -lum_m4[0];
  795. sum += lum_m3[0] << 2;
  796. sum += lum_m2[0] << 1;
  797. lum_m4[0]=lum_m2[0];
  798. sum += lum_m1[0] << 2;
  799. sum += -lum[0];
  800. lum_m2[0] = cm[(sum + 4) >> 3];
  801. lum_m4++;
  802. lum_m3++;
  803. lum_m2++;
  804. lum_m1++;
  805. lum++;
  806. }
  807. #else
  808. for (;size > 3; size-=4) {
  809. DEINT_INPLACE_LINE_LUM
  810. lum_m4+=4;
  811. lum_m3+=4;
  812. lum_m2+=4;
  813. lum_m1+=4;
  814. lum+=4;
  815. }
  816. #endif
  817. }
  818. /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
  819. top field is copied as is, but the bottom field is deinterlaced
  820. against the top field. */
  821. static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap,
  822. UINT8 *src1, int src_wrap,
  823. int width, int height)
  824. {
  825. UINT8 *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
  826. int y;
  827. src_m2 = src1;
  828. src_m1 = src1;
  829. src_0=&src_m1[src_wrap];
  830. src_p1=&src_0[src_wrap];
  831. src_p2=&src_p1[src_wrap];
  832. for(y=0;y<(height-2);y+=2) {
  833. memcpy(dst,src_m1,width);
  834. dst += dst_wrap;
  835. deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width);
  836. src_m2 = src_0;
  837. src_m1 = src_p1;
  838. src_0 = src_p2;
  839. src_p1 += 2*src_wrap;
  840. src_p2 += 2*src_wrap;
  841. dst += dst_wrap;
  842. }
  843. memcpy(dst,src_m1,width);
  844. dst += dst_wrap;
  845. /* do last line */
  846. deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width);
  847. }
  848. static void deinterlace_bottom_field_inplace(UINT8 *src1, int src_wrap,
  849. int width, int height)
  850. {
  851. UINT8 *src_m1, *src_0, *src_p1, *src_p2;
  852. int y;
  853. UINT8 *buf;
  854. buf = (UINT8*)av_malloc(width);
  855. src_m1 = src1;
  856. memcpy(buf,src_m1,width);
  857. src_0=&src_m1[src_wrap];
  858. src_p1=&src_0[src_wrap];
  859. src_p2=&src_p1[src_wrap];
  860. for(y=0;y<(height-2);y+=2) {
  861. deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width);
  862. src_m1 = src_p1;
  863. src_0 = src_p2;
  864. src_p1 += 2*src_wrap;
  865. src_p2 += 2*src_wrap;
  866. }
  867. /* do last line */
  868. deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width);
  869. av_free(buf);
  870. }
  871. /* deinterlace - if not supported return -1 */
  872. int avpicture_deinterlace(AVPicture *dst, AVPicture *src,
  873. int pix_fmt, int width, int height)
  874. {
  875. int i;
  876. if (pix_fmt != PIX_FMT_YUV420P &&
  877. pix_fmt != PIX_FMT_YUV422P &&
  878. pix_fmt != PIX_FMT_YUV444P)
  879. return -1;
  880. if ((width & 3) != 0 || (height & 3) != 0)
  881. return -1;
  882. #ifdef HAVE_MMX
  883. {
  884. mmx_t rounder;
  885. rounder.uw[0]=4;
  886. rounder.uw[1]=4;
  887. rounder.uw[2]=4;
  888. rounder.uw[3]=4;
  889. pxor_r2r(mm7,mm7);
  890. movq_m2r(rounder,mm6);
  891. }
  892. #endif
  893. for(i=0;i<3;i++) {
  894. if (i == 1) {
  895. switch(pix_fmt) {
  896. case PIX_FMT_YUV420P:
  897. width >>= 1;
  898. height >>= 1;
  899. break;
  900. case PIX_FMT_YUV422P:
  901. width >>= 1;
  902. break;
  903. default:
  904. break;
  905. }
  906. }
  907. if (src == dst) {
  908. deinterlace_bottom_field_inplace(src->data[i], src->linesize[i],
  909. width, height);
  910. } else {
  911. deinterlace_bottom_field(dst->data[i],dst->linesize[i],
  912. src->data[i], src->linesize[i],
  913. width, height);
  914. }
  915. }
  916. #ifdef HAVE_MMX
  917. emms();
  918. #endif
  919. return 0;
  920. }
  /* FIX() is only meant for the conversions in this file */
  #undef FIX