You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

874 lines
20KB

  1. /*
  2. * yuv2rgb.c, Software YUV to RGB converter
  3. *
  4. * Copyright (C) 1999, Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
  5. * All Rights Reserved.
  6. *
  7. * Functions broken out from display_x11.c and several new modes
  8. * added by Håkan Hjort <d95hjort@dtek.chalmers.se>
  9. *
  10. * 15 & 16 bpp support by Franck Sicard <Franck.Sicard@solsoft.fr>
  11. *
  12. * This file is part of mpeg2dec, a free MPEG-2 video decoder
  13. *
  14. * mpeg2dec is free software; you can redistribute it and/or modify
  15. * it under the terms of the GNU General Public License as published by
  16. * the Free Software Foundation; either version 2, or (at your option)
  17. * any later version.
  18. *
  19. * mpeg2dec is distributed in the hope that it will be useful,
  20. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  22. * GNU General Public License for more details.
  23. *
  24. * You should have received a copy of the GNU General Public License
  25. * along with GNU Make; see the file COPYING. If not, write to
  26. * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  27. *
  28. * MMX/MMX2 Template stuff from Michael Niedermayer (michaelni@gmx.at) (needed for fast movntq support)
  29. * 1,4,8bpp support by Michael Niedermayer (michaelni@gmx.at)
  30. */
  31. #include <stdio.h>
  32. #include <stdlib.h>
  33. #include <inttypes.h>
  34. #include "config.h"
  35. //#include "video_out.h"
  36. #include "rgb2rgb.h"
  37. #include "../cpudetect.h"
  38. #include "../mangle.h"
  39. #include "../mp_msg.h"
  40. #ifdef HAVE_MLIB
  41. #include "yuv2rgb_mlib.c"
  42. #endif
  43. #define DITHER1XBPP // only for mmx
  44. #ifdef ARCH_X86
  45. #define CAN_COMPILE_X86_ASM
  46. #endif
  47. uint8_t __attribute__((aligned(8))) dither_8x8_32[8][8]={
  48. { 17, 9, 23, 15, 16, 8, 22, 14, },
  49. { 5, 29, 3, 27, 4, 28, 2, 26, },
  50. { 21, 13, 19, 11, 20, 12, 18, 10, },
  51. { 0, 24, 6, 30, 1, 25, 7, 31, },
  52. { 16, 8, 22, 14, 17, 9, 23, 15, },
  53. { 4, 28, 2, 26, 5, 29, 3, 27, },
  54. { 20, 12, 18, 10, 21, 13, 19, 11, },
  55. { 1, 25, 7, 31, 0, 24, 6, 30, },
  56. };
  57. uint8_t __attribute__((aligned(8))) dither_8x8_64[8][8]={
  58. { 0, 48, 12, 60, 3, 51, 15, 63, },
  59. { 32, 16, 44, 28, 35, 19, 47, 31, },
  60. { 8, 56, 4, 52, 11, 59, 7, 55, },
  61. { 40, 24, 36, 20, 43, 27, 39, 23, },
  62. { 2, 50, 14, 62, 1, 49, 13, 61, },
  63. { 34, 18, 46, 30, 33, 17, 45, 29, },
  64. { 10, 58, 6, 54, 9, 57, 5, 53, },
  65. { 42, 26, 38, 22, 41, 25, 37, 21, },
  66. };
  67. uint8_t __attribute__((aligned(8))) dither_8x8_128[8][8]={
  68. { 68, 36, 92, 60, 66, 34, 90, 58, },
  69. { 20, 116, 12, 108, 18, 114, 10, 106, },
  70. { 84, 52, 76, 44, 82, 50, 74, 42, },
  71. { 0, 96, 24, 120, 6, 102, 30, 126, },
  72. { 64, 32, 88, 56, 70, 38, 94, 62, },
  73. { 16, 112, 8, 104, 22, 118, 14, 110, },
  74. { 80, 48, 72, 40, 86, 54, 78, 46, },
  75. { 4, 100, 28, 124, 2, 98, 26, 122, },
  76. };
#ifdef CAN_COMPILE_X86_ASM
/*
 * 64-bit constants for the MMX/MMX2 converters. They are not referenced by
 * any C code in this file; presumably they are read from the inline asm in
 * yuv2rgb_template.c (included below) -- TODO confirm against the template.
 * Note that aligned(8) only requests 8-byte alignment; nothing here forces
 * cache-line alignment, hence the "hope" in the comments below.
 */
/* hope these constant values are cache line aligned */
uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080;      /* 0x0080 in each 16-bit lane */
uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010;      /* 0x10 in each byte */
uint64_t __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ff;    /* 0x00ff in each 16-bit lane */
uint64_t __attribute__((aligned(8))) mmx_Y_coeff = 0x253f253f253f253f;  /* same 16-bit value in all four lanes */
/* hope these constant values are cache line aligned */
uint64_t __attribute__((aligned(8))) mmx_U_green = 0xf37df37df37df37d;
uint64_t __attribute__((aligned(8))) mmx_U_blue = 0x4093409340934093;
uint64_t __attribute__((aligned(8))) mmx_V_red = 0x3312331233123312;
uint64_t __attribute__((aligned(8))) mmx_V_green = 0xe5fce5fce5fce5fc;
/* hope these constant values are cache line aligned */
uint64_t __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8;  /* keeps the top 5 bits of every byte */
uint64_t __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfc;  /* keeps the top 6 bits of every byte */
/* Byte-select masks with a period of three bytes (names suggest 24 bpp packing -- confirm in the template). */
uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
// the volatile is required because gcc otherwise optimizes some writes away not knowing that these
// are read in the asm block
volatile uint64_t __attribute__((aligned(8))) b5Dither;
volatile uint64_t __attribute__((aligned(8))) g5Dither;
volatile uint64_t __attribute__((aligned(8))) g6Dither;
volatile uint64_t __attribute__((aligned(8))) r5Dither;
/* Two-entry dither pattern tables; how they are indexed is decided by the template (not visible here). */
uint64_t __attribute__((aligned(8))) dither4[2]={
0x0103010301030103LL,
0x0200020002000200LL,};
uint64_t __attribute__((aligned(8))) dither8[2]={
0x0602060206020602LL,
0x0004000400040004LL,};
/*
 * The template is compiled twice with different feature macros. RENAME()
 * appends a suffix to every identifier the template wraps with it, giving
 * e.g. yuv2rgb_init_MMX and yuv2rgb_init_MMX2 (both called by yuv2rgb_init).
 * The macro state left behind after this section is the MMX2 configuration
 * (HAVE_MMX, HAVE_MMX2 and ARCH_X86 defined, HAVE_3DNOW undefined).
 */
#undef HAVE_MMX
#undef ARCH_X86
//MMX versions
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "yuv2rgb_template.c"
//MMX2 versions
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "yuv2rgb_template.c"
#endif // CAN_COMPILE_X86_ASM
  125. uint32_t matrix_coefficients = 6;
  126. const int32_t Inverse_Table_6_9[8][4] = {
  127. {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
  128. {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
  129. {104597, 132201, 25675, 53279}, /* unspecified */
  130. {104597, 132201, 25675, 53279}, /* reserved */
  131. {104448, 132798, 24759, 53109}, /* FCC */
  132. {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
  133. {104597, 132201, 25675, 53279}, /* SMPTE 170M */
  134. {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
  135. };
/* Forward declaration: builds the lookup tables for the plain C converters (defined further below). */
static void yuv2rgb_c_init (int bpp, int mode);
/* Conversion routine selected by yuv2rgb_init(); NULL until that function has run. */
yuv2rgb_fun yuv2rgb;
  138. static void (* yuv2rgb_c_internal) (uint8_t *, uint8_t *,
  139. uint8_t *, uint8_t *,
  140. void *, void *, int, int);
  141. static void yuv2rgb_c (void * dst, uint8_t * py,
  142. uint8_t * pu, uint8_t * pv,
  143. int h_size, int v_size,
  144. int rgb_stride, int y_stride, int uv_stride)
  145. {
  146. v_size >>= 1;
  147. while (v_size--) {
  148. yuv2rgb_c_internal (py, py + y_stride, pu, pv, dst, dst + rgb_stride,
  149. h_size, v_size<<1);
  150. py += 2 * y_stride;
  151. pu += uv_stride;
  152. pv += uv_stride;
  153. dst += 2 * rgb_stride;
  154. }
  155. }
  156. void yuv2rgb_init (int bpp, int mode)
  157. {
  158. yuv2rgb = NULL;
  159. #ifdef CAN_COMPILE_X86_ASM
  160. if(gCpuCaps.hasMMX2)
  161. {
  162. if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) {
  163. yuv2rgb = yuv2rgb_init_MMX2 (bpp, mode);
  164. if (yuv2rgb != NULL)
  165. mp_msg(MSGT_SWS,MSGL_INFO,"Using MMX2 for colorspace transform\n");
  166. else
  167. mp_msg(MSGT_SWS,MSGL_WARN,"Cannot init MMX2 colorspace transform\n");
  168. }
  169. }
  170. else if(gCpuCaps.hasMMX)
  171. {
  172. if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) {
  173. yuv2rgb = yuv2rgb_init_MMX (bpp, mode);
  174. if (yuv2rgb != NULL)
  175. mp_msg(MSGT_SWS,MSGL_INFO,"Using MMX for colorspace transform\n");
  176. else
  177. mp_msg(MSGT_SWS,MSGL_WARN,"Cannot init MMX colorspace transform\n");
  178. }
  179. }
  180. #endif
  181. #ifdef HAVE_MLIB
  182. if (yuv2rgb == NULL /*&& (config.flags & VO_MLIB_ENABLE)*/) {
  183. yuv2rgb = yuv2rgb_init_mlib (bpp, mode);
  184. if (yuv2rgb != NULL)
  185. mp_msg(MSGT_SWS,MSGL_INFO,"Using mlib for colorspace transform\n");
  186. }
  187. #endif
  188. if (yuv2rgb == NULL) {
  189. mp_msg(MSGT_SWS,MSGL_INFO,"No accelerated colorspace conversion found\n");
  190. yuv2rgb_c_init (bpp, mode);
  191. yuv2rgb = (yuv2rgb_fun)yuv2rgb_c;
  192. }
  193. }
  194. void * table_rV[256];
  195. void * table_gU[256];
  196. int table_gV[256];
  197. void * table_bU[256];
  198. #define RGB(i) \
  199. U = pu[i]; \
  200. V = pv[i]; \
  201. r = table_rV[V]; \
  202. g = table_gU[U] + table_gV[V]; \
  203. b = table_bU[U];
  204. #define DST1(i) \
  205. Y = py_1[2*i]; \
  206. dst_1[2*i] = r[Y] + g[Y] + b[Y]; \
  207. Y = py_1[2*i+1]; \
  208. dst_1[2*i+1] = r[Y] + g[Y] + b[Y];
  209. #define DST2(i) \
  210. Y = py_2[2*i]; \
  211. dst_2[2*i] = r[Y] + g[Y] + b[Y]; \
  212. Y = py_2[2*i+1]; \
  213. dst_2[2*i+1] = r[Y] + g[Y] + b[Y];
  214. #define DST1RGB(i) \
  215. Y = py_1[2*i]; \
  216. dst_1[6*i] = r[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = b[Y]; \
  217. Y = py_1[2*i+1]; \
  218. dst_1[6*i+3] = r[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = b[Y];
  219. #define DST2RGB(i) \
  220. Y = py_2[2*i]; \
  221. dst_2[6*i] = r[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = b[Y]; \
  222. Y = py_2[2*i+1]; \
  223. dst_2[6*i+3] = r[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = b[Y];
  224. #define DST1BGR(i) \
  225. Y = py_1[2*i]; \
  226. dst_1[6*i] = b[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = r[Y]; \
  227. Y = py_1[2*i+1]; \
  228. dst_1[6*i+3] = b[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = r[Y];
  229. #define DST2BGR(i) \
  230. Y = py_2[2*i]; \
  231. dst_2[6*i] = b[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = r[Y]; \
  232. Y = py_2[2*i+1]; \
  233. dst_2[6*i+3] = b[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = r[Y];
  234. static void yuv2rgb_c_32 (uint8_t * py_1, uint8_t * py_2,
  235. uint8_t * pu, uint8_t * pv,
  236. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  237. {
  238. int U, V, Y;
  239. uint32_t * r, * g, * b;
  240. uint32_t * dst_1, * dst_2;
  241. h_size >>= 3;
  242. dst_1 = _dst_1;
  243. dst_2 = _dst_2;
  244. while (h_size--) {
  245. RGB(0);
  246. DST1(0);
  247. DST2(0);
  248. RGB(1);
  249. DST2(1);
  250. DST1(1);
  251. RGB(2);
  252. DST1(2);
  253. DST2(2);
  254. RGB(3);
  255. DST2(3);
  256. DST1(3);
  257. pu += 4;
  258. pv += 4;
  259. py_1 += 8;
  260. py_2 += 8;
  261. dst_1 += 8;
  262. dst_2 += 8;
  263. }
  264. }
  265. // This is very near from the yuv2rgb_c_32 code
  266. static void yuv2rgb_c_24_rgb (uint8_t * py_1, uint8_t * py_2,
  267. uint8_t * pu, uint8_t * pv,
  268. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  269. {
  270. int U, V, Y;
  271. uint8_t * r, * g, * b;
  272. uint8_t * dst_1, * dst_2;
  273. h_size >>= 3;
  274. dst_1 = _dst_1;
  275. dst_2 = _dst_2;
  276. while (h_size--) {
  277. RGB(0);
  278. DST1RGB(0);
  279. DST2RGB(0);
  280. RGB(1);
  281. DST2RGB(1);
  282. DST1RGB(1);
  283. RGB(2);
  284. DST1RGB(2);
  285. DST2RGB(2);
  286. RGB(3);
  287. DST2RGB(3);
  288. DST1RGB(3);
  289. pu += 4;
  290. pv += 4;
  291. py_1 += 8;
  292. py_2 += 8;
  293. dst_1 += 24;
  294. dst_2 += 24;
  295. }
  296. }
  297. // only trivial mods from yuv2rgb_c_24_rgb
  298. static void yuv2rgb_c_24_bgr (uint8_t * py_1, uint8_t * py_2,
  299. uint8_t * pu, uint8_t * pv,
  300. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  301. {
  302. int U, V, Y;
  303. uint8_t * r, * g, * b;
  304. uint8_t * dst_1, * dst_2;
  305. h_size >>= 3;
  306. dst_1 = _dst_1;
  307. dst_2 = _dst_2;
  308. while (h_size--) {
  309. RGB(0);
  310. DST1BGR(0);
  311. DST2BGR(0);
  312. RGB(1);
  313. DST2BGR(1);
  314. DST1BGR(1);
  315. RGB(2);
  316. DST1BGR(2);
  317. DST2BGR(2);
  318. RGB(3);
  319. DST2BGR(3);
  320. DST1BGR(3);
  321. pu += 4;
  322. pv += 4;
  323. py_1 += 8;
  324. py_2 += 8;
  325. dst_1 += 24;
  326. dst_2 += 24;
  327. }
  328. }
  329. // This is exactly the same code as yuv2rgb_c_32 except for the types of
  330. // r, g, b, dst_1, dst_2
  331. static void yuv2rgb_c_16 (uint8_t * py_1, uint8_t * py_2,
  332. uint8_t * pu, uint8_t * pv,
  333. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  334. {
  335. int U, V, Y;
  336. uint16_t * r, * g, * b;
  337. uint16_t * dst_1, * dst_2;
  338. h_size >>= 3;
  339. dst_1 = _dst_1;
  340. dst_2 = _dst_2;
  341. while (h_size--) {
  342. RGB(0);
  343. DST1(0);
  344. DST2(0);
  345. RGB(1);
  346. DST2(1);
  347. DST1(1);
  348. RGB(2);
  349. DST1(2);
  350. DST2(2);
  351. RGB(3);
  352. DST2(3);
  353. DST1(3);
  354. pu += 4;
  355. pv += 4;
  356. py_1 += 8;
  357. py_2 += 8;
  358. dst_1 += 8;
  359. dst_2 += 8;
  360. }
  361. }
  362. // This is exactly the same code as yuv2rgb_c_32 except for the types of
  363. // r, g, b, dst_1, dst_2
  364. static void yuv2rgb_c_8 (uint8_t * py_1, uint8_t * py_2,
  365. uint8_t * pu, uint8_t * pv,
  366. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  367. {
  368. int U, V, Y;
  369. uint8_t * r, * g, * b;
  370. uint8_t * dst_1, * dst_2;
  371. h_size >>= 3;
  372. dst_1 = _dst_1;
  373. dst_2 = _dst_2;
  374. while (h_size--) {
  375. RGB(0);
  376. DST1(0);
  377. DST2(0);
  378. RGB(1);
  379. DST2(1);
  380. DST1(1);
  381. RGB(2);
  382. DST1(2);
  383. DST2(2);
  384. RGB(3);
  385. DST2(3);
  386. DST1(3);
  387. pu += 4;
  388. pv += 4;
  389. py_1 += 8;
  390. py_2 += 8;
  391. dst_1 += 8;
  392. dst_2 += 8;
  393. }
  394. }
  395. // r, g, b, dst_1, dst_2
  396. static void yuv2rgb_c_8_ordered_dither (uint8_t * py_1, uint8_t * py_2,
  397. uint8_t * pu, uint8_t * pv,
  398. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  399. {
  400. int U, V, Y;
  401. uint8_t * r, * g, * b;
  402. uint8_t * dst_1, * dst_2;
  403. h_size >>= 3;
  404. dst_1 = _dst_1;
  405. dst_2 = _dst_2;
  406. while (h_size--) {
  407. uint8_t *d32= dither_8x8_32[v_pos&7];
  408. uint8_t *d64= dither_8x8_64[v_pos&7];
  409. #define DST1bpp8(i,o) \
  410. Y = py_1[2*i]; \
  411. dst_1[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
  412. Y = py_1[2*i+1]; \
  413. dst_1[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
  414. #define DST2bpp8(i,o) \
  415. Y = py_2[2*i]; \
  416. dst_2[2*i] = r[Y+d32[8+o]] + g[Y+d32[8+o]] + b[Y+d64[8+o]]; \
  417. Y = py_2[2*i+1]; \
  418. dst_2[2*i+1] = r[Y+d32[9+o]] + g[Y+d32[9+o]] + b[Y+d64[9+o]];
  419. RGB(0);
  420. DST1bpp8(0,0);
  421. DST2bpp8(0,0);
  422. RGB(1);
  423. DST2bpp8(1,2);
  424. DST1bpp8(1,2);
  425. RGB(2);
  426. DST1bpp8(2,4);
  427. DST2bpp8(2,4);
  428. RGB(3);
  429. DST2bpp8(3,6);
  430. DST1bpp8(3,6);
  431. pu += 4;
  432. pv += 4;
  433. py_1 += 8;
  434. py_2 += 8;
  435. dst_1 += 8;
  436. dst_2 += 8;
  437. }
  438. }
  439. // This is exactly the same code as yuv2rgb_c_32 except for the types of
  440. // r, g, b, dst_1, dst_2
  441. static void yuv2rgb_c_4 (uint8_t * py_1, uint8_t * py_2,
  442. uint8_t * pu, uint8_t * pv,
  443. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  444. {
  445. int U, V, Y, out;
  446. uint8_t * r, * g, * b;
  447. uint8_t * dst_1, * dst_2;
  448. h_size >>= 3;
  449. dst_1 = _dst_1;
  450. dst_2 = _dst_2;
  451. while (h_size--) {
  452. RGB(0);
  453. DST1(0);
  454. DST2(0);
  455. RGB(1);
  456. DST2(1);
  457. DST1(1);
  458. RGB(2);
  459. DST1(2);
  460. DST2(2);
  461. RGB(3);
  462. DST2(3);
  463. DST1(3);
  464. pu += 4;
  465. pv += 4;
  466. py_1 += 8;
  467. py_2 += 8;
  468. dst_1 += 8;
  469. dst_2 += 8;
  470. }
  471. }
  472. static void yuv2rgb_c_4_ordered_dither (uint8_t * py_1, uint8_t * py_2,
  473. uint8_t * pu, uint8_t * pv,
  474. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  475. {
  476. int U, V, Y;
  477. uint8_t * r, * g, * b;
  478. uint8_t * dst_1, * dst_2;
  479. h_size >>= 3;
  480. dst_1 = _dst_1;
  481. dst_2 = _dst_2;
  482. while (h_size--) {
  483. uint8_t *d64= dither_8x8_64 [v_pos&7];
  484. uint8_t *d128=dither_8x8_128[v_pos&7];
  485. #define DST1bpp4(i,o) \
  486. Y = py_1[2*i]; \
  487. dst_1[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
  488. Y = py_1[2*i+1]; \
  489. dst_1[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
  490. #define DST2bpp4(i,o) \
  491. Y = py_2[2*i]; \
  492. dst_2[2*i] = r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]]; \
  493. Y = py_2[2*i+1]; \
  494. dst_2[2*i+1] = r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]];
  495. RGB(0);
  496. DST1bpp4(0,0);
  497. DST2bpp4(0,0);
  498. RGB(1);
  499. DST2bpp4(1,2);
  500. DST1bpp4(1,2);
  501. RGB(2);
  502. DST1bpp4(2,4);
  503. DST2bpp4(2,4);
  504. RGB(3);
  505. DST2bpp4(3,6);
  506. DST1bpp4(3,6);
  507. pu += 4;
  508. pv += 4;
  509. py_1 += 8;
  510. py_2 += 8;
  511. dst_1 += 8;
  512. dst_2 += 8;
  513. }
  514. }
  515. static void yuv2rgb_c_1_ordered_dither (uint8_t * py_1, uint8_t * py_2,
  516. uint8_t * pu, uint8_t * pv,
  517. void * _dst_1, void * _dst_2, int h_size, int v_pos)
  518. {
  519. int U, V, Y;
  520. uint8_t * r, * g, * b;
  521. uint8_t * dst_1, * dst_2;
  522. h_size >>= 3;
  523. dst_1 = _dst_1;
  524. dst_2 = _dst_2;
  525. g= table_gU[128] + table_gV[128];
  526. while (h_size--) {
  527. uint8_t *d128=dither_8x8_128[v_pos&7];
  528. char out_1=0, out_2=0;
  529. #define DST1bpp1(i,o) \
  530. Y = py_1[2*i]; \
  531. out_1+= out_1 + g[Y+d128[0+o]]; \
  532. Y = py_1[2*i+1]; \
  533. out_1+= out_1 + g[Y+d128[1+o]];
  534. #define DST2bpp1(i,o) \
  535. Y = py_2[2*i]; \
  536. out_2+= out_2 + g[Y+d128[8+o]]; \
  537. Y = py_2[2*i+1]; \
  538. out_2+= out_2 + g[Y+d128[9+o]];
  539. DST1bpp1(0,0);
  540. DST2bpp1(0,0);
  541. DST2bpp1(1,2);
  542. DST1bpp1(1,2);
  543. DST1bpp1(2,4);
  544. DST2bpp1(2,4);
  545. DST2bpp1(3,6);
  546. DST1bpp1(3,6);
  547. dst_1[0]= out_1;
  548. dst_2[0]= out_2;
  549. pu += 4;
  550. pv += 4;
  551. py_1 += 8;
  552. py_2 += 8;
  553. dst_1 ++;
  554. dst_2 ++;
  555. }
  556. }
  557. static int div_round (int dividend, int divisor)
  558. {
  559. if (dividend > 0)
  560. return (dividend + (divisor>>1)) / divisor;
  561. else
  562. return -((-dividend + (divisor>>1)) / divisor);
  563. }
  564. static void yuv2rgb_c_init (int bpp, int mode)
  565. {
  566. int i;
  567. uint8_t table_Y[1024];
  568. uint32_t *table_32 = 0;
  569. uint16_t *table_16 = 0;
  570. uint8_t *table_8 = 0;
  571. uint8_t *table_332 = 0;
  572. uint8_t *table_121 = 0;
  573. uint8_t *table_1 = 0;
  574. int entry_size = 0;
  575. void *table_r = 0, *table_g = 0, *table_b = 0;
  576. int crv = Inverse_Table_6_9[matrix_coefficients][0];
  577. int cbu = Inverse_Table_6_9[matrix_coefficients][1];
  578. int cgu = -Inverse_Table_6_9[matrix_coefficients][2];
  579. int cgv = -Inverse_Table_6_9[matrix_coefficients][3];
  580. for (i = 0; i < 1024; i++) {
  581. int j;
  582. j = (76309 * (i - 384 - 16) + 32768) >> 16;
  583. j = (j < 0) ? 0 : ((j > 255) ? 255 : j);
  584. table_Y[i] = j;
  585. }
  586. switch (bpp) {
  587. case 32:
  588. yuv2rgb_c_internal = yuv2rgb_c_32;
  589. table_32 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t));
  590. entry_size = sizeof (uint32_t);
  591. table_r = table_32 + 197;
  592. table_b = table_32 + 197 + 685;
  593. table_g = table_32 + 197 + 2*682;
  594. for (i = -197; i < 256+197; i++)
  595. ((uint32_t *)table_r)[i] = table_Y[i+384] << ((mode==MODE_RGB) ? 16 : 0);
  596. for (i = -132; i < 256+132; i++)
  597. ((uint32_t *)table_g)[i] = table_Y[i+384] << 8;
  598. for (i = -232; i < 256+232; i++)
  599. ((uint32_t *)table_b)[i] = table_Y[i+384] << ((mode==MODE_RGB) ? 0 : 16);
  600. break;
  601. case 24:
  602. // yuv2rgb_c_internal = (mode==MODE_RGB) ? yuv2rgb_c_24_rgb : yuv2rgb_c_24_bgr;
  603. yuv2rgb_c_internal = (mode!=MODE_RGB) ? yuv2rgb_c_24_rgb : yuv2rgb_c_24_bgr;
  604. table_8 = malloc ((256 + 2*232) * sizeof (uint8_t));
  605. entry_size = sizeof (uint8_t);
  606. table_r = table_g = table_b = table_8 + 232;
  607. for (i = -232; i < 256+232; i++)
  608. ((uint8_t * )table_b)[i] = table_Y[i+384];
  609. break;
  610. case 15:
  611. case 16:
  612. yuv2rgb_c_internal = yuv2rgb_c_16;
  613. table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t));
  614. entry_size = sizeof (uint16_t);
  615. table_r = table_16 + 197;
  616. table_b = table_16 + 197 + 685;
  617. table_g = table_16 + 197 + 2*682;
  618. for (i = -197; i < 256+197; i++) {
  619. int j = table_Y[i+384] >> 3;
  620. if (mode == MODE_RGB)
  621. j <<= ((bpp==16) ? 11 : 10);
  622. ((uint16_t *)table_r)[i] = j;
  623. }
  624. for (i = -132; i < 256+132; i++) {
  625. int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3);
  626. ((uint16_t *)table_g)[i] = j << 5;
  627. }
  628. for (i = -232; i < 256+232; i++) {
  629. int j = table_Y[i+384] >> 3;
  630. if (mode == MODE_BGR)
  631. j <<= ((bpp==16) ? 11 : 10);
  632. ((uint16_t *)table_b)[i] = j;
  633. }
  634. break;
  635. case 8:
  636. yuv2rgb_c_internal = yuv2rgb_c_8_ordered_dither; //yuv2rgb_c_8;
  637. table_332 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t));
  638. entry_size = sizeof (uint8_t);
  639. table_r = table_332 + 197;
  640. table_b = table_332 + 197 + 685;
  641. table_g = table_332 + 197 + 2*682;
  642. for (i = -197; i < 256+197; i++) {
  643. int j = (table_Y[i+384] - 16) >> 5;
  644. if(j<0) j=0;
  645. if (mode == MODE_RGB)
  646. j <<= 5;
  647. ((uint8_t *)table_r)[i] = j;
  648. }
  649. for (i = -132; i < 256+132; i++) {
  650. int j = (table_Y[i+384] - 16) >> 5;
  651. if(j<0) j=0;
  652. if (mode == MODE_BGR)
  653. j <<= 1;
  654. ((uint8_t *)table_g)[i] = j << 2;
  655. }
  656. for (i = -232; i < 256+232; i++) {
  657. int j = (table_Y[i+384] - 32) >> 6;
  658. if(j<0) j=0;
  659. if (mode == MODE_BGR)
  660. j <<= 6;
  661. ((uint8_t *)table_b)[i] = j;
  662. }
  663. break;
  664. case 4:
  665. yuv2rgb_c_internal = yuv2rgb_c_4_ordered_dither; //yuv2rgb_c_4;
  666. table_121 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t));
  667. entry_size = sizeof (uint8_t);
  668. table_r = table_121 + 197;
  669. table_b = table_121 + 197 + 685;
  670. table_g = table_121 + 197 + 2*682;
  671. for (i = -197; i < 256+197; i++) {
  672. int j = (table_Y[i+384] - 64) >> 7;
  673. if(j<0) j=0;
  674. if (mode == MODE_RGB)
  675. j <<= 3;
  676. ((uint8_t *)table_r)[i] = j;
  677. }
  678. for (i = -132; i < 256+132; i++) {
  679. int j = (table_Y[i+384] - 32) >> 6;
  680. if(j<0) j=0;
  681. ((uint8_t *)table_g)[i] = j << 1;
  682. }
  683. for (i = -232; i < 256+232; i++) {
  684. int j =(table_Y[i+384] - 64) >> 7;
  685. if(j<0) j=0;
  686. if (mode == MODE_BGR)
  687. j <<= 3;
  688. ((uint8_t *)table_b)[i] = j;
  689. }
  690. break;
  691. case 1:
  692. yuv2rgb_c_internal = yuv2rgb_c_1_ordered_dither;
  693. table_1 = malloc ((132*2 + 256) * sizeof (uint8_t));
  694. entry_size = sizeof (uint8_t);
  695. table_g = table_1 + 132;
  696. table_r = table_b = NULL;
  697. for (i = -132; i < 256+132; i++) {
  698. int j = (table_Y[i+384] - 64) >> 7;
  699. if(j<0) j=0;
  700. ((uint8_t *)table_g)[i] = j;
  701. }
  702. break;
  703. default:
  704. mp_msg(MSGT_SWS,MSGL_ERR,"%ibpp not supported by yuv2rgb\n", bpp);
  705. //exit (1);
  706. }
  707. for (i = 0; i < 256; i++) {
  708. table_rV[i] = table_r + entry_size * div_round (crv * (i-128), 76309);
  709. table_gU[i] = table_g + entry_size * div_round (cgu * (i-128), 76309);
  710. table_gV[i] = entry_size * div_round (cgv * (i-128), 76309);
  711. table_bU[i] = table_b + entry_size * div_round (cbu * (i-128), 76309);
  712. }
  713. }