You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

851 lines
25KB

  1. /*
  2. *
 * rgb2rgb.c, software RGB to RGB converter
 *            also: software PAL8 to RGB converter
 *                  software YUV to YUV converter
 *                  software YUV to RGB converter
  7. * Written by Nick Kurshev.
  8. * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL)
  9. */
  10. #include <inttypes.h>
  11. #include "../config.h"
  12. #include "rgb2rgb.h"
  13. #include "../cpudetect.h"
  14. #include "../mangle.h"
#ifdef ARCH_X86
#define CAN_COMPILE_X86_ASM
#endif
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
#ifdef CAN_COMPILE_X86_ASM
/* 8-byte-aligned constants referenced by the MMX/MMX2/3DNow code pulled in
   from rgb2rgb_template.c below.  maskNNr/g/b select one colour channel of
   packed NN-bit pixels; mask24l/h/hh/... select byte ranges of a qword when
   repacking 24bpp data. */
static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL;
static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL;
static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
static const uint64_t mask32 __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
static const uint64_t mask24b __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL;
static const uint64_t mask24g __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL;
static const uint64_t mask24r __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL;
static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
static const uint64_t mask24hh __attribute__((aligned(8))) = 0xffff000000000000ULL;
static const uint64_t mask24hhh __attribute__((aligned(8))) = 0xffffffff00000000ULL;
static const uint64_t mask24hhhh __attribute__((aligned(8))) = 0xffffffffffff0000ULL;
static const uint64_t mask15b __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */
static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */
static const uint64_t mask15s __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL;
static const uint64_t mask15g __attribute__((aligned(8))) = 0x03E003E003E003E0ULL;
static const uint64_t mask15r __attribute__((aligned(8))) = 0x7C007C007C007C00ULL;
#define mask16b mask15b
static const uint64_t mask16g __attribute__((aligned(8))) = 0x07E007E007E007E0ULL;
static const uint64_t mask16r __attribute__((aligned(8))) = 0xF800F800F800F800ULL;
static const uint64_t red_16mask __attribute__((aligned(8))) = 0x0000f8000000f800ULL;
static const uint64_t green_16mask __attribute__((aligned(8)))= 0x000007e0000007e0ULL;
static const uint64_t blue_16mask __attribute__((aligned(8))) = 0x0000001f0000001fULL;
static const uint64_t red_15mask __attribute__((aligned(8))) = 0x00007c000000f800ULL;
static const uint64_t green_15mask __attribute__((aligned(8)))= 0x000003e0000007e0ULL;
static const uint64_t blue_15mask __attribute__((aligned(8))) = 0x0000001f0000001fULL;
/* BGR -> YV12 coefficient words for pmaddwd; either 7-bit (fast, slightly
   less precise) or 15-bit fixed point, selected by FAST_BGR2YV12 above. */
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif
/* Y range starts at 16, chroma is centred on 128 (replicated per byte). */
static const uint64_t bgr2YOffset __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111 __attribute__((aligned(8))) = 0x0001000100010001ULL;
/* dither tables, currently disabled */
#if 0
static volatile uint64_t __attribute__((aligned(8))) b5Dither;
static volatile uint64_t __attribute__((aligned(8))) g5Dither;
static volatile uint64_t __attribute__((aligned(8))) g6Dither;
static volatile uint64_t __attribute__((aligned(8))) r5Dither;
static uint64_t __attribute__((aligned(8))) dither4[2]={
0x0103010301030103LL,
0x0200020002000200LL,};
static uint64_t __attribute__((aligned(8))) dither8[2]={
0x0602060206020602LL,
0x0004000400040004LL,};
#endif
#endif
/* 8-bit fixed-point RGB -> YUV coefficients used by the C paths
   (values match the usual BT.601-style studio-range factors —
   NOTE(review): verify against the template code's expectations). */
#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
/* Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one.
   rgb2rgb_template.c is included once per variant with HAVE_* toggled so
   RENAME() suffixes every function with _C, _MMX, _MMX2 or _3DNow. */
//Plain C versions
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#undef HAVE_SSE2
#define RENAME(a) a ## _C
#include "rgb2rgb_template.c"
#ifdef CAN_COMPILE_X86_ASM
//MMX versions
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_SSE2
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "rgb2rgb_template.c"
//MMX2 versions
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_SSE2
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "rgb2rgb_template.c"
//3DNOW versions
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#undef HAVE_SSE2
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "rgb2rgb_template.c"
#endif //CAN_COMPILE_X86_ASM
  121. void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
  122. {
  123. #ifdef CAN_COMPILE_X86_ASM
  124. // ordered per speed fasterst first
  125. if(gCpuCaps.hasMMX2)
  126. rgb24to32_MMX2(src, dst, src_size);
  127. else if(gCpuCaps.has3DNow)
  128. rgb24to32_3DNow(src, dst, src_size);
  129. else if(gCpuCaps.hasMMX)
  130. rgb24to32_MMX(src, dst, src_size);
  131. else
  132. #endif
  133. rgb24to32_C(src, dst, src_size);
  134. }
  135. void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
  136. {
  137. #ifdef CAN_COMPILE_X86_ASM
  138. // ordered per speed fasterst first
  139. if(gCpuCaps.hasMMX2)
  140. rgb15to24_MMX2(src, dst, src_size);
  141. else if(gCpuCaps.has3DNow)
  142. rgb15to24_3DNow(src, dst, src_size);
  143. else if(gCpuCaps.hasMMX)
  144. rgb15to24_MMX(src, dst, src_size);
  145. else
  146. #endif
  147. rgb15to24_C(src, dst, src_size);
  148. }
  149. void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
  150. {
  151. #ifdef CAN_COMPILE_X86_ASM
  152. // ordered per speed fasterst first
  153. if(gCpuCaps.hasMMX2)
  154. rgb16to24_MMX2(src, dst, src_size);
  155. else if(gCpuCaps.has3DNow)
  156. rgb16to24_3DNow(src, dst, src_size);
  157. else if(gCpuCaps.hasMMX)
  158. rgb16to24_MMX(src, dst, src_size);
  159. else
  160. #endif
  161. rgb16to24_C(src, dst, src_size);
  162. }
  163. void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
  164. {
  165. #ifdef CAN_COMPILE_X86_ASM
  166. // ordered per speed fasterst first
  167. if(gCpuCaps.hasMMX2)
  168. rgb15to32_MMX2(src, dst, src_size);
  169. else if(gCpuCaps.has3DNow)
  170. rgb15to32_3DNow(src, dst, src_size);
  171. else if(gCpuCaps.hasMMX)
  172. rgb15to32_MMX(src, dst, src_size);
  173. else
  174. #endif
  175. rgb15to32_C(src, dst, src_size);
  176. }
  177. void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
  178. {
  179. #ifdef CAN_COMPILE_X86_ASM
  180. // ordered per speed fasterst first
  181. if(gCpuCaps.hasMMX2)
  182. rgb16to32_MMX2(src, dst, src_size);
  183. else if(gCpuCaps.has3DNow)
  184. rgb16to32_3DNow(src, dst, src_size);
  185. else if(gCpuCaps.hasMMX)
  186. rgb16to32_MMX(src, dst, src_size);
  187. else
  188. #endif
  189. rgb16to32_C(src, dst, src_size);
  190. }
  191. void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
  192. {
  193. #ifdef CAN_COMPILE_X86_ASM
  194. // ordered per speed fasterst first
  195. if(gCpuCaps.hasMMX2)
  196. rgb32to24_MMX2(src, dst, src_size);
  197. else if(gCpuCaps.has3DNow)
  198. rgb32to24_3DNow(src, dst, src_size);
  199. else if(gCpuCaps.hasMMX)
  200. rgb32to24_MMX(src, dst, src_size);
  201. else
  202. #endif
  203. rgb32to24_C(src, dst, src_size);
  204. }
  205. /*
  206. Original by Strepto/Astral
  207. ported to gcc & bugfixed : A'rpi
  208. MMX2, 3DNOW optimization by Nick Kurshev
  209. 32bit c version, and and&add trick by Michael Niedermayer
  210. */
  211. void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size)
  212. {
  213. #ifdef CAN_COMPILE_X86_ASM
  214. // ordered per speed fasterst first
  215. if(gCpuCaps.hasMMX2)
  216. rgb15to16_MMX2(src, dst, src_size);
  217. else if(gCpuCaps.has3DNow)
  218. rgb15to16_3DNow(src, dst, src_size);
  219. else if(gCpuCaps.hasMMX)
  220. rgb15to16_MMX(src, dst, src_size);
  221. else
  222. #endif
  223. rgb15to16_C(src, dst, src_size);
  224. }
  225. void rgb16to15(const uint8_t *src,uint8_t *dst,unsigned src_size)
  226. {
  227. #ifdef CAN_COMPILE_X86_ASM
  228. // ordered per speed fasterst first
  229. if(gCpuCaps.hasMMX2)
  230. rgb16to15_MMX2(src, dst, src_size);
  231. else if(gCpuCaps.has3DNow)
  232. rgb16to15_3DNow(src, dst, src_size);
  233. else if(gCpuCaps.hasMMX)
  234. rgb16to15_MMX(src, dst, src_size);
  235. else
  236. #endif
  237. rgb16to15_C(src, dst, src_size);
  238. }
  239. /**
  240. * Pallete is assumed to contain bgr32
  241. */
  242. void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
  243. {
  244. unsigned i;
  245. for(i=0; i<num_pixels; i++)
  246. ((unsigned *)dst)[i] = ((unsigned *)palette)[ src[i] ];
  247. }
  248. /**
  249. * Pallete is assumed to contain bgr32
  250. */
  251. void palette8torgb24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
  252. {
  253. unsigned i;
  254. /*
  255. writes 1 byte o much and might cause alignment issues on some architectures?
  256. for(i=0; i<num_pixels; i++)
  257. ((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[ src[i] ];
  258. */
  259. for(i=0; i<num_pixels; i++)
  260. {
  261. //FIXME slow?
  262. dst[0]= palette[ src[i]*4+0 ];
  263. dst[1]= palette[ src[i]*4+1 ];
  264. dst[2]= palette[ src[i]*4+2 ];
  265. dst+= 3;
  266. }
  267. }
  268. void bgr24torgb24(const uint8_t *src, uint8_t *dst, unsigned src_size)
  269. {
  270. #ifdef CAN_COMPILE_X86_ASM
  271. // ordered per speed fasterst first
  272. if(gCpuCaps.hasMMX2)
  273. bgr24torgb24_MMX2(src, dst, src_size);
  274. else if(gCpuCaps.has3DNow)
  275. bgr24torgb24_3DNow(src, dst, src_size);
  276. else if(gCpuCaps.hasMMX)
  277. bgr24torgb24_MMX(src, dst, src_size);
  278. else
  279. bgr24torgb24_C(src, dst, src_size);
  280. #else
  281. bgr24torgb24_C(src, dst, src_size);
  282. #endif
  283. }
  284. void rgb32to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
  285. {
  286. #ifdef CAN_COMPILE_X86_ASM
  287. // ordered per speed fasterst first
  288. if(gCpuCaps.hasMMX2)
  289. rgb32to16_MMX2(src, dst, src_size);
  290. else if(gCpuCaps.has3DNow)
  291. rgb32to16_3DNow(src, dst, src_size);
  292. else if(gCpuCaps.hasMMX)
  293. rgb32to16_MMX(src, dst, src_size);
  294. else
  295. #endif
  296. rgb32to16_C(src, dst, src_size);
  297. }
  298. void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
  299. {
  300. #ifdef CAN_COMPILE_X86_ASM
  301. // ordered per speed fasterst first
  302. if(gCpuCaps.hasMMX2)
  303. rgb32to15_MMX2(src, dst, src_size);
  304. else if(gCpuCaps.has3DNow)
  305. rgb32to15_3DNow(src, dst, src_size);
  306. else if(gCpuCaps.hasMMX)
  307. rgb32to15_MMX(src, dst, src_size);
  308. else
  309. #endif
  310. rgb32to15_C(src, dst, src_size);
  311. }
  312. void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
  313. {
  314. #ifdef CAN_COMPILE_X86_ASM
  315. // ordered per speed fasterst first
  316. if(gCpuCaps.hasMMX2)
  317. rgb24to16_MMX2(src, dst, src_size);
  318. else if(gCpuCaps.has3DNow)
  319. rgb24to16_3DNow(src, dst, src_size);
  320. else if(gCpuCaps.hasMMX)
  321. rgb24to16_MMX(src, dst, src_size);
  322. else
  323. #endif
  324. rgb24to16_C(src, dst, src_size);
  325. }
  326. void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
  327. {
  328. #ifdef CAN_COMPILE_X86_ASM
  329. // ordered per speed fasterst first
  330. if(gCpuCaps.hasMMX2)
  331. rgb24to15_MMX2(src, dst, src_size);
  332. else if(gCpuCaps.has3DNow)
  333. rgb24to15_3DNow(src, dst, src_size);
  334. else if(gCpuCaps.hasMMX)
  335. rgb24to15_MMX(src, dst, src_size);
  336. else
  337. #endif
  338. rgb24to15_C(src, dst, src_size);
  339. }
  340. /**
  341. * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette
  342. */
  343. void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
  344. {
  345. unsigned i;
  346. for(i=0; i<num_pixels; i++)
  347. ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ];
  348. }
  349. /**
  350. * Pallete is assumed to contain bgr15, see rgb32to15 to convert the palette
  351. */
  352. void palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
  353. {
  354. unsigned i;
  355. for(i=0; i<num_pixels; i++)
  356. ((uint16_t *)dst)[i] = ((uint16_t *)palette)[ src[i] ];
  357. }
  358. void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  359. {
  360. #ifdef CAN_COMPILE_X86_ASM
  361. // ordered per speed fasterst first
  362. if(gCpuCaps.hasMMX2)
  363. rgb32tobgr32_MMX2(src, dst, src_size);
  364. else if(gCpuCaps.has3DNow)
  365. rgb32tobgr32_3DNow(src, dst, src_size);
  366. else if(gCpuCaps.hasMMX)
  367. rgb32tobgr32_MMX(src, dst, src_size);
  368. else
  369. #endif
  370. rgb32tobgr32_C(src, dst, src_size);
  371. }
  372. void rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  373. {
  374. unsigned i;
  375. unsigned num_pixels = src_size >> 2;
  376. for(i=0; i<num_pixels; i++)
  377. {
  378. dst[3*i + 0] = src[4*i + 2];
  379. dst[3*i + 1] = src[4*i + 1];
  380. dst[3*i + 2] = src[4*i + 0];
  381. }
  382. }
  383. void rgb32tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  384. {
  385. #ifdef CAN_COMPILE_X86_ASM
  386. // ordered per speed fasterst first
  387. if(gCpuCaps.hasMMX2)
  388. rgb32tobgr16_MMX2(src, dst, src_size);
  389. else if(gCpuCaps.has3DNow)
  390. rgb32tobgr16_3DNow(src, dst, src_size);
  391. else if(gCpuCaps.hasMMX)
  392. rgb32tobgr16_MMX(src, dst, src_size);
  393. else
  394. #endif
  395. rgb32tobgr16_C(src, dst, src_size);
  396. }
  397. void rgb32tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  398. {
  399. #ifdef CAN_COMPILE_X86_ASM
  400. // ordered per speed fasterst first
  401. if(gCpuCaps.hasMMX2)
  402. rgb32tobgr15_MMX2(src, dst, src_size);
  403. else if(gCpuCaps.has3DNow)
  404. rgb32tobgr15_3DNow(src, dst, src_size);
  405. else if(gCpuCaps.hasMMX)
  406. rgb32tobgr15_MMX(src, dst, src_size);
  407. else
  408. #endif
  409. rgb32tobgr15_C(src, dst, src_size);
  410. }
  411. void rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  412. {
  413. unsigned i;
  414. for(i=0; 3*i<src_size; i++)
  415. {
  416. dst[4*i + 0] = src[3*i + 2];
  417. dst[4*i + 1] = src[3*i + 1];
  418. dst[4*i + 2] = src[3*i + 0];
  419. dst[4*i + 3] = 0;
  420. }
  421. }
  422. void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  423. {
  424. #ifdef CAN_COMPILE_X86_ASM
  425. // ordered per speed fasterst first
  426. if(gCpuCaps.hasMMX2)
  427. rgb24tobgr24_MMX2(src, dst, src_size);
  428. else if(gCpuCaps.has3DNow)
  429. rgb24tobgr24_3DNow(src, dst, src_size);
  430. else if(gCpuCaps.hasMMX)
  431. rgb24tobgr24_MMX(src, dst, src_size);
  432. else
  433. #endif
  434. rgb24tobgr24_C(src, dst, src_size);
  435. }
  436. void rgb24tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  437. {
  438. #ifdef CAN_COMPILE_X86_ASM
  439. // ordered per speed fasterst first
  440. if(gCpuCaps.hasMMX2)
  441. rgb24tobgr16_MMX2(src, dst, src_size);
  442. else if(gCpuCaps.has3DNow)
  443. rgb24tobgr16_3DNow(src, dst, src_size);
  444. else if(gCpuCaps.hasMMX)
  445. rgb24tobgr16_MMX(src, dst, src_size);
  446. else
  447. #endif
  448. rgb24tobgr16_C(src, dst, src_size);
  449. }
  450. void rgb24tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  451. {
  452. #ifdef CAN_COMPILE_X86_ASM
  453. // ordered per speed fasterst first
  454. if(gCpuCaps.hasMMX2)
  455. rgb24tobgr15_MMX2(src, dst, src_size);
  456. else if(gCpuCaps.has3DNow)
  457. rgb24tobgr15_3DNow(src, dst, src_size);
  458. else if(gCpuCaps.hasMMX)
  459. rgb24tobgr15_MMX(src, dst, src_size);
  460. else
  461. #endif
  462. rgb24tobgr15_C(src, dst, src_size);
  463. }
  464. void rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  465. {
  466. const uint16_t *end;
  467. uint8_t *d = (uint8_t *)dst;
  468. const uint16_t *s = (uint16_t *)src;
  469. end = s + src_size/2;
  470. while(s < end)
  471. {
  472. register uint16_t bgr;
  473. bgr = *s++;
  474. *d++ = (bgr&0xF800)>>8;
  475. *d++ = (bgr&0x7E0)>>3;
  476. *d++ = (bgr&0x1F)<<3;
  477. *d++ = 0;
  478. }
  479. }
  480. void rgb16tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  481. {
  482. const uint16_t *end;
  483. uint8_t *d = (uint8_t *)dst;
  484. const uint16_t *s = (const uint16_t *)src;
  485. end = s + src_size/2;
  486. while(s < end)
  487. {
  488. register uint16_t bgr;
  489. bgr = *s++;
  490. *d++ = (bgr&0xF800)>>8;
  491. *d++ = (bgr&0x7E0)>>3;
  492. *d++ = (bgr&0x1F)<<3;
  493. }
  494. }
  495. void rgb16tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  496. {
  497. unsigned i;
  498. unsigned num_pixels = src_size >> 1;
  499. for(i=0; i<num_pixels; i++)
  500. {
  501. unsigned b,g,r;
  502. register uint16_t rgb;
  503. rgb = src[2*i];
  504. r = rgb&0x1F;
  505. g = (rgb&0x7E0)>>5;
  506. b = (rgb&0xF800)>>11;
  507. dst[2*i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11);
  508. }
  509. }
  510. void rgb16tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  511. {
  512. unsigned i;
  513. unsigned num_pixels = src_size >> 1;
  514. for(i=0; i<num_pixels; i++)
  515. {
  516. unsigned b,g,r;
  517. register uint16_t rgb;
  518. rgb = src[2*i];
  519. r = rgb&0x1F;
  520. g = (rgb&0x7E0)>>5;
  521. b = (rgb&0xF800)>>11;
  522. dst[2*i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10);
  523. }
  524. }
  525. void rgb15tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  526. {
  527. const uint16_t *end;
  528. uint8_t *d = (uint8_t *)dst;
  529. const uint16_t *s = (const uint16_t *)src;
  530. end = s + src_size/2;
  531. while(s < end)
  532. {
  533. register uint16_t bgr;
  534. bgr = *s++;
  535. *d++ = (bgr&0x7C00)>>7;
  536. *d++ = (bgr&0x3E0)>>2;
  537. *d++ = (bgr&0x1F)<<3;
  538. *d++ = 0;
  539. }
  540. }
  541. void rgb15tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  542. {
  543. const uint16_t *end;
  544. uint8_t *d = (uint8_t *)dst;
  545. const uint16_t *s = (uint16_t *)src;
  546. end = s + src_size/2;
  547. while(s < end)
  548. {
  549. register uint16_t bgr;
  550. bgr = *s++;
  551. *d++ = (bgr&0x7C00)>>7;
  552. *d++ = (bgr&0x3E0)>>2;
  553. *d++ = (bgr&0x1F)<<3;
  554. }
  555. }
  556. void rgb15tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  557. {
  558. unsigned i;
  559. unsigned num_pixels = src_size >> 1;
  560. for(i=0; i<num_pixels; i++)
  561. {
  562. unsigned b,g,r;
  563. register uint16_t rgb;
  564. rgb = src[2*i];
  565. r = rgb&0x1F;
  566. g = (rgb&0x3E0)>>5;
  567. b = (rgb&0x7C00)>>10;
  568. dst[2*i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11);
  569. }
  570. }
  571. void rgb15tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  572. {
  573. unsigned i;
  574. unsigned num_pixels = src_size >> 1;
  575. for(i=0; i<num_pixels; i++)
  576. {
  577. unsigned b,g,r;
  578. register uint16_t rgb;
  579. rgb = src[2*i];
  580. r = rgb&0x1F;
  581. g = (rgb&0x3E0)>>5;
  582. b = (rgb&0x7C00)>>10;
  583. dst[2*i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10);
  584. }
  585. }
  586. void rgb8tobgr8(const uint8_t *src, uint8_t *dst, unsigned int src_size)
  587. {
  588. unsigned i;
  589. unsigned num_pixels = src_size;
  590. for(i=0; i<num_pixels; i++)
  591. {
  592. unsigned b,g,r;
  593. register uint8_t rgb;
  594. rgb = src[i];
  595. r = (rgb&0x07);
  596. g = (rgb&0x38)>>3;
  597. b = (rgb&0xC0)>>6;
  598. dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6);
  599. }
  600. }
  601. /**
  602. *
  603. * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
  604. * problem for anyone then tell me, and ill fix it)
  605. */
  606. void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
  607. unsigned int width, unsigned int height,
  608. unsigned int lumStride, unsigned int chromStride, unsigned int dstStride)
  609. {
  610. #ifdef CAN_COMPILE_X86_ASM
  611. // ordered per speed fasterst first
  612. if(gCpuCaps.hasMMX2)
  613. yv12toyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
  614. else if(gCpuCaps.has3DNow)
  615. yv12toyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
  616. else if(gCpuCaps.hasMMX)
  617. yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
  618. else
  619. #endif
  620. yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
  621. }
  622. /**
  623. *
  624. * width should be a multiple of 16
  625. */
  626. void yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
  627. unsigned int width, unsigned int height,
  628. unsigned int lumStride, unsigned int chromStride, unsigned int dstStride)
  629. {
  630. #ifdef CAN_COMPILE_X86_ASM
  631. // ordered per speed fasterst first
  632. if(gCpuCaps.hasMMX2)
  633. yuv422ptoyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
  634. else if(gCpuCaps.has3DNow)
  635. yuv422ptoyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
  636. else if(gCpuCaps.hasMMX)
  637. yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
  638. else
  639. #endif
  640. yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
  641. }
  642. /**
  643. *
  644. * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
  645. * problem for anyone then tell me, and ill fix it)
  646. */
  647. void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
  648. unsigned int width, unsigned int height,
  649. unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
  650. {
  651. #ifdef CAN_COMPILE_X86_ASM
  652. // ordered per speed fasterst first
  653. if(gCpuCaps.hasMMX2)
  654. yuy2toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  655. else if(gCpuCaps.has3DNow)
  656. yuy2toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  657. else if(gCpuCaps.hasMMX)
  658. yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  659. else
  660. #endif
  661. yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  662. }
  663. /**
  664. *
  665. * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
  666. * problem for anyone then tell me, and ill fix it)
  667. * chrominance data is only taken from every secound line others are ignored FIXME write HQ version
  668. */
  669. void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
  670. unsigned int width, unsigned int height,
  671. unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
  672. {
  673. #ifdef CAN_COMPILE_X86_ASM
  674. // ordered per speed fasterst first
  675. if(gCpuCaps.hasMMX2)
  676. uyvytoyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  677. else if(gCpuCaps.has3DNow)
  678. uyvytoyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  679. else if(gCpuCaps.hasMMX)
  680. uyvytoyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  681. else
  682. uyvytoyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  683. #else
  684. uyvytoyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  685. #endif
  686. }
  687. void yvu9toyv12(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
  688. uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
  689. unsigned int width, unsigned int height,
  690. unsigned int lumStride, unsigned int chromStride)
  691. {
  692. #ifdef CAN_COMPILE_X86_ASM
  693. // ordered per speed fasterst first
  694. if(gCpuCaps.hasMMX2)
  695. yvu9toyv12_MMX2(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
  696. else if(gCpuCaps.has3DNow)
  697. yvu9toyv12_3DNow(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
  698. else if(gCpuCaps.hasMMX)
  699. yvu9toyv12_MMX(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
  700. else
  701. yvu9toyv12_C(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
  702. #else
  703. yvu9toyv12_C(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
  704. #endif
  705. }
  706. void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride)
  707. {
  708. #ifdef CAN_COMPILE_X86_ASM
  709. // ordered per speed fasterst first
  710. if(gCpuCaps.hasMMX2)
  711. planar2x_MMX2(src, dst, width, height, srcStride, dstStride);
  712. else if(gCpuCaps.has3DNow)
  713. planar2x_3DNow(src, dst, width, height, srcStride, dstStride);
  714. else
  715. #endif
  716. planar2x_C(src, dst, width, height, srcStride, dstStride);
  717. }
  718. /**
  719. *
  720. * height should be a multiple of 2 and width should be a multiple of 2 (if this is a
  721. * problem for anyone then tell me, and ill fix it)
  722. * chrominance data is only taken from every secound line others are ignored FIXME write HQ version
  723. */
  724. void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
  725. unsigned int width, unsigned int height,
  726. unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
  727. {
  728. #ifdef CAN_COMPILE_X86_ASM
  729. // ordered per speed fasterst first
  730. if(gCpuCaps.hasMMX2)
  731. rgb24toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  732. else if(gCpuCaps.has3DNow)
  733. rgb24toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  734. else if(gCpuCaps.hasMMX)
  735. rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  736. else
  737. #endif
  738. rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
  739. }
  740. void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
  741. unsigned width, unsigned height, unsigned src1Stride,
  742. unsigned src2Stride, unsigned dstStride)
  743. {
  744. #ifdef CAN_COMPILE_X86_ASM
  745. // ordered per speed fasterst first
  746. if(gCpuCaps.hasMMX2)
  747. interleaveBytes_MMX2(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
  748. else if(gCpuCaps.has3DNow)
  749. interleaveBytes_3DNow(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
  750. else if(gCpuCaps.hasMMX)
  751. interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
  752. else
  753. #endif
  754. interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
  755. }
  756. void vu9_to_vu12(const uint8_t *src1, const uint8_t *src2,
  757. uint8_t *dst1, uint8_t *dst2,
  758. unsigned width, unsigned height,
  759. unsigned srcStride1, unsigned srcStride2,
  760. unsigned dstStride1, unsigned dstStride2)
  761. {
  762. #ifdef CAN_COMPILE_X86_ASM
  763. if(gCpuCaps.hasMMX2)
  764. vu9_to_vu12_MMX2(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
  765. else if(gCpuCaps.has3DNow)
  766. vu9_to_vu12_3DNow(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
  767. else if(gCpuCaps.hasMMX)
  768. vu9_to_vu12_MMX(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
  769. else
  770. #endif
  771. vu9_to_vu12_C(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
  772. }
  773. void yvu9_to_yuy2(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
  774. uint8_t *dst,
  775. unsigned width, unsigned height,
  776. unsigned srcStride1, unsigned srcStride2,
  777. unsigned srcStride3, unsigned dstStride)
  778. {
  779. #ifdef CAN_COMPILE_X86_ASM
  780. if(gCpuCaps.hasMMX2)
  781. yvu9_to_yuy2_MMX2(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
  782. else if(gCpuCaps.has3DNow)
  783. yvu9_to_yuy2_3DNow(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
  784. else if(gCpuCaps.hasMMX)
  785. yvu9_to_yuy2_MMX(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
  786. else
  787. #endif
  788. yvu9_to_yuy2_C(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
  789. }