You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

775 lines
23KB

  1. /*
  2. * software YUV to RGB converter
  3. *
  4. * Copyright (C) 2009 Konstantin Shishkov
  5. *
  6. * MMX/MMX2 template stuff (needed for fast movntq support),
  7. * 1,4,8bpp support and context / deglobalize stuff
  8. * by Michael Niedermayer (michaelni@gmx.at)
  9. *
  10. * This file is part of FFmpeg.
  11. *
  12. * FFmpeg is free software; you can redistribute it and/or
  13. * modify it under the terms of the GNU Lesser General Public
  14. * License as published by the Free Software Foundation; either
  15. * version 2.1 of the License, or (at your option) any later version.
  16. *
  17. * FFmpeg is distributed in the hope that it will be useful,
  18. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  20. * Lesser General Public License for more details.
  21. *
  22. * You should have received a copy of the GNU Lesser General Public
  23. * License along with FFmpeg; if not, write to the Free Software
  24. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25. */
  26. #include <stdio.h>
  27. #include <stdlib.h>
  28. #include <inttypes.h>
  29. #include <assert.h>
  30. #include "config.h"
  31. #include "rgb2rgb.h"
  32. #include "swscale.h"
  33. #include "swscale_internal.h"
  34. #include "libavutil/x86_cpu.h"
/* Switch read by the MMX code only (see original note); nothing in the C
 * converters of this file tests it. */
#define DITHER1XBPP // only for MMX

/* 8x8 ordered-dither matrices, defined in another translation unit.
 * They are indexed by (y & 7) in the *_ordered_dither converters below. */
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];

/*
 * Instantiate yuv2rgb_template.c twice: once with HAVE_MMX2 forced to 0 and
 * RENAME() appending _MMX, once with HAVE_MMX2 forced to 1 and RENAME()
 * appending _MMX2.  The yuv420_*_MMX / yuv420_*_MMX2 functions returned by
 * ff_yuv2rgb_get_func_ptr() presumably come from these two inclusions.
 * The order of the #undef/#define/#include lines matters and must be kept.
 */
#if HAVE_MMX && CONFIG_GPL
/* hope these constant values are cache line aligned */
DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
//MMX versions
#undef RENAME
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "yuv2rgb_template.c"
//MMX2 versions
#undef RENAME
#undef HAVE_MMX2
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "yuv2rgb_template.c"
#endif /* HAVE_MMX && CONFIG_GPL */
  59. const int32_t ff_yuv2rgb_coeffs[8][4] = {
  60. {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
  61. {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
  62. {104597, 132201, 25675, 53279}, /* unspecified */
  63. {104597, 132201, 25675, 53279}, /* reserved */
  64. {104448, 132798, 24759, 53109}, /* FCC */
  65. {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
  66. {104597, 132201, 25675, 53279}, /* SMPTE 170M */
  67. {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
  68. };
  69. #define LOADCHROMA(i) \
  70. U = pu[i]; \
  71. V = pv[i]; \
  72. r = (void *)c->table_rV[V]; \
  73. g = (void *)(c->table_gU[U] + c->table_gV[V]); \
  74. b = (void *)c->table_bU[U];
  75. #define PUTRGB(dst,src,i,o) \
  76. Y = src[2*i+o]; \
  77. dst[2*i ] = r[Y] + g[Y] + b[Y]; \
  78. Y = src[2*i+1-o]; \
  79. dst[2*i+1] = r[Y] + g[Y] + b[Y];
  80. #define PUTRGB24(dst,src,i) \
  81. Y = src[2*i]; \
  82. dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \
  83. Y = src[2*i+1]; \
  84. dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y];
  85. #define PUTBGR24(dst,src,i) \
  86. Y = src[2*i]; \
  87. dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \
  88. Y = src[2*i+1]; \
  89. dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y];
  90. #define PUTRGBA(dst,ysrc,asrc,i,o,s) \
  91. Y = ysrc[2*i+o]; \
  92. dst[2*i ] = r[Y] + g[Y] + b[Y] + (asrc[2*i ]<<s); \
  93. Y = ysrc[2*i+1-o]; \
  94. dst[2*i+1] = r[Y] + g[Y] + b[Y] + (asrc[2*i+1]<<s);
  95. #define YUV2RGBFUNC(func_name, dst_type, alpha) \
  96. static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
  97. int srcSliceH, uint8_t* dst[], int dstStride[]){\
  98. int y;\
  99. \
  100. if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\
  101. srcStride[1] *= 2;\
  102. srcStride[2] *= 2;\
  103. }\
  104. for (y=0; y<srcSliceH; y+=2) {\
  105. dst_type *dst_1 = (dst_type*)(dst[0] + (y+srcSliceY )*dstStride[0]);\
  106. dst_type *dst_2 = (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
  107. dst_type av_unused *r, *b;\
  108. dst_type *g;\
  109. uint8_t *py_1 = src[0] + y*srcStride[0];\
  110. uint8_t *py_2 = py_1 + srcStride[0];\
  111. uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
  112. uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
  113. uint8_t av_unused *pa_1, *pa_2;\
  114. unsigned int h_size = c->dstW>>3;\
  115. if (alpha){\
  116. pa_1 = src[3] + y*srcStride[3];\
  117. pa_2 = pa_1 + srcStride[3];\
  118. }\
  119. while (h_size--) {\
  120. int av_unused U, V;\
  121. int Y;\
  122. #define ENDYUV2RGBLINE(dst_delta)\
  123. pu += 4;\
  124. pv += 4;\
  125. py_1 += 8;\
  126. py_2 += 8;\
  127. dst_1 += dst_delta;\
  128. dst_2 += dst_delta;\
  129. }\
  130. if (c->dstW & 4) {\
  131. int av_unused Y, U, V;\
  132. #define ENDYUV2RGBFUNC()\
  133. }\
  134. }\
  135. return srcSliceH;\
  136. }
  137. #define CLOSEYUV2RGBFUNC(dst_delta)\
  138. ENDYUV2RGBLINE(dst_delta)\
  139. ENDYUV2RGBFUNC()
  140. YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
  141. LOADCHROMA(0);
  142. PUTRGB(dst_1,py_1,0,0);
  143. PUTRGB(dst_2,py_2,0,1);
  144. LOADCHROMA(1);
  145. PUTRGB(dst_2,py_2,1,1);
  146. PUTRGB(dst_1,py_1,1,0);
  147. LOADCHROMA(1);
  148. PUTRGB(dst_2,py_2,1,1);
  149. PUTRGB(dst_1,py_1,1,0);
  150. LOADCHROMA(2);
  151. PUTRGB(dst_1,py_1,2,0);
  152. PUTRGB(dst_2,py_2,2,1);
  153. LOADCHROMA(3);
  154. PUTRGB(dst_2,py_2,3,1);
  155. PUTRGB(dst_1,py_1,3,0);
  156. ENDYUV2RGBLINE(8)
  157. LOADCHROMA(0);
  158. PUTRGB(dst_1,py_1,0,0);
  159. PUTRGB(dst_2,py_2,0,1);
  160. LOADCHROMA(1);
  161. PUTRGB(dst_2,py_2,1,1);
  162. PUTRGB(dst_1,py_1,1,0);
  163. ENDYUV2RGBFUNC()
  164. YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
  165. LOADCHROMA(0);
  166. PUTRGBA(dst_1,py_1,pa_1,0,0,24);
  167. PUTRGBA(dst_2,py_2,pa_2,0,1,24);
  168. LOADCHROMA(1);
  169. PUTRGBA(dst_2,py_2,pa_1,1,1,24);
  170. PUTRGBA(dst_1,py_1,pa_2,1,0,24);
  171. LOADCHROMA(1);
  172. PUTRGBA(dst_2,py_2,pa_1,1,1,24);
  173. PUTRGBA(dst_1,py_1,pa_2,1,0,24);
  174. LOADCHROMA(2);
  175. PUTRGBA(dst_1,py_1,pa_1,2,0,24);
  176. PUTRGBA(dst_2,py_2,pa_2,2,1,24);
  177. LOADCHROMA(3);
  178. PUTRGBA(dst_2,py_2,pa_1,3,1,24);
  179. PUTRGBA(dst_1,py_1,pa_2,3,0,24);
  180. pa_1 += 8;\
  181. pa_2 += 8;\
  182. ENDYUV2RGBLINE(8)
  183. LOADCHROMA(0);
  184. PUTRGBA(dst_1,py_1,pa_1,0,0,24);
  185. PUTRGBA(dst_2,py_2,pa_2,0,1,24);
  186. LOADCHROMA(1);
  187. PUTRGBA(dst_2,py_2,pa_1,1,1,24);
  188. PUTRGBA(dst_1,py_1,pa_2,1,0,24);
  189. ENDYUV2RGBFUNC()
  190. YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
  191. LOADCHROMA(0);
  192. PUTRGBA(dst_1,py_1,pa_1,0,0,0);
  193. PUTRGBA(dst_2,py_2,pa_2,0,1,0);
  194. LOADCHROMA(1);
  195. PUTRGBA(dst_2,py_2,pa_2,1,1,0);
  196. PUTRGBA(dst_1,py_1,pa_1,1,0,0);
  197. LOADCHROMA(1);
  198. PUTRGBA(dst_2,py_2,pa_2,1,1,0);
  199. PUTRGBA(dst_1,py_1,pa_1,1,0,0);
  200. LOADCHROMA(2);
  201. PUTRGBA(dst_1,py_1,pa_1,2,0,0);
  202. PUTRGBA(dst_2,py_2,pa_2,2,1,0);
  203. LOADCHROMA(3);
  204. PUTRGBA(dst_2,py_2,pa_2,3,1,0);
  205. PUTRGBA(dst_1,py_1,pa_1,3,0,0);
  206. pa_1 += 8;\
  207. pa_2 += 8;\
  208. ENDYUV2RGBLINE(8)
  209. LOADCHROMA(0);
  210. PUTRGBA(dst_1,py_1,pa_1,0,0,0);
  211. PUTRGBA(dst_2,py_2,pa_2,0,1,0);
  212. LOADCHROMA(1);
  213. PUTRGBA(dst_2,py_2,pa_2,1,1,0);
  214. PUTRGBA(dst_1,py_1,pa_1,1,0,0);
  215. ENDYUV2RGBFUNC()
/*
 * C converter writing packed 3-byte pixels in r, g, b byte order.
 *
 * dst_type is uint8_t, so ENDYUV2RGBLINE(24) advances each output row by
 * 24 bytes, i.e. the 8 pixels written per loop iteration.  The statements
 * after ENDYUV2RGBLINE convert a trailing group of 4 pixels when
 * c->dstW & 4 is set.
 */
YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
    LOADCHROMA(0);
    PUTRGB24(dst_1,py_1,0);
    PUTRGB24(dst_2,py_2,0);
    LOADCHROMA(1);
    PUTRGB24(dst_2,py_2,1);
    PUTRGB24(dst_1,py_1,1);
    LOADCHROMA(2);
    PUTRGB24(dst_1,py_1,2);
    PUTRGB24(dst_2,py_2,2);
    LOADCHROMA(3);
    PUTRGB24(dst_2,py_2,3);
    PUTRGB24(dst_1,py_1,3);
ENDYUV2RGBLINE(24)
    /* trailing 4 pixels: chroma pairs 0 and 1 only */
    LOADCHROMA(0);
    PUTRGB24(dst_1,py_1,0);
    PUTRGB24(dst_2,py_2,0);
    LOADCHROMA(1);
    PUTRGB24(dst_2,py_2,1);
    PUTRGB24(dst_1,py_1,1);
ENDYUV2RGBFUNC()
// only trivial mods from yuv2rgb_c_24_rgb
/*
 * C converter writing packed 3-byte pixels in b, g, r byte order
 * (PUTBGR24 instead of PUTRGB24); structure and strides are the same as
 * in yuv2rgb_c_24_rgb: 24 bytes per 8-pixel group, plus a trailing group
 * of 4 pixels when c->dstW & 4 is set.
 */
YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
    LOADCHROMA(0);
    PUTBGR24(dst_1,py_1,0);
    PUTBGR24(dst_2,py_2,0);
    LOADCHROMA(1);
    PUTBGR24(dst_2,py_2,1);
    PUTBGR24(dst_1,py_1,1);
    LOADCHROMA(2);
    PUTBGR24(dst_1,py_1,2);
    PUTBGR24(dst_2,py_2,2);
    LOADCHROMA(3);
    PUTBGR24(dst_2,py_2,3);
    PUTBGR24(dst_1,py_1,3);
ENDYUV2RGBLINE(24)
    /* trailing 4 pixels: chroma pairs 0 and 1 only */
    LOADCHROMA(0);
    PUTBGR24(dst_1,py_1,0);
    PUTBGR24(dst_2,py_2,0);
    LOADCHROMA(1);
    PUTBGR24(dst_2,py_2,1);
    PUTBGR24(dst_1,py_1,1);
ENDYUV2RGBFUNC()
// Same 8-pixel main loop as yuv2rgb_c_32, with uint16_t as the type of
// r, g, b, dst_1, dst_2.
/*
 * C converter producing one uint16_t per pixel.
 *
 * Unlike yuv2rgb_c_32 this function is closed with CLOSEYUV2RGBFUNC, so the
 * `if (c->dstW & 4)` branch is empty: a trailing group of 4 pixels is not
 * converted here.
 */
YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
    LOADCHROMA(0);
    PUTRGB(dst_1,py_1,0,0);
    PUTRGB(dst_2,py_2,0,1);
    LOADCHROMA(1);
    PUTRGB(dst_2,py_2,1,1);
    PUTRGB(dst_1,py_1,1,0);
    LOADCHROMA(2);
    PUTRGB(dst_1,py_1,2,0);
    PUTRGB(dst_2,py_2,2,1);
    LOADCHROMA(3);
    PUTRGB(dst_2,py_2,3,1);
    PUTRGB(dst_1,py_1,3,0);
CLOSEYUV2RGBFUNC(8)
// Same 8-pixel main loop as yuv2rgb_c_32, with uint8_t as the type of
// r, g, b, dst_1, dst_2.
/*
 * C converter producing one uint8_t per pixel, without dithering.
 *
 * Closed with CLOSEYUV2RGBFUNC, so a trailing group of 4 pixels
 * (c->dstW & 4) is not converted.  ff_yuv2rgb_get_func_ptr() in this file
 * returns yuv2rgb_c_8_ordered_dither for the 8-bit formats, not this
 * function.
 */
YUV2RGBFUNC(yuv2rgb_c_8, uint8_t, 0)
    LOADCHROMA(0);
    PUTRGB(dst_1,py_1,0,0);
    PUTRGB(dst_2,py_2,0,1);
    LOADCHROMA(1);
    PUTRGB(dst_2,py_2,1,1);
    PUTRGB(dst_1,py_1,1,0);
    LOADCHROMA(2);
    PUTRGB(dst_1,py_1,2,0);
    PUTRGB(dst_2,py_2,2,1);
    LOADCHROMA(3);
    PUTRGB(dst_2,py_2,3,1);
    PUTRGB(dst_1,py_1,3,0);
CLOSEYUV2RGBFUNC(8)
/*
 * C converter producing one uint8_t per pixel with ordered dithering.
 *
 * d32 and d64 point at row (y & 7) of dither_8x8_32 and dither_8x8_73
 * (note that the variable named d64 uses the _73 matrix).  The dither value
 * is added to the luma index before the table lookup: r and g use d32,
 * b uses d64.
 *
 * The last PUTRGB8 argument is the column offset into the dither row.  For
 * the second output row the offset has 8 added, which steps past the end of
 * the 8-entry row into the following matrix row; y is always even here
 * (the row loop advances by 2), so row (y & 7) + 1 exists.
 *
 * Closed with CLOSEYUV2RGBFUNC: a trailing group of 4 pixels is not
 * converted.
 */
YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
    const uint8_t *d32 = dither_8x8_32[y&7];
    const uint8_t *d64 = dither_8x8_73[y&7];
#define PUTRGB8(dst,src,i,o) \
    Y = src[2*i]; \
    dst[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
    Y = src[2*i+1]; \
    dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
    LOADCHROMA(0);
    PUTRGB8(dst_1,py_1,0,0);
    PUTRGB8(dst_2,py_2,0,0+8);
    LOADCHROMA(1);
    PUTRGB8(dst_2,py_2,1,2+8);
    PUTRGB8(dst_1,py_1,1,2);
    LOADCHROMA(2);
    PUTRGB8(dst_1,py_1,2,4);
    PUTRGB8(dst_2,py_2,2,4+8);
    LOADCHROMA(3);
    PUTRGB8(dst_2,py_2,3,6+8);
    PUTRGB8(dst_1,py_1,3,6);
CLOSEYUV2RGBFUNC(8)
// Same loop structure as yuv2rgb_c_32, but two pixels are packed into each
// output byte (the types of r, g, b, dst_1, dst_2 are uint8_t).
/*
 * C converter producing 4 bits per pixel, without dithering.
 *
 * PUTRGB4 builds one output byte from two pixels: the pixel from luma
 * sample 2*i goes into the low bits and the pixel from sample 2*i+1 is
 * OR-ed in shifted left by 4.  Eight pixels therefore take 4 bytes, which
 * is the dst_delta given to CLOSEYUV2RGBFUNC.  A trailing group of 4 pixels
 * is not converted.
 */
YUV2RGBFUNC(yuv2rgb_c_4, uint8_t, 0)
    int acc;
#define PUTRGB4(dst,src,i) \
    Y = src[2*i]; \
    acc = r[Y] + g[Y] + b[Y]; \
    Y = src[2*i+1]; \
    acc |= (r[Y] + g[Y] + b[Y])<<4; \
    dst[i] = acc;
    LOADCHROMA(0);
    PUTRGB4(dst_1,py_1,0);
    PUTRGB4(dst_2,py_2,0);
    LOADCHROMA(1);
    PUTRGB4(dst_2,py_2,1);
    PUTRGB4(dst_1,py_1,1);
    LOADCHROMA(2);
    PUTRGB4(dst_1,py_1,2);
    PUTRGB4(dst_2,py_2,2);
    LOADCHROMA(3);
    PUTRGB4(dst_2,py_2,3);
    PUTRGB4(dst_1,py_1,3);
CLOSEYUV2RGBFUNC(4)
/*
 * C converter producing 4 bits per pixel (two pixels per output byte) with
 * ordered dithering.
 *
 * d64 and d128 point at row (y & 7) of dither_8x8_73 and dither_8x8_220.
 * The dither value is added to the luma index before the lookup: r and b
 * use d128, g uses d64.  As in PUTRGB4, the first pixel of a pair lands in
 * the low bits and the second is OR-ed in shifted left by 4.
 *
 * The last PUTRGB4D argument is the column offset into the dither row; the
 * "+8" used for the second output row reaches into the following matrix
 * row (y is always even here, so that row exists).
 *
 * A trailing group of 4 pixels is not converted (CLOSEYUV2RGBFUNC).
 */
YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
    const uint8_t *d64 = dither_8x8_73[y&7];
    const uint8_t *d128 = dither_8x8_220[y&7];
    int acc;
#define PUTRGB4D(dst,src,i,o) \
    Y = src[2*i]; \
    acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
    Y = src[2*i+1]; \
    acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4; \
    dst[i]= acc;
    LOADCHROMA(0);
    PUTRGB4D(dst_1,py_1,0,0);
    PUTRGB4D(dst_2,py_2,0,0+8);
    LOADCHROMA(1);
    PUTRGB4D(dst_2,py_2,1,2+8);
    PUTRGB4D(dst_1,py_1,1,2);
    LOADCHROMA(2);
    PUTRGB4D(dst_1,py_1,2,4);
    PUTRGB4D(dst_2,py_2,2,4+8);
    LOADCHROMA(3);
    PUTRGB4D(dst_2,py_2,3,6+8);
    PUTRGB4D(dst_1,py_1,3,6);
CLOSEYUV2RGBFUNC(4)
// Same 8-pixel main loop as yuv2rgb_c_32, with uint8_t as the type of
// r, g, b, dst_1, dst_2.
/*
 * C converter storing each pixel in its own output byte, without
 * dithering; ff_yuv2rgb_get_func_ptr() returns the _ordered_dither variant
 * for the *4_BYTE formats, not this function.
 *
 * Closed with CLOSEYUV2RGBFUNC: a trailing group of 4 pixels is not
 * converted.
 */
YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t, 0)
    LOADCHROMA(0);
    PUTRGB(dst_1,py_1,0,0);
    PUTRGB(dst_2,py_2,0,1);
    LOADCHROMA(1);
    PUTRGB(dst_2,py_2,1,1);
    PUTRGB(dst_1,py_1,1,0);
    LOADCHROMA(2);
    PUTRGB(dst_1,py_1,2,0);
    PUTRGB(dst_2,py_2,2,1);
    LOADCHROMA(3);
    PUTRGB(dst_2,py_2,3,1);
    PUTRGB(dst_1,py_1,3,0);
CLOSEYUV2RGBFUNC(8)
/*
 * C converter storing each pixel in its own output byte, with ordered
 * dithering.
 *
 * d64 and d128 point at row (y & 7) of dither_8x8_73 and dither_8x8_220;
 * r and b lookups are offset by d128, g lookups by d64.  The last
 * PUTRGB4DB argument is the column offset into the dither row, and the
 * "+8" used for the second output row reaches into the following matrix
 * row (y is always even here, so that row exists).
 *
 * A trailing group of 4 pixels is not converted (CLOSEYUV2RGBFUNC).
 */
YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
    const uint8_t *d64 = dither_8x8_73[y&7];
    const uint8_t *d128 = dither_8x8_220[y&7];
#define PUTRGB4DB(dst,src,i,o) \
    Y = src[2*i]; \
    dst[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
    Y = src[2*i+1]; \
    dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
    LOADCHROMA(0);
    PUTRGB4DB(dst_1,py_1,0,0);
    PUTRGB4DB(dst_2,py_2,0,0+8);
    LOADCHROMA(1);
    PUTRGB4DB(dst_2,py_2,1,2+8);
    PUTRGB4DB(dst_1,py_1,1,2);
    LOADCHROMA(2);
    PUTRGB4DB(dst_1,py_1,2,4);
    PUTRGB4DB(dst_2,py_2,2,4+8);
    LOADCHROMA(3);
    PUTRGB4DB(dst_2,py_2,3,6+8);
    PUTRGB4DB(dst_1,py_1,3,6);
CLOSEYUV2RGBFUNC(8)
/*
 * C converter producing 1 bit per pixel with ordered dithering.
 *
 * Chroma is not read per pixel: g is fixed to the table selected by index
 * 128 of table_gU/table_gV (presumably the neutral chroma value), and only
 * luma plus the dither value from dither_8x8_220 selects the table entry.
 *
 * Each PUTRGB1 doubles the accumulator (out += out) and adds the looked-up
 * entry, twice, so after the four calls per row the first pixel of the
 * group ends up in the most significant bit of the byte.  One byte per row
 * is stored per 8-pixel group, hence CLOSEYUV2RGBFUNC(1).
 *
 * The "+8" column offsets for the second row reach into the following
 * dither matrix row (y is always even here, so that row exists).
 */
YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
    const uint8_t *d128 = dither_8x8_220[y&7];
    char out_1 = 0, out_2 = 0;
    g= c->table_gU[128] + c->table_gV[128];
#define PUTRGB1(out,src,i,o) \
    Y = src[2*i]; \
    out+= out + g[Y+d128[0+o]]; \
    Y = src[2*i+1]; \
    out+= out + g[Y+d128[1+o]];
    PUTRGB1(out_1,py_1,0,0);
    PUTRGB1(out_2,py_2,0,0+8);
    PUTRGB1(out_2,py_2,1,2+8);
    PUTRGB1(out_1,py_1,1,2);
    PUTRGB1(out_1,py_1,2,4);
    PUTRGB1(out_2,py_2,2,4+8);
    PUTRGB1(out_2,py_2,3,6+8);
    PUTRGB1(out_1,py_1,3,6);
    dst_1[0]= out_1;
    dst_2[0]= out_2;
CLOSEYUV2RGBFUNC(1)
  416. SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
  417. {
  418. SwsFunc t = NULL;
  419. #if (HAVE_MMX2 || HAVE_MMX) && CONFIG_GPL
  420. if (c->flags & SWS_CPU_CAPS_MMX2) {
  421. switch (c->dstFormat) {
  422. case PIX_FMT_RGB32:
  423. if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){
  424. #if HAVE_7REGS
  425. return yuva420_rgb32_MMX2;
  426. #else
  427. break;
  428. #endif
  429. }else return yuv420_rgb32_MMX2;
  430. case PIX_FMT_BGR24: return yuv420_rgb24_MMX2;
  431. case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
  432. case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
  433. }
  434. }
  435. if (c->flags & SWS_CPU_CAPS_MMX) {
  436. switch (c->dstFormat) {
  437. case PIX_FMT_RGB32:
  438. if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){
  439. #if HAVE_7REGS
  440. return yuva420_rgb32_MMX;
  441. #else
  442. break;
  443. #endif
  444. }else return yuv420_rgb32_MMX;
  445. case PIX_FMT_BGR24: return yuv420_rgb24_MMX;
  446. case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
  447. case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
  448. }
  449. }
  450. #endif
  451. #if HAVE_VIS
  452. t = ff_yuv2rgb_init_vis(c);
  453. #endif
  454. #if CONFIG_MLIB
  455. t = ff_yuv2rgb_init_mlib(c);
  456. #endif
  457. #if HAVE_ALTIVEC && CONFIG_GPL
  458. if (c->flags & SWS_CPU_CAPS_ALTIVEC)
  459. t = ff_yuv2rgb_init_altivec(c);
  460. #endif
  461. #if ARCH_BFIN
  462. if (c->flags & SWS_CPU_CAPS_BFIN)
  463. t = ff_yuv2rgb_get_func_ptr_bfin(c);
  464. #endif
  465. if (t)
  466. return t;
  467. av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n");
  468. switch (c->dstFormat) {
  469. case PIX_FMT_ARGB:
  470. case PIX_FMT_ABGR: if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) return yuva2argb_c;
  471. case PIX_FMT_RGBA:
  472. case PIX_FMT_BGRA: return (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) ? yuva2rgba_c : yuv2rgb_c_32;
  473. case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb;
  474. case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr;
  475. case PIX_FMT_RGB565:
  476. case PIX_FMT_BGR565:
  477. case PIX_FMT_RGB555:
  478. case PIX_FMT_BGR555: return yuv2rgb_c_16;
  479. case PIX_FMT_RGB8:
  480. case PIX_FMT_BGR8: return yuv2rgb_c_8_ordered_dither;
  481. case PIX_FMT_RGB4:
  482. case PIX_FMT_BGR4: return yuv2rgb_c_4_ordered_dither;
  483. case PIX_FMT_RGB4_BYTE:
  484. case PIX_FMT_BGR4_BYTE: return yuv2rgb_c_4b_ordered_dither;
  485. case PIX_FMT_MONOBLACK: return yuv2rgb_c_1_ordered_dither;
  486. default:
  487. assert(0);
  488. }
  489. return NULL;
  490. }
  491. static void fill_table(uint8_t* table[256], const int elemsize, const int inc, uint8_t *y_table)
  492. {
  493. int i;
  494. int64_t cb = 0;
  495. y_table -= elemsize * (inc >> 9);
  496. for (i = 0; i < 256; i++) {
  497. table[i] = y_table + elemsize * (cb >> 16);
  498. cb += inc;
  499. }
  500. }
  501. static void fill_gv_table(int table[256], const int elemsize, const int inc)
  502. {
  503. int i;
  504. int64_t cb = 0;
  505. int off = -(inc >> 9);
  506. for (i = 0; i < 256; i++) {
  507. table[i] = elemsize * (off + (cb >> 16));
  508. cb += inc;
  509. }
  510. }
  511. av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
  512. int brightness, int contrast, int saturation)
  513. {
  514. const int isRgb = c->dstFormat==PIX_FMT_RGB32
  515. || c->dstFormat==PIX_FMT_RGB32_1
  516. || c->dstFormat==PIX_FMT_BGR24
  517. || c->dstFormat==PIX_FMT_RGB565
  518. || c->dstFormat==PIX_FMT_RGB555
  519. || c->dstFormat==PIX_FMT_RGB8
  520. || c->dstFormat==PIX_FMT_RGB4
  521. || c->dstFormat==PIX_FMT_RGB4_BYTE
  522. || c->dstFormat==PIX_FMT_MONOBLACK;
  523. const int bpp = fmt_depth(c->dstFormat);
  524. uint8_t *y_table;
  525. uint16_t *y_table16;
  526. uint32_t *y_table32;
  527. int i, base, rbase, gbase, bbase, abase, needAlpha;
  528. const int yoffs = fullRange ? 384 : 326;
  529. int64_t crv = inv_table[0];
  530. int64_t cbu = inv_table[1];
  531. int64_t cgu = -inv_table[2];
  532. int64_t cgv = -inv_table[3];
  533. int64_t cy = 1<<16;
  534. int64_t oy = 0;
  535. int64_t yb = 0;
  536. if (!fullRange) {
  537. cy = (cy*255) / 219;
  538. oy = 16<<16;
  539. } else {
  540. crv = (crv*224) / 255;
  541. cbu = (cbu*224) / 255;
  542. cgu = (cgu*224) / 255;
  543. cgv = (cgv*224) / 255;
  544. }
  545. cy = (cy *contrast ) >> 16;
  546. crv = (crv*contrast * saturation) >> 32;
  547. cbu = (cbu*contrast * saturation) >> 32;
  548. cgu = (cgu*contrast * saturation) >> 32;
  549. cgv = (cgv*contrast * saturation) >> 32;
  550. oy -= 256*brightness;
  551. //scale coefficients by cy
  552. crv = ((crv << 16) + 0x8000) / cy;
  553. cbu = ((cbu << 16) + 0x8000) / cy;
  554. cgu = ((cgu << 16) + 0x8000) / cy;
  555. cgv = ((cgv << 16) + 0x8000) / cy;
  556. av_free(c->yuvTable);
  557. switch (bpp) {
  558. case 1:
  559. c->yuvTable = av_malloc(1024);
  560. y_table = c->yuvTable;
  561. yb = -(384<<16) - oy;
  562. for (i = 0; i < 1024-110; i++) {
  563. y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
  564. yb += cy;
  565. }
  566. fill_table(c->table_gU, 1, cgu, y_table + yoffs);
  567. fill_gv_table(c->table_gV, 1, cgv);
  568. break;
  569. case 4:
  570. case 4|128:
  571. rbase = isRgb ? 3 : 0;
  572. gbase = 1;
  573. bbase = isRgb ? 0 : 3;
  574. c->yuvTable = av_malloc(1024*3);
  575. y_table = c->yuvTable;
  576. yb = -(384<<16) - oy;
  577. for (i = 0; i < 1024-110; i++) {
  578. int yval = av_clip_uint8((yb + 0x8000) >> 16);
  579. y_table[i+110 ] = (yval >> 7) << rbase;
  580. y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase;
  581. y_table[i+110+2048] = (yval >> 7) << bbase;
  582. yb += cy;
  583. }
  584. fill_table(c->table_rV, 1, crv, y_table + yoffs);
  585. fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
  586. fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
  587. fill_gv_table(c->table_gV, 1, cgv);
  588. break;
  589. case 8:
  590. rbase = isRgb ? 5 : 0;
  591. gbase = isRgb ? 2 : 3;
  592. bbase = isRgb ? 0 : 6;
  593. c->yuvTable = av_malloc(1024*3);
  594. y_table = c->yuvTable;
  595. yb = -(384<<16) - oy;
  596. for (i = 0; i < 1024-38; i++) {
  597. int yval = av_clip_uint8((yb + 0x8000) >> 16);
  598. y_table[i+16 ] = ((yval + 18) / 36) << rbase;
  599. y_table[i+16+1024] = ((yval + 18) / 36) << gbase;
  600. y_table[i+37+2048] = ((yval + 43) / 85) << bbase;
  601. yb += cy;
  602. }
  603. fill_table(c->table_rV, 1, crv, y_table + yoffs);
  604. fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
  605. fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
  606. fill_gv_table(c->table_gV, 1, cgv);
  607. break;
  608. case 15:
  609. case 16:
  610. rbase = isRgb ? bpp - 5 : 0;
  611. gbase = 5;
  612. bbase = isRgb ? 0 : (bpp - 5);
  613. c->yuvTable = av_malloc(1024*3*2);
  614. y_table16 = c->yuvTable;
  615. yb = -(384<<16) - oy;
  616. for (i = 0; i < 1024; i++) {
  617. uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
  618. y_table16[i ] = (yval >> 3) << rbase;
  619. y_table16[i+1024] = (yval >> (18 - bpp)) << gbase;
  620. y_table16[i+2048] = (yval >> 3) << bbase;
  621. yb += cy;
  622. }
  623. fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
  624. fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
  625. fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
  626. fill_gv_table(c->table_gV, 2, cgv);
  627. break;
  628. case 24:
  629. c->yuvTable = av_malloc(1024);
  630. y_table = c->yuvTable;
  631. yb = -(384<<16) - oy;
  632. for (i = 0; i < 1024; i++) {
  633. y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
  634. yb += cy;
  635. }
  636. fill_table(c->table_rV, 1, crv, y_table + yoffs);
  637. fill_table(c->table_gU, 1, cgu, y_table + yoffs);
  638. fill_table(c->table_bU, 1, cbu, y_table + yoffs);
  639. fill_gv_table(c->table_gV, 1, cgv);
  640. break;
  641. case 32:
  642. base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
  643. rbase = base + (isRgb ? 16 : 0);
  644. gbase = base + 8;
  645. bbase = base + (isRgb ? 0 : 16);
  646. needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
  647. if (!needAlpha)
  648. abase = (base + 24) & 31;
  649. c->yuvTable = av_malloc(1024*3*4);
  650. y_table32 = c->yuvTable;
  651. yb = -(384<<16) - oy;
  652. for (i = 0; i < 1024; i++) {
  653. uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
  654. y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase));
  655. y_table32[i+1024] = yval << gbase;
  656. y_table32[i+2048] = yval << bbase;
  657. yb += cy;
  658. }
  659. fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
  660. fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
  661. fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
  662. fill_gv_table(c->table_gV, 4, cgv);
  663. break;
  664. default:
  665. c->yuvTable = NULL;
  666. av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);
  667. return -1;
  668. }
  669. return 0;
  670. }