You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

889 lines
26KB

  1. /*
  2. * software YUV to RGB converter
  3. *
  4. * Copyright (C) 2009 Konstantin Shishkov
  5. *
  6. * 1,4,8bpp support and context / deglobalize stuff
  7. * by Michael Niedermayer (michaelni@gmx.at)
  8. *
  9. * This file is part of FFmpeg.
  10. *
  11. * FFmpeg is free software; you can redistribute it and/or
  12. * modify it under the terms of the GNU Lesser General Public
  13. * License as published by the Free Software Foundation; either
  14. * version 2.1 of the License, or (at your option) any later version.
  15. *
  16. * FFmpeg is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. * Lesser General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU Lesser General Public
  22. * License along with FFmpeg; if not, write to the Free Software
  23. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24. */
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <inttypes.h>
  28. #include <assert.h>
  29. #include "config.h"
  30. #include "rgb2rgb.h"
  31. #include "swscale.h"
  32. #include "swscale_internal.h"
  33. #include "libavutil/x86_cpu.h"
  34. #include "libavutil/bswap.h"
  35. extern const uint8_t dither_4x4_16[4][8];
  36. extern const uint8_t dither_8x8_32[8][8];
  37. extern const uint8_t dither_8x8_73[8][8];
  38. extern const uint8_t dither_8x8_220[8][8];
  39. const int32_t ff_yuv2rgb_coeffs[8][4] = {
  40. {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
  41. {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
  42. {104597, 132201, 25675, 53279}, /* unspecified */
  43. {104597, 132201, 25675, 53279}, /* reserved */
  44. {104448, 132798, 24759, 53109}, /* FCC */
  45. {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
  46. {104597, 132201, 25675, 53279}, /* SMPTE 170M */
  47. {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
  48. };
  49. const int *sws_getCoefficients(int colorspace)
  50. {
  51. if (colorspace > 7 || colorspace < 0)
  52. colorspace = SWS_CS_DEFAULT;
  53. return ff_yuv2rgb_coeffs[colorspace];
  54. }
  55. #define LOADCHROMA(i) \
  56. U = pu[i]; \
  57. V = pv[i]; \
  58. r = (void *)c->table_rV[V]; \
  59. g = (void *)(c->table_gU[U] + c->table_gV[V]); \
  60. b = (void *)c->table_bU[U];
  61. #define PUTRGB(dst,src,i) \
  62. Y = src[2*i]; \
  63. dst[2*i ] = r[Y] + g[Y] + b[Y]; \
  64. Y = src[2*i+1]; \
  65. dst[2*i+1] = r[Y] + g[Y] + b[Y];
  66. #define PUTRGB24(dst,src,i) \
  67. Y = src[2*i]; \
  68. dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \
  69. Y = src[2*i+1]; \
  70. dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y];
  71. #define PUTBGR24(dst,src,i) \
  72. Y = src[2*i]; \
  73. dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \
  74. Y = src[2*i+1]; \
  75. dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y];
  76. #define PUTRGBA(dst,ysrc,asrc,i,s) \
  77. Y = ysrc[2*i]; \
  78. dst[2*i ] = r[Y] + g[Y] + b[Y] + (asrc[2*i ]<<s); \
  79. Y = ysrc[2*i+1]; \
  80. dst[2*i+1] = r[Y] + g[Y] + b[Y] + (asrc[2*i+1]<<s);
  81. #define PUTRGB48(dst,src,i) \
  82. Y = src[2*i]; \
  83. dst[12*i+ 0] = dst[12*i+ 1] = r[Y]; \
  84. dst[12*i+ 2] = dst[12*i+ 3] = g[Y]; \
  85. dst[12*i+ 4] = dst[12*i+ 5] = b[Y]; \
  86. Y = src[2*i+1]; \
  87. dst[12*i+ 6] = dst[12*i+ 7] = r[Y]; \
  88. dst[12*i+ 8] = dst[12*i+ 9] = g[Y]; \
  89. dst[12*i+10] = dst[12*i+11] = b[Y];
  90. #define PUTBGR48(dst,src,i) \
  91. Y = src[2*i]; \
  92. dst[12*i+ 0] = dst[12*i+ 1] = b[Y]; \
  93. dst[12*i+ 2] = dst[12*i+ 3] = g[Y]; \
  94. dst[12*i+ 4] = dst[12*i+ 5] = r[Y]; \
  95. Y = src[2*i+1]; \
  96. dst[12*i+ 6] = dst[12*i+ 7] = b[Y]; \
  97. dst[12*i+ 8] = dst[12*i+ 9] = g[Y]; \
  98. dst[12*i+10] = dst[12*i+11] = r[Y];
  99. #define YUV2RGBFUNC(func_name, dst_type, alpha) \
  100. static int func_name(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, \
  101. int srcSliceH, uint8_t* dst[], int dstStride[]) \
  102. {\
  103. int y;\
  104. \
  105. if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\
  106. srcStride[1] *= 2;\
  107. srcStride[2] *= 2;\
  108. }\
  109. for (y=0; y<srcSliceH; y+=2) {\
  110. dst_type *dst_1 = (dst_type*)(dst[0] + (y+srcSliceY )*dstStride[0]);\
  111. dst_type *dst_2 = (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
  112. dst_type av_unused *r, *b;\
  113. dst_type *g;\
  114. const uint8_t *py_1 = src[0] + y*srcStride[0];\
  115. const uint8_t *py_2 = py_1 + srcStride[0];\
  116. const uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
  117. const uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
  118. const uint8_t av_unused *pa_1, *pa_2;\
  119. unsigned int h_size = c->dstW>>3;\
  120. if (alpha) {\
  121. pa_1 = src[3] + y*srcStride[3];\
  122. pa_2 = pa_1 + srcStride[3];\
  123. }\
  124. while (h_size--) {\
  125. int av_unused U, V;\
  126. int Y;\
  127. #define ENDYUV2RGBLINE(dst_delta)\
  128. pu += 4;\
  129. pv += 4;\
  130. py_1 += 8;\
  131. py_2 += 8;\
  132. dst_1 += dst_delta;\
  133. dst_2 += dst_delta;\
  134. }\
  135. if (c->dstW & 4) {\
  136. int av_unused Y, U, V;\
  137. #define ENDYUV2RGBFUNC()\
  138. }\
  139. }\
  140. return srcSliceH;\
  141. }
  142. #define CLOSEYUV2RGBFUNC(dst_delta)\
  143. ENDYUV2RGBLINE(dst_delta)\
  144. ENDYUV2RGBFUNC()
  145. YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
  146. LOADCHROMA(0);
  147. PUTRGB48(dst_1,py_1,0);
  148. PUTRGB48(dst_2,py_2,0);
  149. LOADCHROMA(1);
  150. PUTRGB48(dst_2,py_2,1);
  151. PUTRGB48(dst_1,py_1,1);
  152. LOADCHROMA(2);
  153. PUTRGB48(dst_1,py_1,2);
  154. PUTRGB48(dst_2,py_2,2);
  155. LOADCHROMA(3);
  156. PUTRGB48(dst_2,py_2,3);
  157. PUTRGB48(dst_1,py_1,3);
  158. ENDYUV2RGBLINE(48)
  159. LOADCHROMA(0);
  160. PUTRGB48(dst_1,py_1,0);
  161. PUTRGB48(dst_2,py_2,0);
  162. LOADCHROMA(1);
  163. PUTRGB48(dst_2,py_2,1);
  164. PUTRGB48(dst_1,py_1,1);
  165. ENDYUV2RGBFUNC()
  166. YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0)
  167. LOADCHROMA(0);
  168. PUTBGR48(dst_1,py_1,0);
  169. PUTBGR48(dst_2,py_2,0);
  170. LOADCHROMA(1);
  171. PUTBGR48(dst_2,py_2,1);
  172. PUTBGR48(dst_1,py_1,1);
  173. LOADCHROMA(2);
  174. PUTBGR48(dst_1,py_1,2);
  175. PUTBGR48(dst_2,py_2,2);
  176. LOADCHROMA(3);
  177. PUTBGR48(dst_2,py_2,3);
  178. PUTBGR48(dst_1,py_1,3);
  179. ENDYUV2RGBLINE(48)
  180. LOADCHROMA(0);
  181. PUTBGR48(dst_1,py_1,0);
  182. PUTBGR48(dst_2,py_2,0);
  183. LOADCHROMA(1);
  184. PUTBGR48(dst_2,py_2,1);
  185. PUTBGR48(dst_1,py_1,1);
  186. ENDYUV2RGBFUNC()
  187. YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
  188. LOADCHROMA(0);
  189. PUTRGB(dst_1,py_1,0);
  190. PUTRGB(dst_2,py_2,0);
  191. LOADCHROMA(1);
  192. PUTRGB(dst_2,py_2,1);
  193. PUTRGB(dst_1,py_1,1);
  194. LOADCHROMA(2);
  195. PUTRGB(dst_1,py_1,2);
  196. PUTRGB(dst_2,py_2,2);
  197. LOADCHROMA(3);
  198. PUTRGB(dst_2,py_2,3);
  199. PUTRGB(dst_1,py_1,3);
  200. ENDYUV2RGBLINE(8)
  201. LOADCHROMA(0);
  202. PUTRGB(dst_1,py_1,0);
  203. PUTRGB(dst_2,py_2,0);
  204. LOADCHROMA(1);
  205. PUTRGB(dst_2,py_2,1);
  206. PUTRGB(dst_1,py_1,1);
  207. ENDYUV2RGBFUNC()
  208. YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
  209. LOADCHROMA(0);
  210. PUTRGBA(dst_1,py_1,pa_1,0,24);
  211. PUTRGBA(dst_2,py_2,pa_2,0,24);
  212. LOADCHROMA(1);
  213. PUTRGBA(dst_2,py_2,pa_1,1,24);
  214. PUTRGBA(dst_1,py_1,pa_2,1,24);
  215. LOADCHROMA(2);
  216. PUTRGBA(dst_1,py_1,pa_1,2,24);
  217. PUTRGBA(dst_2,py_2,pa_2,2,24);
  218. LOADCHROMA(3);
  219. PUTRGBA(dst_2,py_2,pa_1,3,24);
  220. PUTRGBA(dst_1,py_1,pa_2,3,24);
  221. pa_1 += 8;\
  222. pa_2 += 8;\
  223. ENDYUV2RGBLINE(8)
  224. LOADCHROMA(0);
  225. PUTRGBA(dst_1,py_1,pa_1,0,24);
  226. PUTRGBA(dst_2,py_2,pa_2,0,24);
  227. LOADCHROMA(1);
  228. PUTRGBA(dst_2,py_2,pa_1,1,24);
  229. PUTRGBA(dst_1,py_1,pa_2,1,24);
  230. ENDYUV2RGBFUNC()
  231. YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
  232. LOADCHROMA(0);
  233. PUTRGBA(dst_1,py_1,pa_1,0,0);
  234. PUTRGBA(dst_2,py_2,pa_2,0,0);
  235. LOADCHROMA(1);
  236. PUTRGBA(dst_2,py_2,pa_2,1,0);
  237. PUTRGBA(dst_1,py_1,pa_1,1,0);
  238. LOADCHROMA(2);
  239. PUTRGBA(dst_1,py_1,pa_1,2,0);
  240. PUTRGBA(dst_2,py_2,pa_2,2,0);
  241. LOADCHROMA(3);
  242. PUTRGBA(dst_2,py_2,pa_2,3,0);
  243. PUTRGBA(dst_1,py_1,pa_1,3,0);
  244. pa_1 += 8;\
  245. pa_2 += 8;\
  246. ENDYUV2RGBLINE(8)
  247. LOADCHROMA(0);
  248. PUTRGBA(dst_1,py_1,pa_1,0,0);
  249. PUTRGBA(dst_2,py_2,pa_2,0,0);
  250. LOADCHROMA(1);
  251. PUTRGBA(dst_2,py_2,pa_2,1,0);
  252. PUTRGBA(dst_1,py_1,pa_1,1,0);
  253. ENDYUV2RGBFUNC()
  254. YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
  255. LOADCHROMA(0);
  256. PUTRGB24(dst_1,py_1,0);
  257. PUTRGB24(dst_2,py_2,0);
  258. LOADCHROMA(1);
  259. PUTRGB24(dst_2,py_2,1);
  260. PUTRGB24(dst_1,py_1,1);
  261. LOADCHROMA(2);
  262. PUTRGB24(dst_1,py_1,2);
  263. PUTRGB24(dst_2,py_2,2);
  264. LOADCHROMA(3);
  265. PUTRGB24(dst_2,py_2,3);
  266. PUTRGB24(dst_1,py_1,3);
  267. ENDYUV2RGBLINE(24)
  268. LOADCHROMA(0);
  269. PUTRGB24(dst_1,py_1,0);
  270. PUTRGB24(dst_2,py_2,0);
  271. LOADCHROMA(1);
  272. PUTRGB24(dst_2,py_2,1);
  273. PUTRGB24(dst_1,py_1,1);
  274. ENDYUV2RGBFUNC()
  275. // only trivial mods from yuv2rgb_c_24_rgb
  276. YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
  277. LOADCHROMA(0);
  278. PUTBGR24(dst_1,py_1,0);
  279. PUTBGR24(dst_2,py_2,0);
  280. LOADCHROMA(1);
  281. PUTBGR24(dst_2,py_2,1);
  282. PUTBGR24(dst_1,py_1,1);
  283. LOADCHROMA(2);
  284. PUTBGR24(dst_1,py_1,2);
  285. PUTBGR24(dst_2,py_2,2);
  286. LOADCHROMA(3);
  287. PUTBGR24(dst_2,py_2,3);
  288. PUTBGR24(dst_1,py_1,3);
  289. ENDYUV2RGBLINE(24)
  290. LOADCHROMA(0);
  291. PUTBGR24(dst_1,py_1,0);
  292. PUTBGR24(dst_2,py_2,0);
  293. LOADCHROMA(1);
  294. PUTBGR24(dst_2,py_2,1);
  295. PUTBGR24(dst_1,py_1,1);
  296. ENDYUV2RGBFUNC()
  297. // This is exactly the same code as yuv2rgb_c_32 except for the types of
  298. // r, g, b, dst_1, dst_2
  299. YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
  300. LOADCHROMA(0);
  301. PUTRGB(dst_1,py_1,0);
  302. PUTRGB(dst_2,py_2,0);
  303. LOADCHROMA(1);
  304. PUTRGB(dst_2,py_2,1);
  305. PUTRGB(dst_1,py_1,1);
  306. LOADCHROMA(2);
  307. PUTRGB(dst_1,py_1,2);
  308. PUTRGB(dst_2,py_2,2);
  309. LOADCHROMA(3);
  310. PUTRGB(dst_2,py_2,3);
  311. PUTRGB(dst_1,py_1,3);
  312. CLOSEYUV2RGBFUNC(8)
  313. #if 0 // Currently unused
  314. // This is exactly the same code as yuv2rgb_c_32 except for the types of
  315. // r, g, b, dst_1, dst_2
  316. YUV2RGBFUNC(yuv2rgb_c_8, uint8_t, 0)
  317. LOADCHROMA(0);
  318. PUTRGB(dst_1,py_1,0);
  319. PUTRGB(dst_2,py_2,0);
  320. LOADCHROMA(1);
  321. PUTRGB(dst_2,py_2,1);
  322. PUTRGB(dst_1,py_1,1);
  323. LOADCHROMA(2);
  324. PUTRGB(dst_1,py_1,2);
  325. PUTRGB(dst_2,py_2,2);
  326. LOADCHROMA(3);
  327. PUTRGB(dst_2,py_2,3);
  328. PUTRGB(dst_1,py_1,3);
  329. CLOSEYUV2RGBFUNC(8)
  330. #endif
  331. // r, g, b, dst_1, dst_2
  332. YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
  333. const uint8_t *d16 = dither_4x4_16[y&3];
  334. #define PUTRGB12(dst,src,i,o) \
  335. Y = src[2*i]; \
  336. dst[2*i] = r[Y+d16[0+o]] + g[Y+d16[0+o]] + b[Y+d16[0+o]]; \
  337. Y = src[2*i+1]; \
  338. dst[2*i+1] = r[Y+d16[1+o]] + g[Y+d16[1+o]] + b[Y+d16[1+o]];
  339. LOADCHROMA(0);
  340. PUTRGB12(dst_1,py_1,0,0);
  341. PUTRGB12(dst_2,py_2,0,0+8);
  342. LOADCHROMA(1);
  343. PUTRGB12(dst_2,py_2,1,2+8);
  344. PUTRGB12(dst_1,py_1,1,2);
  345. LOADCHROMA(2);
  346. PUTRGB12(dst_1,py_1,2,4);
  347. PUTRGB12(dst_2,py_2,2,4+8);
  348. LOADCHROMA(3);
  349. PUTRGB12(dst_2,py_2,3,6+8);
  350. PUTRGB12(dst_1,py_1,3,6);
  351. CLOSEYUV2RGBFUNC(8)
  352. // r, g, b, dst_1, dst_2
  353. YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
  354. const uint8_t *d32 = dither_8x8_32[y&7];
  355. const uint8_t *d64 = dither_8x8_73[y&7];
  356. #define PUTRGB8(dst,src,i,o) \
  357. Y = src[2*i]; \
  358. dst[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
  359. Y = src[2*i+1]; \
  360. dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
  361. LOADCHROMA(0);
  362. PUTRGB8(dst_1,py_1,0,0);
  363. PUTRGB8(dst_2,py_2,0,0+8);
  364. LOADCHROMA(1);
  365. PUTRGB8(dst_2,py_2,1,2+8);
  366. PUTRGB8(dst_1,py_1,1,2);
  367. LOADCHROMA(2);
  368. PUTRGB8(dst_1,py_1,2,4);
  369. PUTRGB8(dst_2,py_2,2,4+8);
  370. LOADCHROMA(3);
  371. PUTRGB8(dst_2,py_2,3,6+8);
  372. PUTRGB8(dst_1,py_1,3,6);
  373. CLOSEYUV2RGBFUNC(8)
  374. #if 0 // Currently unused
  375. // This is exactly the same code as yuv2rgb_c_32 except for the types of
  376. // r, g, b, dst_1, dst_2
  377. YUV2RGBFUNC(yuv2rgb_c_4, uint8_t, 0)
  378. int acc;
  379. #define PUTRGB4(dst,src,i) \
  380. Y = src[2*i]; \
  381. acc = r[Y] + g[Y] + b[Y]; \
  382. Y = src[2*i+1]; \
  383. acc |= (r[Y] + g[Y] + b[Y])<<4; \
  384. dst[i] = acc;
  385. LOADCHROMA(0);
  386. PUTRGB4(dst_1,py_1,0);
  387. PUTRGB4(dst_2,py_2,0);
  388. LOADCHROMA(1);
  389. PUTRGB4(dst_2,py_2,1);
  390. PUTRGB4(dst_1,py_1,1);
  391. LOADCHROMA(2);
  392. PUTRGB4(dst_1,py_1,2);
  393. PUTRGB4(dst_2,py_2,2);
  394. LOADCHROMA(3);
  395. PUTRGB4(dst_2,py_2,3);
  396. PUTRGB4(dst_1,py_1,3);
  397. CLOSEYUV2RGBFUNC(4)
  398. #endif
  399. YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
  400. const uint8_t *d64 = dither_8x8_73[y&7];
  401. const uint8_t *d128 = dither_8x8_220[y&7];
  402. int acc;
  403. #define PUTRGB4D(dst,src,i,o) \
  404. Y = src[2*i]; \
  405. acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
  406. Y = src[2*i+1]; \
  407. acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4; \
  408. dst[i]= acc;
  409. LOADCHROMA(0);
  410. PUTRGB4D(dst_1,py_1,0,0);
  411. PUTRGB4D(dst_2,py_2,0,0+8);
  412. LOADCHROMA(1);
  413. PUTRGB4D(dst_2,py_2,1,2+8);
  414. PUTRGB4D(dst_1,py_1,1,2);
  415. LOADCHROMA(2);
  416. PUTRGB4D(dst_1,py_1,2,4);
  417. PUTRGB4D(dst_2,py_2,2,4+8);
  418. LOADCHROMA(3);
  419. PUTRGB4D(dst_2,py_2,3,6+8);
  420. PUTRGB4D(dst_1,py_1,3,6);
  421. CLOSEYUV2RGBFUNC(4)
  422. #if 0 // Currently unused
  423. // This is exactly the same code as yuv2rgb_c_32 except for the types of
  424. // r, g, b, dst_1, dst_2
  425. YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t, 0)
  426. LOADCHROMA(0);
  427. PUTRGB(dst_1,py_1,0);
  428. PUTRGB(dst_2,py_2,0);
  429. LOADCHROMA(1);
  430. PUTRGB(dst_2,py_2,1);
  431. PUTRGB(dst_1,py_1,1);
  432. LOADCHROMA(2);
  433. PUTRGB(dst_1,py_1,2);
  434. PUTRGB(dst_2,py_2,2);
  435. LOADCHROMA(3);
  436. PUTRGB(dst_2,py_2,3);
  437. PUTRGB(dst_1,py_1,3);
  438. CLOSEYUV2RGBFUNC(8)
  439. #endif
  440. YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
  441. const uint8_t *d64 = dither_8x8_73[y&7];
  442. const uint8_t *d128 = dither_8x8_220[y&7];
  443. #define PUTRGB4DB(dst,src,i,o) \
  444. Y = src[2*i]; \
  445. dst[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
  446. Y = src[2*i+1]; \
  447. dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
  448. LOADCHROMA(0);
  449. PUTRGB4DB(dst_1,py_1,0,0);
  450. PUTRGB4DB(dst_2,py_2,0,0+8);
  451. LOADCHROMA(1);
  452. PUTRGB4DB(dst_2,py_2,1,2+8);
  453. PUTRGB4DB(dst_1,py_1,1,2);
  454. LOADCHROMA(2);
  455. PUTRGB4DB(dst_1,py_1,2,4);
  456. PUTRGB4DB(dst_2,py_2,2,4+8);
  457. LOADCHROMA(3);
  458. PUTRGB4DB(dst_2,py_2,3,6+8);
  459. PUTRGB4DB(dst_1,py_1,3,6);
  460. CLOSEYUV2RGBFUNC(8)
  461. YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
  462. const uint8_t *d128 = dither_8x8_220[y&7];
  463. char out_1 = 0, out_2 = 0;
  464. g= c->table_gU[128] + c->table_gV[128];
  465. #define PUTRGB1(out,src,i,o) \
  466. Y = src[2*i]; \
  467. out+= out + g[Y+d128[0+o]]; \
  468. Y = src[2*i+1]; \
  469. out+= out + g[Y+d128[1+o]];
  470. PUTRGB1(out_1,py_1,0,0);
  471. PUTRGB1(out_2,py_2,0,0+8);
  472. PUTRGB1(out_2,py_2,1,2+8);
  473. PUTRGB1(out_1,py_1,1,2);
  474. PUTRGB1(out_1,py_1,2,4);
  475. PUTRGB1(out_2,py_2,2,4+8);
  476. PUTRGB1(out_2,py_2,3,6+8);
  477. PUTRGB1(out_1,py_1,3,6);
  478. dst_1[0]= out_1;
  479. dst_2[0]= out_2;
  480. CLOSEYUV2RGBFUNC(1)
  481. SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
  482. {
  483. SwsFunc t = NULL;
  484. #if HAVE_MMX
  485. t = ff_yuv2rgb_init_mmx(c);
  486. #endif
  487. #if HAVE_VIS
  488. t = ff_yuv2rgb_init_vis(c);
  489. #endif
  490. #if CONFIG_MLIB
  491. t = ff_yuv2rgb_init_mlib(c);
  492. #endif
  493. #if HAVE_ALTIVEC
  494. if (c->flags & SWS_CPU_CAPS_ALTIVEC)
  495. t = ff_yuv2rgb_init_altivec(c);
  496. #endif
  497. #if ARCH_BFIN
  498. if (c->flags & SWS_CPU_CAPS_BFIN)
  499. t = ff_yuv2rgb_get_func_ptr_bfin(c);
  500. #endif
  501. if (t)
  502. return t;
  503. av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat));
  504. switch (c->dstFormat) {
  505. case PIX_FMT_BGR48BE:
  506. case PIX_FMT_BGR48LE: return yuv2rgb_c_bgr48;
  507. case PIX_FMT_RGB48BE:
  508. case PIX_FMT_RGB48LE: return yuv2rgb_c_48;
  509. case PIX_FMT_ARGB:
  510. case PIX_FMT_ABGR: if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) return yuva2argb_c;
  511. case PIX_FMT_RGBA:
  512. case PIX_FMT_BGRA: return (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) ? yuva2rgba_c : yuv2rgb_c_32;
  513. case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb;
  514. case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr;
  515. case PIX_FMT_RGB565:
  516. case PIX_FMT_BGR565:
  517. case PIX_FMT_RGB555:
  518. case PIX_FMT_BGR555: return yuv2rgb_c_16;
  519. case PIX_FMT_RGB444:
  520. case PIX_FMT_BGR444: return yuv2rgb_c_12_ordered_dither;
  521. case PIX_FMT_RGB8:
  522. case PIX_FMT_BGR8: return yuv2rgb_c_8_ordered_dither;
  523. case PIX_FMT_RGB4:
  524. case PIX_FMT_BGR4: return yuv2rgb_c_4_ordered_dither;
  525. case PIX_FMT_RGB4_BYTE:
  526. case PIX_FMT_BGR4_BYTE: return yuv2rgb_c_4b_ordered_dither;
  527. case PIX_FMT_MONOBLACK: return yuv2rgb_c_1_ordered_dither;
  528. default:
  529. assert(0);
  530. }
  531. return NULL;
  532. }
  533. static void fill_table(uint8_t* table[256], const int elemsize, const int inc, void *y_tab)
  534. {
  535. int i;
  536. int64_t cb = 0;
  537. uint8_t *y_table = y_tab;
  538. y_table -= elemsize * (inc >> 9);
  539. for (i = 0; i < 256; i++) {
  540. table[i] = y_table + elemsize * (cb >> 16);
  541. cb += inc;
  542. }
  543. }
  544. static void fill_gv_table(int table[256], const int elemsize, const int inc)
  545. {
  546. int i;
  547. int64_t cb = 0;
  548. int off = -(inc >> 9);
  549. for (i = 0; i < 256; i++) {
  550. table[i] = elemsize * (off + (cb >> 16));
  551. cb += inc;
  552. }
  553. }
  554. static uint16_t roundToInt16(int64_t f)
  555. {
  556. int r= (f + (1<<15))>>16;
  557. if (r<-0x7FFF) return 0x8000;
  558. else if (r> 0x7FFF) return 0x7FFF;
  559. else return r;
  560. }
  561. av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
  562. int brightness, int contrast, int saturation)
  563. {
  564. const int isRgb = c->dstFormat==PIX_FMT_RGB32
  565. || c->dstFormat==PIX_FMT_RGB32_1
  566. || c->dstFormat==PIX_FMT_BGR24
  567. || c->dstFormat==PIX_FMT_RGB565BE
  568. || c->dstFormat==PIX_FMT_RGB565LE
  569. || c->dstFormat==PIX_FMT_RGB555BE
  570. || c->dstFormat==PIX_FMT_RGB555LE
  571. || c->dstFormat==PIX_FMT_RGB444BE
  572. || c->dstFormat==PIX_FMT_RGB444LE
  573. || c->dstFormat==PIX_FMT_RGB8
  574. || c->dstFormat==PIX_FMT_RGB4
  575. || c->dstFormat==PIX_FMT_RGB4_BYTE
  576. || c->dstFormat==PIX_FMT_MONOBLACK;
  577. const int isNotNe = c->dstFormat==PIX_FMT_NE(RGB565LE,RGB565BE)
  578. || c->dstFormat==PIX_FMT_NE(RGB555LE,RGB555BE)
  579. || c->dstFormat==PIX_FMT_NE(RGB444LE,RGB444BE)
  580. || c->dstFormat==PIX_FMT_NE(BGR565LE,BGR565BE)
  581. || c->dstFormat==PIX_FMT_NE(BGR555LE,BGR555BE)
  582. || c->dstFormat==PIX_FMT_NE(BGR444LE,BGR444BE);
  583. const int bpp = c->dstFormatBpp;
  584. uint8_t *y_table;
  585. uint16_t *y_table16;
  586. uint32_t *y_table32;
  587. int i, base, rbase, gbase, bbase, abase, needAlpha;
  588. const int yoffs = fullRange ? 384 : 326;
  589. int64_t crv = inv_table[0];
  590. int64_t cbu = inv_table[1];
  591. int64_t cgu = -inv_table[2];
  592. int64_t cgv = -inv_table[3];
  593. int64_t cy = 1<<16;
  594. int64_t oy = 0;
  595. int64_t yb = 0;
  596. if (!fullRange) {
  597. cy = (cy*255) / 219;
  598. oy = 16<<16;
  599. } else {
  600. crv = (crv*224) / 255;
  601. cbu = (cbu*224) / 255;
  602. cgu = (cgu*224) / 255;
  603. cgv = (cgv*224) / 255;
  604. }
  605. cy = (cy *contrast ) >> 16;
  606. crv = (crv*contrast * saturation) >> 32;
  607. cbu = (cbu*contrast * saturation) >> 32;
  608. cgu = (cgu*contrast * saturation) >> 32;
  609. cgv = (cgv*contrast * saturation) >> 32;
  610. oy -= 256*brightness;
  611. c->uOffset= 0x0400040004000400LL;
  612. c->vOffset= 0x0400040004000400LL;
  613. c->yCoeff= roundToInt16(cy *8192) * 0x0001000100010001ULL;
  614. c->vrCoeff= roundToInt16(crv*8192) * 0x0001000100010001ULL;
  615. c->ubCoeff= roundToInt16(cbu*8192) * 0x0001000100010001ULL;
  616. c->vgCoeff= roundToInt16(cgv*8192) * 0x0001000100010001ULL;
  617. c->ugCoeff= roundToInt16(cgu*8192) * 0x0001000100010001ULL;
  618. c->yOffset= roundToInt16(oy * 8) * 0x0001000100010001ULL;
  619. c->yuv2rgb_y_coeff = (int16_t)roundToInt16(cy <<13);
  620. c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
  621. c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
  622. c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
  623. c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
  624. c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
  625. //scale coefficients by cy
  626. crv = ((crv << 16) + 0x8000) / cy;
  627. cbu = ((cbu << 16) + 0x8000) / cy;
  628. cgu = ((cgu << 16) + 0x8000) / cy;
  629. cgv = ((cgv << 16) + 0x8000) / cy;
  630. av_free(c->yuvTable);
  631. switch (bpp) {
  632. case 1:
  633. c->yuvTable = av_malloc(1024);
  634. y_table = c->yuvTable;
  635. yb = -(384<<16) - oy;
  636. for (i = 0; i < 1024-110; i++) {
  637. y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
  638. yb += cy;
  639. }
  640. fill_table(c->table_gU, 1, cgu, y_table + yoffs);
  641. fill_gv_table(c->table_gV, 1, cgv);
  642. break;
  643. case 4:
  644. case 4|128:
  645. rbase = isRgb ? 3 : 0;
  646. gbase = 1;
  647. bbase = isRgb ? 0 : 3;
  648. c->yuvTable = av_malloc(1024*3);
  649. y_table = c->yuvTable;
  650. yb = -(384<<16) - oy;
  651. for (i = 0; i < 1024-110; i++) {
  652. int yval = av_clip_uint8((yb + 0x8000) >> 16);
  653. y_table[i+110 ] = (yval >> 7) << rbase;
  654. y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase;
  655. y_table[i+110+2048] = (yval >> 7) << bbase;
  656. yb += cy;
  657. }
  658. fill_table(c->table_rV, 1, crv, y_table + yoffs);
  659. fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
  660. fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
  661. fill_gv_table(c->table_gV, 1, cgv);
  662. break;
  663. case 8:
  664. rbase = isRgb ? 5 : 0;
  665. gbase = isRgb ? 2 : 3;
  666. bbase = isRgb ? 0 : 6;
  667. c->yuvTable = av_malloc(1024*3);
  668. y_table = c->yuvTable;
  669. yb = -(384<<16) - oy;
  670. for (i = 0; i < 1024-38; i++) {
  671. int yval = av_clip_uint8((yb + 0x8000) >> 16);
  672. y_table[i+16 ] = ((yval + 18) / 36) << rbase;
  673. y_table[i+16+1024] = ((yval + 18) / 36) << gbase;
  674. y_table[i+37+2048] = ((yval + 43) / 85) << bbase;
  675. yb += cy;
  676. }
  677. fill_table(c->table_rV, 1, crv, y_table + yoffs);
  678. fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
  679. fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
  680. fill_gv_table(c->table_gV, 1, cgv);
  681. break;
  682. case 12:
  683. rbase = isRgb ? 8 : 0;
  684. gbase = 4;
  685. bbase = isRgb ? 0 : 8;
  686. c->yuvTable = av_malloc(1024*3*2);
  687. y_table16 = c->yuvTable;
  688. yb = -(384<<16) - oy;
  689. for (i = 0; i < 1024; i++) {
  690. uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
  691. y_table16[i ] = (yval >> 4) << rbase;
  692. y_table16[i+1024] = (yval >> 4) << gbase;
  693. y_table16[i+2048] = (yval >> 4) << bbase;
  694. yb += cy;
  695. }
  696. if (isNotNe)
  697. for (i = 0; i < 1024*3; i++)
  698. y_table16[i] = av_bswap16(y_table16[i]);
  699. fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
  700. fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
  701. fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
  702. fill_gv_table(c->table_gV, 2, cgv);
  703. break;
  704. case 15:
  705. case 16:
  706. rbase = isRgb ? bpp - 5 : 0;
  707. gbase = 5;
  708. bbase = isRgb ? 0 : (bpp - 5);
  709. c->yuvTable = av_malloc(1024*3*2);
  710. y_table16 = c->yuvTable;
  711. yb = -(384<<16) - oy;
  712. for (i = 0; i < 1024; i++) {
  713. uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
  714. y_table16[i ] = (yval >> 3) << rbase;
  715. y_table16[i+1024] = (yval >> (18 - bpp)) << gbase;
  716. y_table16[i+2048] = (yval >> 3) << bbase;
  717. yb += cy;
  718. }
  719. if(isNotNe)
  720. for (i = 0; i < 1024*3; i++)
  721. y_table16[i] = av_bswap16(y_table16[i]);
  722. fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
  723. fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
  724. fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
  725. fill_gv_table(c->table_gV, 2, cgv);
  726. break;
  727. case 24:
  728. case 48:
  729. c->yuvTable = av_malloc(1024);
  730. y_table = c->yuvTable;
  731. yb = -(384<<16) - oy;
  732. for (i = 0; i < 1024; i++) {
  733. y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
  734. yb += cy;
  735. }
  736. fill_table(c->table_rV, 1, crv, y_table + yoffs);
  737. fill_table(c->table_gU, 1, cgu, y_table + yoffs);
  738. fill_table(c->table_bU, 1, cbu, y_table + yoffs);
  739. fill_gv_table(c->table_gV, 1, cgv);
  740. break;
  741. case 32:
  742. base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
  743. rbase = base + (isRgb ? 16 : 0);
  744. gbase = base + 8;
  745. bbase = base + (isRgb ? 0 : 16);
  746. needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
  747. if (!needAlpha)
  748. abase = (base + 24) & 31;
  749. c->yuvTable = av_malloc(1024*3*4);
  750. y_table32 = c->yuvTable;
  751. yb = -(384<<16) - oy;
  752. for (i = 0; i < 1024; i++) {
  753. uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
  754. y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase));
  755. y_table32[i+1024] = yval << gbase;
  756. y_table32[i+2048] = yval << bbase;
  757. yb += cy;
  758. }
  759. fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
  760. fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
  761. fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
  762. fill_gv_table(c->table_gV, 4, cgv);
  763. break;
  764. default:
  765. c->yuvTable = NULL;
  766. av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);
  767. return -1;
  768. }
  769. return 0;
  770. }