You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

967 lines
29KB

  1. /*
  2. * software RGB to RGB converter
  3. * pluralize by software PAL8 to RGB converter
  4. * software YUV to YUV converter
  5. * software YUV to RGB converter
  6. * Written by Nick Kurshev.
  7. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
  8. * lot of big-endian byte order fixes by Alex Beregszaszi
  9. *
  10. * This file is part of Libav.
  11. *
  12. * Libav is free software; you can redistribute it and/or
  13. * modify it under the terms of the GNU Lesser General Public
  14. * License as published by the Free Software Foundation; either
  15. * version 2.1 of the License, or (at your option) any later version.
  16. *
  17. * Libav is distributed in the hope that it will be useful,
  18. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  20. * Lesser General Public License for more details.
  21. *
  22. * You should have received a copy of the GNU Lesser General Public
  23. * License along with Libav; if not, write to the Free Software
  24. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25. */
  26. #include <stddef.h>
  27. #include "libavutil/attributes.h"
  /**
   * Expand packed 3-byte pixels into 4-byte pixels with a constant 255 byte.
   *
   * For every 3 input bytes, 4 output bytes are produced. When HAVE_BIGENDIAN
   * is set the output is 255 followed by the three input bytes in reverse
   * order; otherwise the three input bytes are copied in order and 255 is
   * appended.
   *
   * @param src      input bytes
   * @param dst      output bytes; receives 4 bytes per 3 input bytes
   * @param src_size number of input bytes; the loop runs while the read
   *                 position is below src + src_size
   */
  static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
                                    int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint8_t *out                = dst;

      for (; in < in_end; in += 3, out += 4) {
  #if HAVE_BIGENDIAN
          /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
          out[0] = 255;
          out[1] = in[2];
          out[2] = in[1];
          out[3] = in[0];
  #else
          out[0] = in[0];
          out[1] = in[1];
          out[2] = in[2];
          out[3] = 255;
  #endif
      }
  }
  /**
   * Reduce packed 4-byte pixels to 3-byte pixels by dropping one byte.
   *
   * When HAVE_BIGENDIAN is set the first byte of each input pixel is skipped
   * and the remaining three are written in reverse order; otherwise the first
   * three bytes are copied in order and the fourth is skipped.
   *
   * @param src      input bytes
   * @param dst      output bytes; receives 3 bytes per 4 input bytes
   * @param src_size number of input bytes
   */
  static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
                                    int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint8_t *out                = dst;

      for (; in < in_end; in += 4, out += 3) {
  #if HAVE_BIGENDIAN
          /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
          out[2] = in[1];
          out[1] = in[2];
          out[0] = in[3];
  #else
          out[0] = in[0];
          out[1] = in[1];
          out[2] = in[2];
  #endif
      }
  }
  72. /*
  73. * original by Strepto/Astral
  74. * ported to gcc & bugfixed: A'rpi
  75. * MMXEXT, 3DNOW optimization by Nick Kurshev
  76. * 32-bit C version, and and&add trick by Michael Niedermayer
  77. */
  /**
   * Convert 16-bit words from a 15-bit layout to a 16-bit layout.
   *
   * In every 16-bit word the low five bits are kept in place and bits 5..14
   * are moved up by one position (bit 5 of the result is zero). The move is
   * done by adding the masked upper field to the word:
   * (x & 0x7FFF) + (x & 0x7FE0).
   *
   * Two words are handled per iteration through a 32-bit access, so src and
   * dst are accessed as uint32_t/uint16_t objects, as in the original code.
   *
   * The loop is driven by a byte offset rather than by an "end - 3" pointer,
   * so no pointer before the start of the buffer is formed for tiny inputs,
   * and a lone trailing byte (odd src_size) is left untouched instead of
   * being read and written as a full 16-bit word.
   *
   * @param src      input words
   * @param dst      output words, same size as the input
   * @param src_size number of input bytes; non-positive sizes do nothing
   */
  static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      int pos = 0;

      /* Two pixels at a time while at least four bytes remain. */
      for (; src_size - pos >= 4; pos += 4) {
          const uint32_t x = *(const uint32_t *)(src + pos);

          *(uint32_t *)(dst + pos) = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
      }
      /* At most one complete pixel can be left over. */
      if (src_size - pos >= 2) {
          const unsigned x = *(const uint16_t *)(src + pos);

          *(uint16_t *)(dst + pos) = (x & 0x7FFF) + (x & 0x7FE0);
      }
  }
  /**
   * Convert 16-bit words from a 16-bit layout to a 15-bit layout.
   *
   * In every 16-bit word the low five bits are kept in place and bits 6..15
   * are moved down by one position into bits 5..14; the original bit 5 is
   * dropped and bit 15 of the result is zero.
   *
   * Two words are handled per iteration through a 32-bit access, so src and
   * dst are accessed as uint32_t/uint16_t objects, as in the original code.
   *
   * The loop is driven by a byte offset rather than by an "end - 3" pointer,
   * so no pointer before the start of the buffer is formed for tiny inputs,
   * and a lone trailing byte (odd src_size) is left untouched instead of
   * being read and written as a full 16-bit word.
   *
   * @param src      input words
   * @param dst      output words, same size as the input
   * @param src_size number of input bytes; non-positive sizes do nothing
   */
  static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      int pos = 0;

      /* Two pixels at a time while at least four bytes remain. */
      for (; src_size - pos >= 4; pos += 4) {
          const uint32_t x = *(const uint32_t *)(src + pos);

          *(uint32_t *)(dst + pos) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
      }
      /* At most one complete pixel can be left over. */
      if (src_size - pos >= 2) {
          const unsigned x = *(const uint16_t *)(src + pos);

          *(uint16_t *)(dst + pos) = ((x >> 1) & 0x7FE0) | (x & 0x001F);
      }
  }
  /**
   * Pack 32-bit words into 16-bit words (5/6/5 bit fields).
   *
   * From each 32-bit input word, taken as a native integer:
   *   bits  3..7  -> output bits  0..4
   *   bits 10..15 -> output bits  5..10
   *   bits 19..23 -> output bits 11..15
   * Bits 24..31 of the input are ignored.
   *
   * @param src      input, read as uint32_t words
   * @param dst      output, written as uint16_t words
   * @param src_size number of input bytes
   */
  static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint16_t *out               = (uint16_t *)dst;

      for (; in < in_end; in += 4) {
          const uint32_t px  = *(const uint32_t *)in;
          const unsigned lo  = (px & 0xFF)     >> 3;
          const unsigned mid = (px & 0xFC00)   >> 5;
          const unsigned hi  = (px & 0xF80000) >> 8;

          /* The three fields occupy disjoint bits. */
          *out++ = hi | mid | lo;
      }
  }
  /**
   * Pack 32-bit words into 16-bit words (5/6/5 bit fields) with the outer
   * two fields exchanged relative to rgb32to16_c.
   *
   * From each 32-bit input word, taken as a native integer:
   *   bits  3..7  -> output bits 11..15
   *   bits 10..15 -> output bits  5..10
   *   bits 19..23 -> output bits  0..4
   * Bits 24..31 of the input are ignored.
   *
   * @param src      input, read as uint32_t words
   * @param dst      output, written as uint16_t words
   * @param src_size number of input bytes
   */
  static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
                                    int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint16_t *out               = (uint16_t *)dst;

      for (; in < in_end; in += 4) {
          const uint32_t px  = *(const uint32_t *)in;
          const unsigned hi  = (px & 0xF8)     << 8;
          const unsigned mid = (px & 0xFC00)   >> 5;
          const unsigned lo  = (px & 0xF80000) >> 19;

          /* The three fields occupy disjoint bits. */
          *out++ = hi | mid | lo;
      }
  }
  /**
   * Pack 32-bit words into 16-bit words (5/5/5 bit fields).
   *
   * From each 32-bit input word, taken as a native integer:
   *   bits  3..7  -> output bits  0..4
   *   bits 11..15 -> output bits  5..9
   *   bits 19..23 -> output bits 10..14
   * Output bit 15 is zero; input bits 24..31 are ignored.
   *
   * @param src      input, read as uint32_t words
   * @param dst      output, written as uint16_t words
   * @param src_size number of input bytes
   */
  static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint16_t *out               = (uint16_t *)dst;

      for (; in < in_end; in += 4) {
          const uint32_t px  = *(const uint32_t *)in;
          const unsigned lo  = (px & 0xFF)     >> 3;
          const unsigned mid = (px & 0xF800)   >> 6;
          const unsigned hi  = (px & 0xF80000) >> 9;

          /* The three fields occupy disjoint bits. */
          *out++ = hi | mid | lo;
      }
  }
  /**
   * Pack 32-bit words into 16-bit words (5/5/5 bit fields) with the outer
   * two fields exchanged relative to rgb32to15_c.
   *
   * From each 32-bit input word, taken as a native integer:
   *   bits  3..7  -> output bits 10..14
   *   bits 11..15 -> output bits  5..9
   *   bits 19..23 -> output bits  0..4
   * Output bit 15 is zero; input bits 24..31 are ignored.
   *
   * @param src      input, read as uint32_t words
   * @param dst      output, written as uint16_t words
   * @param src_size number of input bytes
   */
  static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
                                    int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint16_t *out               = (uint16_t *)dst;

      for (; in < in_end; in += 4) {
          const uint32_t px  = *(const uint32_t *)in;
          const unsigned hi  = (px & 0xF8)     << 7;
          const unsigned mid = (px & 0xF800)   >> 6;
          const unsigned lo  = (px & 0xF80000) >> 19;

          /* The three fields occupy disjoint bits. */
          *out++ = hi | mid | lo;
      }
  }
  /**
   * Pack 3-byte pixels into 16-bit words (5/6/5 bit fields).
   *
   * For each pixel, the top 5 bits of byte 0 land in output bits 0..4, the
   * top 6 bits of byte 1 in bits 5..10 and the top 5 bits of byte 2 in
   * bits 11..15.
   *
   * @param src      input bytes, 3 per pixel
   * @param dst      output, written as uint16_t words
   * @param src_size number of input bytes
   */
  static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
                                    int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint16_t *out               = (uint16_t *)dst;

      for (; in < in_end; in += 3) {
          const unsigned b = in[0];
          const unsigned g = in[1];
          const unsigned r = in[2];

          *out++ = ((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3);
      }
  }
  /**
   * Pack 3-byte pixels into 16-bit words (5/6/5 bit fields).
   *
   * For each pixel, the top 5 bits of byte 0 land in output bits 11..15, the
   * top 6 bits of byte 1 in bits 5..10 and the top 5 bits of byte 2 in
   * bits 0..4.
   *
   * @param src      input bytes, 3 per pixel
   * @param dst      output, written as uint16_t words
   * @param src_size number of input bytes
   */
  static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint16_t *out               = (uint16_t *)dst;

      for (; in < in_end; in += 3) {
          const unsigned r = in[0];
          const unsigned g = in[1];
          const unsigned b = in[2];

          *out++ = ((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3);
      }
  }
  /**
   * Pack 3-byte pixels into 16-bit words (5/5/5 bit fields).
   *
   * For each pixel, the top 5 bits of byte 0 land in output bits 0..4, the
   * top 5 bits of byte 1 in bits 5..9 and the top 5 bits of byte 2 in
   * bits 10..14. Output bit 15 is zero.
   *
   * @param src      input bytes, 3 per pixel
   * @param dst      output, written as uint16_t words
   * @param src_size number of input bytes
   */
  static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
                                    int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint16_t *out               = (uint16_t *)dst;

      for (; in < in_end; in += 3) {
          const unsigned b = in[0];
          const unsigned g = in[1];
          const unsigned r = in[2];

          *out++ = ((r & 0xF8) << 7) | ((g & 0xF8) << 2) | (b >> 3);
      }
  }
  /**
   * Pack 3-byte pixels into 16-bit words (5/5/5 bit fields).
   *
   * For each pixel, the top 5 bits of byte 0 land in output bits 10..14, the
   * top 5 bits of byte 1 in bits 5..9 and the top 5 bits of byte 2 in
   * bits 0..4. Output bit 15 is zero.
   *
   * @param src      input bytes, 3 per pixel
   * @param dst      output, written as uint16_t words
   * @param src_size number of input bytes
   */
  static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      const uint8_t *in           = src;
      const uint8_t *const in_end = src + src_size;
      uint16_t *out               = (uint16_t *)dst;

      for (; in < in_end; in += 3) {
          const unsigned r = in[0];
          const unsigned g = in[1];
          const unsigned b = in[2];

          *out++ = ((r & 0xF8) << 7) | ((g & 0xF8) << 2) | (b >> 3);
      }
  }
  216. /*
  217. * I use a less accurate approximation here by simply left-shifting the input
  218. * value and filling the low order bits with zeroes. This method improves PNG
  219. * compression but this scheme cannot reproduce white exactly, since it does
  220. * not generate an all-ones maximum value; the net effect is to darken the
  221. * image slightly.
  222. *
  223. * The better method should be "left bit replication":
  224. *
  225. * 4 3 2 1 0
  226. * ---------
  227. * 1 1 0 1 1
  228. *
  229. * 7 6 5 4 3 2 1 0
  230. * ----------------
  231. * 1 1 0 1 1 1 1 0
  232. * |=======| |===|
  233. * | leftmost bits repeated to fill open bits
  234. * |
  235. * original bits
  236. */
  /**
   * Unpack 16-bit words holding 5/5/5 bit fields into 3 bytes per pixel.
   *
   * Each field is left-aligned in its output byte with the low three bits
   * zero (plain shift, no bit replication). Output byte order per pixel:
   * field from bits 0..4, then bits 5..9, then bits 10..14.
   *
   * @param src      input, read as uint16_t words
   * @param dst      output bytes, 3 per input word
   * @param src_size number of input bytes; src_size / 2 words are converted
   */
  static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
                                    int src_size)
  {
      const uint16_t *in = (const uint16_t *)src;
      const int pixels   = src_size / 2;
      uint8_t *out       = dst;
      int i;

      for (i = 0; i < pixels; i++, out += 3) {
          const unsigned px = in[i];

          out[0] = (px & 0x1F)   << 3;
          out[1] = (px & 0x3E0)  >> 2;
          out[2] = (px & 0x7C00) >> 7;
      }
  }
  /**
   * Unpack 16-bit words holding 5/6/5 bit fields into 3 bytes per pixel.
   *
   * Each field is left-aligned in its output byte with the remaining low
   * bits zero (plain shift, no bit replication). Output byte order per
   * pixel: field from bits 0..4, then bits 5..10, then bits 11..15.
   *
   * @param src      input, read as uint16_t words
   * @param dst      output bytes, 3 per input word
   * @param src_size number of input bytes; src_size / 2 words are converted
   */
  static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
                                    int src_size)
  {
      const uint16_t *in = (const uint16_t *)src;
      const int pixels   = src_size / 2;
      uint8_t *out       = dst;
      int i;

      for (i = 0; i < pixels; i++, out += 3) {
          const unsigned px = in[i];

          out[0] = (px & 0x1F)   << 3;
          out[1] = (px & 0x7E0)  >> 3;
          out[2] = (px & 0xF800) >> 8;
      }
  }
  /**
   * Unpack 16-bit words holding 5/5/5 bit fields into 4 bytes per pixel,
   * adding a constant 255 byte.
   *
   * Each field is left-aligned in its output byte (plain shift). Without
   * HAVE_BIGENDIAN the byte order is: bits 0..4, bits 5..9, bits 10..14, 255.
   * With HAVE_BIGENDIAN the same four bytes are written in reverse order.
   *
   * @param src      input, read as uint16_t words
   * @param dst      output bytes, 4 per input word
   * @param src_size number of input bytes; src_size / 2 words are converted
   */
  static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      const uint16_t *in = (const uint16_t *)src;
      const int pixels   = src_size / 2;
      uint8_t *out       = dst;
      int i;

      for (i = 0; i < pixels; i++, out += 4) {
          const unsigned px   = in[i];
          const uint8_t low   = (px & 0x1F)   << 3;
          const uint8_t mid   = (px & 0x3E0)  >> 2;
          const uint8_t high  = (px & 0x7C00) >> 7;

  #if HAVE_BIGENDIAN
          out[0] = 255;
          out[1] = high;
          out[2] = mid;
          out[3] = low;
  #else
          out[0] = low;
          out[1] = mid;
          out[2] = high;
          out[3] = 255;
  #endif
      }
  }
  /**
   * Unpack 16-bit words holding 5/6/5 bit fields into 4 bytes per pixel,
   * adding a constant 255 byte.
   *
   * Each field is left-aligned in its output byte (plain shift). Without
   * HAVE_BIGENDIAN the byte order is: bits 0..4, bits 5..10, bits 11..15,
   * 255. With HAVE_BIGENDIAN the same four bytes are written in reverse
   * order.
   *
   * @param src      input, read as uint16_t words
   * @param dst      output bytes, 4 per input word
   * @param src_size number of input bytes; src_size / 2 words are converted
   */
  static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      const uint16_t *in = (const uint16_t *)src;
      const int pixels   = src_size / 2;
      uint8_t *out       = dst;
      int i;

      for (i = 0; i < pixels; i++, out += 4) {
          const unsigned px   = in[i];
          const uint8_t low   = (px & 0x1F)   << 3;
          const uint8_t mid   = (px & 0x7E0)  >> 3;
          const uint8_t high  = (px & 0xF800) >> 8;

  #if HAVE_BIGENDIAN
          out[0] = 255;
          out[1] = high;
          out[2] = mid;
          out[3] = low;
  #else
          out[0] = low;
          out[1] = mid;
          out[2] = high;
          out[3] = 255;
  #endif
      }
  }
  /**
   * Exchange the two byte lanes selected by mask 0x00ff00ff in every 32-bit
   * word, keeping the lanes selected by 0xff00ff00 in place.
   *
   * Words are read and written as native uint32_t values, so which memory
   * bytes are exchanged depends on the host byte order (on a little-endian
   * host: memory bytes 0 and 2 of each group of four).
   *
   * Compared with the previous implementation:
   *  - all arithmetic is unsigned, so shifting the masked value left by 16
   *    can no longer overflow a signed int;
   *  - the loop indexes forward from 0 instead of biasing the base pointers
   *    by (15 - src_size), so no out-of-range pointers are formed;
   *  - only complete 4-byte groups are processed; a partial trailing group
   *    is left untouched instead of being read/written past src_size.
   *
   * @param src      input, read as uint32_t words
   * @param dst      output, written as uint32_t words (may equal src)
   * @param src_size number of input bytes; non-positive sizes do nothing
   */
  static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
                                          int src_size)
  {
      int pos;

      for (pos = 0; src_size - pos >= 4; pos += 4) {
          const uint32_t px   = *(const uint32_t *)(src + pos);
          const uint32_t keep = px & 0xff00ff00;
          const uint32_t swap = px & 0x00ff00ff;

          *(uint32_t *)(dst + pos) = (swap >> 16) | keep | (swap << 16);
      }
  }
  /**
   * Reverse the byte order of every 3-byte pixel (bytes 0 and 2 exchanged,
   * byte 1 kept).
   *
   * Both outer bytes are read before anything is written, so the conversion
   * also works in place (dst == src).
   *
   * Compared with the previous implementation the loop counter is signed
   * and only complete pixels are processed: a negative src_size no longer
   * turns into a huge unsigned bound, and a partial trailing pixel is left
   * untouched instead of being read/written past src_size.
   *
   * @param src      input bytes, 3 per pixel
   * @param dst      output bytes, same size as the input
   * @param src_size number of input bytes; non-positive sizes do nothing
   */
  static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
  {
      int i;

      for (i = 0; src_size - i >= 3; i += 3) {
          const uint8_t first = src[i + 0];
          const uint8_t third = src[i + 2];

          dst[i + 1] = src[i + 1];
          dst[i + 2] = first;
          dst[i + 0] = third;
      }
  }
  /**
   * Interleave three planes into packed 4-byte groups laid out in memory as
   * Y0, U, Y1, V.
   *
   * For each output row, width >> 1 groups are written; group i takes
   * ysrc[2 * i], usrc[i], ysrc[2 * i + 1] and vsrc[i]. The luma pointer
   * advances by lumStride every row; the chroma pointers advance by
   * chromStride after every row y for which
   * (y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1, i.e. once per
   * vertLumPerChroma rows when that value is a power of two.
   *
   * The output is written byte by byte. Compared with the previous
   * word-packing implementation this
   *  - yields the same Y0, U, Y1, V memory layout on every host, without
   *    depending on HAVE_FAST_64BIT or HAVE_BIGENDIAN;
   *  - places no alignment requirement on dst or dstStride;
   *  - never shifts a promoted (signed) byte into the sign bit;
   *  - writes exactly width >> 1 groups per row, also when that count is
   *    odd (the 64-bit path used to write one group too many).
   *
   * @param ysrc             luma plane
   * @param usrc             first chroma plane
   * @param vsrc             second chroma plane
   * @param dst              packed output
   * @param width            luma samples per row; width >> 1 groups are written
   * @param height           number of rows
   * @param lumStride        byte distance between luma rows
   * @param chromStride      byte distance between chroma rows
   * @param dstStride        byte distance between output rows
   * @param vertLumPerChroma luma rows sharing one chroma row (power of two)
   */
  static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                       const uint8_t *vsrc, uint8_t *dst,
                                       int width, int height,
                                       int lumStride, int chromStride,
                                       int dstStride, int vertLumPerChroma)
  {
      const int chromWidth = width >> 1;
      int y, i;

      for (y = 0; y < height; y++) {
          for (i = 0; i < chromWidth; i++) {
              dst[4 * i + 0] = ysrc[2 * i + 0];
              dst[4 * i + 1] = usrc[i];
              dst[4 * i + 2] = ysrc[2 * i + 1];
              dst[4 * i + 3] = vsrc[i];
          }
          /* Step to the next chroma row after the last luma row sharing it. */
          if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
              usrc += chromStride;
              vsrc += chromStride;
          }
          ysrc += lumStride;
          dst  += dstStride;
      }
  }
  /**
   * Pack three planes into Y/U/Y/V groups via yuvPlanartoyuy2_c(), with the
   * chroma pointers advancing once for every two output rows.
   *
   * Height should be a multiple of 2 and width should be a multiple of 16.
   * (If this is a problem for anyone then tell me, and I will fix it.)
   */
  static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                  const uint8_t *vsrc, uint8_t *dst,
                                  int width, int height, int lumStride,
                                  int chromStride, int dstStride)
  {
      /* Every chroma row is reused for two consecutive luma rows. */
      const int lumaRowsPerChromaRow = 2;

      //FIXME interpolate chroma
      yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                        chromStride, dstStride, lumaRowsPerChromaRow);
  }
  /**
   * Interleave three planes into packed 4-byte groups laid out in memory as
   * U, Y0, V, Y1.
   *
   * For each output row, width >> 1 groups are written; group i takes
   * usrc[i], ysrc[2 * i], vsrc[i] and ysrc[2 * i + 1]. The luma pointer
   * advances by lumStride every row; the chroma pointers advance by
   * chromStride after every row y for which
   * (y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1, i.e. once per
   * vertLumPerChroma rows when that value is a power of two.
   *
   * The output is written byte by byte. Compared with the previous
   * word-packing implementation this
   *  - yields the same U, Y0, V, Y1 memory layout on every host, without
   *    depending on HAVE_FAST_64BIT or HAVE_BIGENDIAN;
   *  - places no alignment requirement on dst or dstStride;
   *  - never shifts a promoted (signed) byte into the sign bit;
   *  - writes exactly width >> 1 groups per row, also when that count is
   *    odd (the 64-bit path used to write one group too many).
   *
   * @param ysrc             luma plane
   * @param usrc             first chroma plane
   * @param vsrc             second chroma plane
   * @param dst              packed output
   * @param width            luma samples per row; width >> 1 groups are written
   * @param height           number of rows
   * @param lumStride        byte distance between luma rows
   * @param chromStride      byte distance between chroma rows
   * @param dstStride        byte distance between output rows
   * @param vertLumPerChroma luma rows sharing one chroma row (power of two)
   */
  static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                       const uint8_t *vsrc, uint8_t *dst,
                                       int width, int height,
                                       int lumStride, int chromStride,
                                       int dstStride, int vertLumPerChroma)
  {
      const int chromWidth = width >> 1;
      int y, i;

      for (y = 0; y < height; y++) {
          for (i = 0; i < chromWidth; i++) {
              dst[4 * i + 0] = usrc[i];
              dst[4 * i + 1] = ysrc[2 * i + 0];
              dst[4 * i + 2] = vsrc[i];
              dst[4 * i + 3] = ysrc[2 * i + 1];
          }
          /* Step to the next chroma row after the last luma row sharing it. */
          if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
              usrc += chromStride;
              vsrc += chromStride;
          }
          ysrc += lumStride;
          dst  += dstStride;
      }
  }
  /**
   * Pack three planes into U/Y/V/Y groups via yuvPlanartouyvy_c(), with the
   * chroma pointers advancing once for every two output rows.
   *
   * Height should be a multiple of 2 and width should be a multiple of 16.
   * (If this is a problem for anyone then tell me, and I will fix it.)
   */
  static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                  const uint8_t *vsrc, uint8_t *dst,
                                  int width, int height, int lumStride,
                                  int chromStride, int dstStride)
  {
      /* Every chroma row is reused for two consecutive luma rows. */
      const int lumaRowsPerChromaRow = 2;

      //FIXME interpolate chroma
      yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                        chromStride, dstStride, lumaRowsPerChromaRow);
  }
  /**
   * Pack three planes into U/Y/V/Y groups via yuvPlanartouyvy_c(), with the
   * chroma pointers advancing on every output row.
   *
   * Width should be a multiple of 16.
   */
  static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height, int lumStride,
                                     int chromStride, int dstStride)
  {
      /* Each luma row has its own chroma row. */
      const int lumaRowsPerChromaRow = 1;

      yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                        chromStride, dstStride, lumaRowsPerChromaRow);
  }
  /**
   * Pack three planes into Y/U/Y/V groups via yuvPlanartoyuy2_c(), with the
   * chroma pointers advancing on every output row.
   *
   * Width should be a multiple of 16.
   */
  static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height, int lumStride,
                                     int chromStride, int dstStride)
  {
      /* Each luma row has its own chroma row. */
      const int lumaRowsPerChromaRow = 1;

      yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                        chromStride, dstStride, lumaRowsPerChromaRow);
  }
  /**
   * Split packed Y0/U/Y1/V groups into three planes, two source rows per
   * iteration.
   *
   * From the first row of each pair all four bytes of every group are
   * distributed (Y0 and Y1 to ydst, U to udst, V to vdst). From the second
   * row only the two luma bytes are taken; its chroma bytes are ignored.
   *
   * Height should be a multiple of 2 and width should be a multiple of 16.
   * (If this is a problem for anyone then tell me, and I will fix it.)
   */
  static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                  uint8_t *udst, uint8_t *vdst,
                                  int width, int height, int lumStride,
                                  int chromStride, int srcStride)
  {
      const int chromWidth = width >> 1;
      int y;

      for (y = 0; y < height; y += 2) {
          const uint8_t *top    = src;
          const uint8_t *bottom = src + srcStride;
          uint8_t *ytop         = ydst;
          uint8_t *ybottom      = ydst + lumStride;
          int i;

          /* First row of the pair: luma and chroma. */
          for (i = 0; i < chromWidth; i++) {
              const uint8_t *grp = top + 4 * i;

              ytop[2 * i + 0] = grp[0];
              udst[i]         = grp[1];
              ytop[2 * i + 1] = grp[2];
              vdst[i]         = grp[3];
          }
          /* Second row of the pair: luma only. */
          for (i = 0; i < chromWidth; i++) {
              const uint8_t *grp = bottom + 4 * i;

              ybottom[2 * i + 0] = grp[0];
              ybottom[2 * i + 1] = grp[2];
          }

          src   = bottom + srcStride;
          ydst  = ybottom + lumStride;
          udst += chromStride;
          vdst += chromStride;
      }
  }
  /**
   * Upscale one plane by a factor of two in both directions using 3:1
   * weighted averages of neighbouring samples.
   *
   * The first and the last output rows are interpolated horizontally only,
   * from the first and the last source rows respectively. Between them, each
   * pair of vertically adjacent source rows produces two output rows whose
   * samples mix one sample from the upper row with one from the lower row
   * (weights 3:1 or 1:3, result shifted right by 2). In total
   * 2 * srcHeight output rows of 2 * srcWidth samples are written.
   *
   * @param src       source plane
   * @param dst       destination plane
   * @param srcWidth  samples per source row
   * @param srcHeight number of source rows
   * @param srcStride byte distance between source rows
   * @param dstStride byte distance between destination rows
   */
  static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
                                int srcHeight, int srcStride, int dstStride)
  {
      const int last = srcWidth - 1;
      int col, row;

      /* First output row: horizontal interpolation of the first source row. */
      dst[0] = src[0];
      for (col = 0; col < last; col++) {
          dst[2 * col + 1] = (3 * src[col] + src[col + 1]) >> 2;
          dst[2 * col + 2] = (src[col] + 3 * src[col + 1]) >> 2;
      }
      dst[2 * srcWidth - 1] = src[last];
      dst += dstStride;

      /* Two output rows for every pair of adjacent source rows. */
      for (row = 1; row < srcHeight; row++) {
          const uint8_t *upper = src;
          const uint8_t *lower = src + srcStride;
          uint8_t *out0        = dst;
          uint8_t *out1        = dst + dstStride;

          out0[0] = (upper[0] * 3 + lower[0]) >> 2;
          out1[0] = (upper[0] + 3 * lower[0]) >> 2;

          for (col = 0; col < last; col++) {
              out0[2 * col + 1] = (upper[col + 0] * 3 + lower[col + 1]) >> 2;
              out1[2 * col + 2] = (upper[col + 0] + 3 * lower[col + 1]) >> 2;
              out1[2 * col + 1] = (upper[col + 1] + 3 * lower[col]) >> 2;
              out0[2 * col + 2] = (upper[col + 1] * 3 + lower[col]) >> 2;
          }

          out0[srcWidth * 2 - 1] = (upper[last] * 3 + lower[last]) >> 2;
          out1[srcWidth * 2 - 1] = (upper[last] + 3 * lower[last]) >> 2;

          dst += dstStride * 2;
          src += srcStride;
      }

      /* Last output row: horizontal interpolation of the last source row. */
      dst[0] = src[0];
      for (col = 0; col < last; col++) {
          dst[2 * col + 1] = (src[col] * 3 + src[col + 1]) >> 2;
          dst[2 * col + 2] = (src[col] + 3 * src[col + 1]) >> 2;
      }
      dst[2 * srcWidth - 1] = src[last];
  }
  /**
   * Split packed U/Y0/V/Y1 groups into three planes, two source rows per
   * iteration.
   *
   * From the first row of each pair all four bytes of every group are
   * distributed (U to udst, V to vdst, Y0 and Y1 to ydst). From the second
   * row only the two luma bytes are taken.
   *
   * Height should be a multiple of 2 and width should be a multiple of 16.
   * (If this is a problem for anyone then tell me, and I will fix it.)
   * Chrominance data is only taken from every second line, others are ignored.
   * FIXME: Write HQ version.
   */
  static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                  uint8_t *udst, uint8_t *vdst,
                                  int width, int height, int lumStride,
                                  int chromStride, int srcStride)
  {
      const int chromWidth = width >> 1;
      int y;

      for (y = 0; y < height; y += 2) {
          const uint8_t *top    = src;
          const uint8_t *bottom = src + srcStride;
          uint8_t *ytop         = ydst;
          uint8_t *ybottom      = ydst + lumStride;
          int i;

          /* First row of the pair: chroma and luma. */
          for (i = 0; i < chromWidth; i++) {
              const uint8_t *grp = top + 4 * i;

              udst[i]         = grp[0];
              ytop[2 * i + 0] = grp[1];
              vdst[i]         = grp[2];
              ytop[2 * i + 1] = grp[3];
          }
          /* Second row of the pair: luma only. */
          for (i = 0; i < chromWidth; i++) {
              const uint8_t *grp = bottom + 4 * i;

              ybottom[2 * i + 0] = grp[1];
              ybottom[2 * i + 1] = grp[3];
          }

          src   = bottom + srcStride;
          ydst  = ybottom + lumStride;
          udst += chromStride;
          vdst += chromStride;
      }
  }
  564. /**
  565. * Height should be a multiple of 2 and width should be a multiple of 2.
  566. * (If this is a problem for anyone then tell me, and I will fix it.)
  567. * Chrominance data is only taken from every second line,
  568. * others are ignored in the C version.
  569. * FIXME: Write HQ version.
  570. */
  571. void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
  572. uint8_t *vdst, int width, int height, int lumStride,
  573. int chromStride, int srcStride)
  574. {
  575. int y;
  576. const int chromWidth = width >> 1;
  577. for (y = 0; y < height; y += 2) {
  578. int i;
  579. for (i = 0; i < chromWidth; i++) {
  580. unsigned int b = src[6 * i + 0];
  581. unsigned int g = src[6 * i + 1];
  582. unsigned int r = src[6 * i + 2];
  583. unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
  584. unsigned int V = ((RV * r + GV * g + BV * b) >> RGB2YUV_SHIFT) + 128;
  585. unsigned int U = ((RU * r + GU * g + BU * b) >> RGB2YUV_SHIFT) + 128;
  586. udst[i] = U;
  587. vdst[i] = V;
  588. ydst[2 * i] = Y;
  589. b = src[6 * i + 3];
  590. g = src[6 * i + 4];
  591. r = src[6 * i + 5];
  592. Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
  593. ydst[2 * i + 1] = Y;
  594. }
  595. ydst += lumStride;
  596. src += srcStride;
  597. for (i = 0; i < chromWidth; i++) {
  598. unsigned int b = src[6 * i + 0];
  599. unsigned int g = src[6 * i + 1];
  600. unsigned int r = src[6 * i + 2];
  601. unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
  602. ydst[2 * i] = Y;
  603. b = src[6 * i + 3];
  604. g = src[6 * i + 4];
  605. r = src[6 * i + 5];
  606. Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
  607. ydst[2 * i + 1] = Y;
  608. }
  609. udst += chromStride;
  610. vdst += chromStride;
  611. ydst += lumStride;
  612. src += srcStride;
  613. }
  614. }
  /**
   * Interleave two byte planes row by row: output byte 2 * w comes from
   * src1[w] and output byte 2 * w + 1 from src2[w].
   *
   * @param src1       first input plane
   * @param src2       second input plane
   * @param dest       output plane, 2 * width bytes written per row
   * @param width      bytes taken from each input per row
   * @param height     number of rows
   * @param src1Stride byte distance between rows of src1
   * @param src2Stride byte distance between rows of src2
   * @param dstStride  byte distance between rows of dest
   */
  static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                                uint8_t *dest, int width, int height,
                                int src1Stride, int src2Stride, int dstStride)
  {
      int row;

      for (row = 0; row < height; row++) {
          const uint8_t *a = src1;
          const uint8_t *b = src2;
          uint8_t *out     = dest;
          int col;

          for (col = 0; col < width; col++) {
              *out++ = *a++;
              *out++ = *b++;
          }

          dest += dstStride;
          src1 += src1Stride;
          src2 += src2Stride;
      }
  }
  /**
   * Split an interleaved byte plane row by row: even-indexed bytes go to
   * dst1, odd-indexed bytes go to dst2.
   *
   * @param src        interleaved input, 2 * width bytes read per row
   * @param dst1       receives src[2 * w]
   * @param dst2       receives src[2 * w + 1]
   * @param width      bytes written to each output per row
   * @param height     number of rows
   * @param srcStride  byte distance between rows of src
   * @param dst1Stride byte distance between rows of dst1
   * @param dst2Stride byte distance between rows of dst2
   */
  static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
                                  int width, int height, int srcStride,
                                  int dst1Stride, int dst2Stride)
  {
      int row;

      for (row = 0; row < height; row++) {
          const uint8_t *in = src;
          int col;

          for (col = 0; col < width; col++, in += 2) {
              dst1[col] = in[0];
              dst2[col] = in[1];
          }

          src  += srcStride;
          dst1 += dst1Stride;
          dst2 += dst2Stride;
      }
  }
  /**
   * Double two planes in both directions by sample repetition.
   *
   * For each of the two plane pairs (src1 -> dst1, then src2 -> dst2),
   * height / 2 output rows are written; output row y is built from source
   * row y >> 1, and every one of the width / 2 source samples read is
   * written twice in a row.
   *
   * @param src1       first input plane
   * @param src2       second input plane
   * @param dst1       output for src1
   * @param dst2       output for src2
   * @param width      width / 2 source samples are read per row
   * @param height     height / 2 output rows are written per plane
   * @param srcStride1 byte distance between rows of src1
   * @param srcStride2 byte distance between rows of src2
   * @param dstStride1 byte distance between rows of dst1
   * @param dstStride2 byte distance between rows of dst2
   */
  static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                   uint8_t *dst1, uint8_t *dst2,
                                   int width, int height,
                                   int srcStride1, int srcStride2,
                                   int dstStride1, int dstStride2)
  {
      const uint8_t *const in_plane[2]  = { src1, src2 };
      uint8_t *const       out_plane[2] = { dst1, dst2 };
      const int            in_stride[2]  = { srcStride1, srcStride2 };
      const int            out_stride[2] = { dstStride1, dstStride2 };
      const int cols = width / 2;
      const int rows = height / 2;
      int plane, row, col;

      for (plane = 0; plane < 2; plane++) {
          for (row = 0; row < rows; row++) {
              const uint8_t *in = in_plane[plane] + in_stride[plane] * (row >> 1);
              uint8_t *out      = out_plane[plane] + out_stride[plane] * row;

              for (col = 0; col < cols; col++) {
                  const uint8_t sample = in[col];

                  out[2 * col + 1] = sample;
                  out[2 * col]     = sample;
              }
          }
      }
  }
  /**
   * Pack three planes into Y/U/Y/V groups, sharing each chroma sample
   * between four luma samples horizontally and four rows vertically.
   *
   * For every output row y, luma comes from row y of src1 and chroma from
   * row y >> 2 of src2 and src3. Each of the width / 2 inner steps reads
   * four luma samples and one sample from each chroma plane and writes
   * eight bytes: Y0 U Y1 V Y2 U Y3 V.
   * NOTE(review): the inner loop therefore consumes 4 * (width / 2) luma
   * samples per row - confirm the intended meaning of width with callers.
   *
   * @param src1       luma plane
   * @param src2       plane supplying the bytes at offsets 1 and 5
   * @param src3       plane supplying the bytes at offsets 3 and 7
   * @param dst        packed output
   * @param width      width / 2 eight-byte groups are written per row
   * @param height     number of output rows
   * @param srcStride1 byte distance between rows of src1
   * @param srcStride2 byte distance between rows of src2
   * @param srcStride3 byte distance between rows of src3
   * @param dstStride  byte distance between rows of dst
   */
  static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                    const uint8_t *src3, uint8_t *dst,
                                    int width, int height,
                                    int srcStride1, int srcStride2,
                                    int srcStride3, int dstStride)
  {
      const int groups = width / 2;
      int row, col;

      for (row = 0; row < height; row++) {
          const uint8_t *luma = src1 + srcStride1 * row;
          const uint8_t *cu   = src2 + srcStride2 * (row >> 2);
          const uint8_t *cv   = src3 + srcStride3 * (row >> 2);
          uint8_t *out        = dst + dstStride * row;

          for (col = 0; col < groups; col++, luma += 4, out += 8) {
              out[0] = luma[0];
              out[1] = cu[col];
              out[2] = luma[1];
              out[3] = cv[col];
              out[4] = luma[2];
              out[5] = cu[col];
              out[6] = luma[3];
              out[7] = cv[col];
          }
      }
  }
  /**
   * Copy every second byte of src, starting with src[0], into dst.
   *
   * @param src   input; bytes 0, 2, 4, ... 2 * (count - 1) are read
   * @param dst   output; count bytes are written
   * @param count number of bytes to produce; non-positive counts do nothing
   */
  static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
  {
      int i;

      for (i = 0; i < count; i++)
          dst[i] = src[2 * i];
  }
  /**
   * From each group of four input bytes, copy byte 0 to dst0 and byte 2 to
   * dst1.
   *
   * @param src   input; 4 * count bytes are covered
   * @param dst0  receives src[4 * i]
   * @param dst1  receives src[4 * i + 2]
   * @param count number of groups; non-positive counts do nothing
   */
  static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                              int count)
  {
      int i;

      for (i = 0; i < count; i++) {
          dst0[i] = src[4 * i + 0];
          dst1[i] = src[4 * i + 2];
      }
  }
  /**
   * From each group of four bytes, average byte 0 of src0 and src1 into dst0
   * and byte 2 of src0 and src1 into dst1. The average is (a + b) >> 1,
   * i.e. it rounds down.
   *
   * @param src0  first input; 4 * count bytes are covered
   * @param src1  second input; 4 * count bytes are covered
   * @param dst0  receives the averages of the bytes at offset 0
   * @param dst1  receives the averages of the bytes at offset 2
   * @param count number of groups; non-positive counts do nothing
   */
  static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                                 uint8_t *dst0, uint8_t *dst1, int count)
  {
      int i;

      for (i = 0; i < count; i++) {
          dst0[i] = (src0[4 * i + 0] + src1[4 * i + 0]) >> 1;
          dst1[i] = (src0[4 * i + 2] + src1[4 * i + 2]) >> 1;
      }
  }
  /**
   * From each group of four input bytes, copy byte 1 to dst0 and byte 3 to
   * dst1.
   *
   * @param src   input; 4 * count bytes are covered
   * @param dst0  receives src[4 * i + 1]
   * @param dst1  receives src[4 * i + 3]
   * @param count number of groups; non-positive counts do nothing
   */
  static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                             int count)
  {
      int i;

      for (i = 0; i < count; i++) {
          dst0[i] = src[4 * i + 1];
          dst1[i] = src[4 * i + 3];
      }
  }
  /**
   * From each group of four bytes, average byte 1 of src0 and src1 into dst0
   * and byte 3 of src0 and src1 into dst1. The average is (a + b) >> 1,
   * i.e. it rounds down.
   *
   * @param src0  first input; 4 * count bytes are covered
   * @param src1  second input; 4 * count bytes are covered
   * @param dst0  receives the averages of the bytes at offset 1
   * @param dst1  receives the averages of the bytes at offset 3
   * @param count number of groups; non-positive counts do nothing
   */
  static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                                uint8_t *dst0, uint8_t *dst1, int count)
  {
      int i;

      for (i = 0; i < count; i++) {
          dst0[i] = (src0[4 * i + 1] + src1[4 * i + 1]) >> 1;
          dst1[i] = (src0[4 * i + 3] + src1[4 * i + 3]) >> 1;
      }
  }
  /**
   * Split a packed plane with luma at even byte offsets into three planes,
   * producing one chroma row for every two source rows.
   *
   * Every row contributes width luma bytes (the even-offset bytes of src).
   * On every odd row, the bytes at offsets 1 and 3 of each 4-byte group are
   * averaged between the previous and the current source row and written to
   * udst and vdst; the chroma pointers advance only then, so a final
   * unpaired row (odd height) yields no chroma output.
   *
   * @param ydst        luma output
   * @param udst        output for the averaged bytes at offset 1
   * @param vdst        output for the averaged bytes at offset 3
   * @param src         packed input
   * @param width       luma samples per row
   * @param height      number of source rows
   * @param lumStride   byte distance between rows of ydst
   * @param chromStride byte distance between rows of udst / vdst
   * @param srcStride   byte distance between rows of src
   */
  static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                             const uint8_t *src, int width, int height,
                             int lumStride, int chromStride, int srcStride)
  {
      const int chromWidth = AV_CEIL_RSHIFT(width, 1);
      int row;

      for (row = 0; row < height; row++, src += srcStride, ydst += lumStride) {
          extract_even_c(src, ydst, width);

          /* Chroma is emitted once the second row of a pair is available. */
          if (!(row & 1))
              continue;

          extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
          udst += chromStride;
          vdst += chromStride;
      }
  }
  /**
   * Split a packed plane with luma at even byte offsets into three planes,
   * producing one chroma row for every source row.
   *
   * Every row contributes width luma bytes (the even-offset bytes of src)
   * and, for each 4-byte group, the byte at offset 1 to udst and the byte at
   * offset 3 to vdst.
   *
   * @param ydst        luma output
   * @param udst        output for the bytes at offset 1
   * @param vdst        output for the bytes at offset 3
   * @param src         packed input
   * @param width       luma samples per row
   * @param height      number of rows
   * @param lumStride   byte distance between rows of ydst
   * @param chromStride byte distance between rows of udst / vdst
   * @param srcStride   byte distance between rows of src
   */
  static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                             const uint8_t *src, int width, int height,
                             int lumStride, int chromStride, int srcStride)
  {
      const int chromWidth = AV_CEIL_RSHIFT(width, 1);
      int row;

      for (row = 0; row < height; row++) {
          extract_even_c(src, ydst, width);
          extract_odd2_c(src, udst, vdst, chromWidth);

          udst += chromStride;
          vdst += chromStride;
          ydst += lumStride;
          src  += srcStride;
      }
  }
  /**
   * Split a packed plane with luma at odd byte offsets into three planes,
   * producing one chroma row for every two source rows.
   *
   * Every row contributes width luma bytes (the odd-offset bytes of src).
   * On every odd row, the bytes at offsets 0 and 2 of each 4-byte group are
   * averaged between the previous and the current source row and written to
   * udst and vdst; the chroma pointers advance only then, so a final
   * unpaired row (odd height) yields no chroma output.
   *
   * @param ydst        luma output
   * @param udst        output for the averaged bytes at offset 0
   * @param vdst        output for the averaged bytes at offset 2
   * @param src         packed input
   * @param width       luma samples per row
   * @param height      number of source rows
   * @param lumStride   byte distance between rows of ydst
   * @param chromStride byte distance between rows of udst / vdst
   * @param srcStride   byte distance between rows of src
   */
  static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                             const uint8_t *src, int width, int height,
                             int lumStride, int chromStride, int srcStride)
  {
      const int chromWidth = AV_CEIL_RSHIFT(width, 1);
      int row;

      for (row = 0; row < height; row++, src += srcStride, ydst += lumStride) {
          extract_even_c(src + 1, ydst, width);

          /* Chroma is emitted once the second row of a pair is available. */
          if (!(row & 1))
              continue;

          extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
          udst += chromStride;
          vdst += chromStride;
      }
  }
  /**
   * Split a packed plane with luma at odd byte offsets into three planes,
   * producing one chroma row for every source row.
   *
   * Every row contributes width luma bytes (the odd-offset bytes of src)
   * and, for each 4-byte group, the byte at offset 0 to udst and the byte at
   * offset 2 to vdst.
   *
   * @param ydst        luma output
   * @param udst        output for the bytes at offset 0
   * @param vdst        output for the bytes at offset 2
   * @param src         packed input
   * @param width       luma samples per row
   * @param height      number of rows
   * @param lumStride   byte distance between rows of ydst
   * @param chromStride byte distance between rows of udst / vdst
   * @param srcStride   byte distance between rows of src
   */
  static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                             const uint8_t *src, int width, int height,
                             int lumStride, int chromStride, int srcStride)
  {
      const int chromWidth = AV_CEIL_RSHIFT(width, 1);
      int row;

      for (row = 0; row < height; row++) {
          extract_even_c(src + 1, ydst, width);
          extract_even2_c(src, udst, vdst, chromWidth);

          udst += chromStride;
          vdst += chromStride;
          ydst += lumStride;
          src  += srcStride;
      }
  }
  827. static av_cold void rgb2rgb_init_c(void)
  828. {
  829. rgb15to16 = rgb15to16_c;
  830. rgb15tobgr24 = rgb15tobgr24_c;
  831. rgb15to32 = rgb15to32_c;
  832. rgb16tobgr24 = rgb16tobgr24_c;
  833. rgb16to32 = rgb16to32_c;
  834. rgb16to15 = rgb16to15_c;
  835. rgb24tobgr16 = rgb24tobgr16_c;
  836. rgb24tobgr15 = rgb24tobgr15_c;
  837. rgb24tobgr32 = rgb24tobgr32_c;
  838. rgb32to16 = rgb32to16_c;
  839. rgb32to15 = rgb32to15_c;
  840. rgb32tobgr24 = rgb32tobgr24_c;
  841. rgb24to15 = rgb24to15_c;
  842. rgb24to16 = rgb24to16_c;
  843. rgb24tobgr24 = rgb24tobgr24_c;
  844. shuffle_bytes_2103 = shuffle_bytes_2103_c;
  845. rgb32tobgr16 = rgb32tobgr16_c;
  846. rgb32tobgr15 = rgb32tobgr15_c;
  847. yv12toyuy2 = yv12toyuy2_c;
  848. yv12touyvy = yv12touyvy_c;
  849. yuv422ptoyuy2 = yuv422ptoyuy2_c;
  850. yuv422ptouyvy = yuv422ptouyvy_c;
  851. yuy2toyv12 = yuy2toyv12_c;
  852. planar2x = planar2x_c;
  853. rgb24toyv12 = rgb24toyv12_c;
  854. interleaveBytes = interleaveBytes_c;
  855. deinterleaveBytes = deinterleaveBytes_c;
  856. vu9_to_vu12 = vu9_to_vu12_c;
  857. yvu9_to_yuy2 = yvu9_to_yuy2_c;
  858. uyvytoyuv420 = uyvytoyuv420_c;
  859. uyvytoyuv422 = uyvytoyuv422_c;
  860. yuyvtoyuv420 = yuyvtoyuv420_c;
  861. yuyvtoyuv422 = yuyvtoyuv422_c;
  862. }