You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

704 lines
28KB

  1. /*
  2. * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <inttypes.h>
  21. #include <string.h>
  22. #include <math.h>
  23. #include <stdio.h>
  24. #include "config.h"
  25. #include <assert.h>
  26. #include "swscale.h"
  27. #include "swscale_internal.h"
  28. #include "rgb2rgb.h"
  29. #include "libavutil/avassert.h"
  30. #include "libavutil/intreadwrite.h"
  31. #include "libavutil/cpu.h"
  32. #include "libavutil/avutil.h"
  33. #include "libavutil/mathematics.h"
  34. #include "libavutil/bswap.h"
  35. #include "libavutil/pixdesc.h"
  36. DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
  37. { 36, 68, 60, 92, 34, 66, 58, 90,},
  38. { 100, 4,124, 28, 98, 2,122, 26,},
  39. { 52, 84, 44, 76, 50, 82, 42, 74,},
  40. { 116, 20,108, 12,114, 18,106, 10,},
  41. { 32, 64, 56, 88, 38, 70, 62, 94,},
  42. { 96, 0,120, 24,102, 6,126, 30,},
  43. { 48, 80, 40, 72, 54, 86, 46, 78,},
  44. { 112, 16,104, 8,118, 22,110, 14,},
  45. };
  46. DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
  47. { 64, 64, 64, 64, 64, 64, 64, 64 };
  48. static av_always_inline void fillPlane(uint8_t* plane, int stride,
  49. int width, int height,
  50. int y, uint8_t val)
  51. {
  52. int i;
  53. uint8_t *ptr = plane + stride*y;
  54. for (i=0; i<height; i++) {
  55. memset(ptr, val, width);
  56. ptr += stride;
  57. }
  58. }
  59. static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
  60. const int16_t *filter,
  61. const int32_t *filterPos, int filterSize)
  62. {
  63. int i;
  64. int32_t *dst = (int32_t *) _dst;
  65. const uint16_t *src = (const uint16_t *) _src;
  66. int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
  67. int sh = bits - 4;
  68. if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
  69. sh= 9;
  70. for (i = 0; i < dstW; i++) {
  71. int j;
  72. int srcPos = filterPos[i];
  73. int val = 0;
  74. for (j = 0; j < filterSize; j++) {
  75. val += src[srcPos + j] * filter[filterSize * i + j];
  76. }
  77. // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
  78. dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
  79. }
  80. }
  81. static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
  82. const int16_t *filter,
  83. const int32_t *filterPos, int filterSize)
  84. {
  85. int i;
  86. const uint16_t *src = (const uint16_t *) _src;
  87. int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
  88. if(sh<15)
  89. sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
  90. for (i = 0; i < dstW; i++) {
  91. int j;
  92. int srcPos = filterPos[i];
  93. int val = 0;
  94. for (j = 0; j < filterSize; j++) {
  95. val += src[srcPos + j] * filter[filterSize * i + j];
  96. }
  97. // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
  98. dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
  99. }
  100. }
  101. // bilinear / bicubic scaling
  102. static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
  103. const int16_t *filter, const int32_t *filterPos,
  104. int filterSize)
  105. {
  106. int i;
  107. for (i=0; i<dstW; i++) {
  108. int j;
  109. int srcPos= filterPos[i];
  110. int val=0;
  111. for (j=0; j<filterSize; j++) {
  112. val += ((int)src[srcPos + j])*filter[filterSize*i + j];
  113. }
  114. //filter += hFilterSize;
  115. dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
  116. //dst[i] = val>>7;
  117. }
  118. }
  119. static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
  120. const int16_t *filter, const int32_t *filterPos,
  121. int filterSize)
  122. {
  123. int i;
  124. int32_t *dst = (int32_t *) _dst;
  125. for (i=0; i<dstW; i++) {
  126. int j;
  127. int srcPos= filterPos[i];
  128. int val=0;
  129. for (j=0; j<filterSize; j++) {
  130. val += ((int)src[srcPos + j])*filter[filterSize*i + j];
  131. }
  132. //filter += hFilterSize;
  133. dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
  134. //dst[i] = val>>7;
  135. }
  136. }
  137. //FIXME all pal and rgb srcFormats could do this convertion as well
  138. //FIXME all scalers more complex than bilinear could do half of this transform
  139. static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
  140. {
  141. int i;
  142. for (i = 0; i < width; i++) {
  143. dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
  144. dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
  145. }
  146. }
  147. static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
  148. {
  149. int i;
  150. for (i = 0; i < width; i++) {
  151. dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
  152. dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
  153. }
  154. }
  155. static void lumRangeToJpeg_c(int16_t *dst, int width)
  156. {
  157. int i;
  158. for (i = 0; i < width; i++)
  159. dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
  160. }
  161. static void lumRangeFromJpeg_c(int16_t *dst, int width)
  162. {
  163. int i;
  164. for (i = 0; i < width; i++)
  165. dst[i] = (dst[i]*14071 + 33561947)>>14;
  166. }
  167. static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
  168. {
  169. int i;
  170. int32_t *dstU = (int32_t *) _dstU;
  171. int32_t *dstV = (int32_t *) _dstV;
  172. for (i = 0; i < width; i++) {
  173. dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
  174. dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
  175. }
  176. }
  177. static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
  178. {
  179. int i;
  180. int32_t *dstU = (int32_t *) _dstU;
  181. int32_t *dstV = (int32_t *) _dstV;
  182. for (i = 0; i < width; i++) {
  183. dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
  184. dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
  185. }
  186. }
  187. static void lumRangeToJpeg16_c(int16_t *_dst, int width)
  188. {
  189. int i;
  190. int32_t *dst = (int32_t *) _dst;
  191. for (i = 0; i < width; i++)
  192. dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
  193. }
  194. static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
  195. {
  196. int i;
  197. int32_t *dst = (int32_t *) _dst;
  198. for (i = 0; i < width; i++)
  199. dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
  200. }
  201. static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
  202. const uint8_t *src, int srcW, int xInc)
  203. {
  204. int i;
  205. unsigned int xpos=0;
  206. for (i=0;i<dstWidth;i++) {
  207. register unsigned int xx=xpos>>16;
  208. register unsigned int xalpha=(xpos&0xFFFF)>>9;
  209. dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
  210. xpos+=xInc;
  211. }
  212. for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
  213. dst[i] = src[srcW-1]*128;
  214. }
  215. // *** horizontal scale Y line to temp buffer
  216. static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
  217. const uint8_t *src_in[4], int srcW, int xInc,
  218. const int16_t *hLumFilter,
  219. const int32_t *hLumFilterPos, int hLumFilterSize,
  220. uint8_t *formatConvBuffer,
  221. uint32_t *pal, int isAlpha)
  222. {
  223. void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
  224. void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
  225. const uint8_t *src = src_in[isAlpha ? 3 : 0];
  226. if (toYV12) {
  227. toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
  228. src= formatConvBuffer;
  229. } else if (c->readLumPlanar && !isAlpha) {
  230. c->readLumPlanar(formatConvBuffer, src_in, srcW);
  231. src = formatConvBuffer;
  232. }
  233. if (!c->hyscale_fast) {
  234. c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
  235. } else { // fast bilinear upscale / crap downscale
  236. c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
  237. }
  238. if (convertRange)
  239. convertRange(dst, dstWidth);
  240. }
  241. static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
  242. int dstWidth, const uint8_t *src1,
  243. const uint8_t *src2, int srcW, int xInc)
  244. {
  245. int i;
  246. unsigned int xpos=0;
  247. for (i=0;i<dstWidth;i++) {
  248. register unsigned int xx=xpos>>16;
  249. register unsigned int xalpha=(xpos&0xFFFF)>>9;
  250. dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
  251. dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
  252. xpos+=xInc;
  253. }
  254. for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
  255. dst1[i] = src1[srcW-1]*128;
  256. dst2[i] = src2[srcW-1]*128;
  257. }
  258. }
  259. static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
  260. const uint8_t *src_in[4],
  261. int srcW, int xInc, const int16_t *hChrFilter,
  262. const int32_t *hChrFilterPos, int hChrFilterSize,
  263. uint8_t *formatConvBuffer, uint32_t *pal)
  264. {
  265. const uint8_t *src1 = src_in[1], *src2 = src_in[2];
  266. if (c->chrToYV12) {
  267. uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
  268. c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
  269. src1= formatConvBuffer;
  270. src2= buf2;
  271. } else if (c->readChrPlanar) {
  272. uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
  273. c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
  274. src1= formatConvBuffer;
  275. src2= buf2;
  276. }
  277. if (!c->hcscale_fast) {
  278. c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
  279. c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
  280. } else { // fast bilinear upscale / crap downscale
  281. c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
  282. }
  283. if (c->chrConvertRange)
  284. c->chrConvertRange(dst1, dst2, dstWidth);
  285. }
  286. #define DEBUG_SWSCALE_BUFFERS 0
  287. #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
  288. static int swScale(SwsContext *c, const uint8_t* src[],
  289. int srcStride[], int srcSliceY,
  290. int srcSliceH, uint8_t* dst[], int dstStride[])
  291. {
  292. /* load a few things into local vars to make the code more readable? and faster */
  293. const int srcW= c->srcW;
  294. const int dstW= c->dstW;
  295. const int dstH= c->dstH;
  296. const int chrDstW= c->chrDstW;
  297. const int chrSrcW= c->chrSrcW;
  298. const int lumXInc= c->lumXInc;
  299. const int chrXInc= c->chrXInc;
  300. const enum PixelFormat dstFormat= c->dstFormat;
  301. const int flags= c->flags;
  302. int32_t *vLumFilterPos= c->vLumFilterPos;
  303. int32_t *vChrFilterPos= c->vChrFilterPos;
  304. int32_t *hLumFilterPos= c->hLumFilterPos;
  305. int32_t *hChrFilterPos= c->hChrFilterPos;
  306. int16_t *hLumFilter= c->hLumFilter;
  307. int16_t *hChrFilter= c->hChrFilter;
  308. int32_t *lumMmxFilter= c->lumMmxFilter;
  309. int32_t *chrMmxFilter= c->chrMmxFilter;
  310. const int vLumFilterSize= c->vLumFilterSize;
  311. const int vChrFilterSize= c->vChrFilterSize;
  312. const int hLumFilterSize= c->hLumFilterSize;
  313. const int hChrFilterSize= c->hChrFilterSize;
  314. int16_t **lumPixBuf= c->lumPixBuf;
  315. int16_t **chrUPixBuf= c->chrUPixBuf;
  316. int16_t **chrVPixBuf= c->chrVPixBuf;
  317. int16_t **alpPixBuf= c->alpPixBuf;
  318. const int vLumBufSize= c->vLumBufSize;
  319. const int vChrBufSize= c->vChrBufSize;
  320. uint8_t *formatConvBuffer= c->formatConvBuffer;
  321. const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
  322. const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
  323. int lastDstY;
  324. uint32_t *pal=c->pal_yuv;
  325. int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
  326. yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
  327. yuv2planarX_fn yuv2planeX = c->yuv2planeX;
  328. yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
  329. yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
  330. yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
  331. yuv2packedX_fn yuv2packedX = c->yuv2packedX;
  332. /* vars which will change and which we need to store back in the context */
  333. int dstY= c->dstY;
  334. int lumBufIndex= c->lumBufIndex;
  335. int chrBufIndex= c->chrBufIndex;
  336. int lastInLumBuf= c->lastInLumBuf;
  337. int lastInChrBuf= c->lastInChrBuf;
  338. if (isPacked(c->srcFormat)) {
  339. src[0]=
  340. src[1]=
  341. src[2]=
  342. src[3]= src[0];
  343. srcStride[0]=
  344. srcStride[1]=
  345. srcStride[2]=
  346. srcStride[3]= srcStride[0];
  347. }
  348. srcStride[1]<<= c->vChrDrop;
  349. srcStride[2]<<= c->vChrDrop;
  350. DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
  351. src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
  352. dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
  353. DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
  354. srcSliceY, srcSliceH, dstY, dstH);
  355. DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
  356. vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
  357. if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
  358. static int warnedAlready=0; //FIXME move this into the context perhaps
  359. if (flags & SWS_PRINT_INFO && !warnedAlready) {
  360. av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
  361. " ->cannot do aligned memory accesses anymore\n");
  362. warnedAlready=1;
  363. }
  364. }
  365. if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
  366. || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
  367. || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
  368. ) {
  369. static int warnedAlready=0;
  370. int cpu_flags = av_get_cpu_flags();
  371. if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
  372. av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
  373. warnedAlready=1;
  374. }
  375. }
  376. /* Note the user might start scaling the picture in the middle so this
  377. will not get executed. This is not really intended but works
  378. currently, so people might do it. */
  379. if (srcSliceY ==0) {
  380. lumBufIndex=-1;
  381. chrBufIndex=-1;
  382. dstY=0;
  383. lastInLumBuf= -1;
  384. lastInChrBuf= -1;
  385. }
  386. if (!should_dither) {
  387. c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
  388. }
  389. lastDstY= dstY;
  390. for (;dstY < dstH; dstY++) {
  391. const int chrDstY= dstY>>c->chrDstVSubSample;
  392. uint8_t *dest[4] = {
  393. dst[0] + dstStride[0] * dstY,
  394. dst[1] + dstStride[1] * chrDstY,
  395. dst[2] + dstStride[2] * chrDstY,
  396. (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
  397. };
  398. int use_mmx_vfilter= c->use_mmx_vfilter;
  399. const int firstLumSrcY= FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); //First line needed as input
  400. const int firstLumSrcY2= FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]);
  401. const int firstChrSrcY= FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]); //First line needed as input
  402. // Last line needed as input
  403. int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
  404. int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
  405. int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
  406. int enough_lines;
  407. //handle holes (FAST_BILINEAR & weird filters)
  408. if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
  409. if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
  410. assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
  411. assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
  412. DEBUG_BUFFERS("dstY: %d\n", dstY);
  413. DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
  414. firstLumSrcY, lastLumSrcY, lastInLumBuf);
  415. DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
  416. firstChrSrcY, lastChrSrcY, lastInChrBuf);
  417. // Do we have enough lines in this slice to output the dstY line
  418. enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
  419. if (!enough_lines) {
  420. lastLumSrcY = srcSliceY + srcSliceH - 1;
  421. lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
  422. DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
  423. lastLumSrcY, lastChrSrcY);
  424. }
  425. //Do horizontal scaling
  426. while(lastInLumBuf < lastLumSrcY) {
  427. const uint8_t *src1[4] = {
  428. src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
  429. src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
  430. src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
  431. src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
  432. };
  433. lumBufIndex++;
  434. assert(lumBufIndex < 2*vLumBufSize);
  435. assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
  436. assert(lastInLumBuf + 1 - srcSliceY >= 0);
  437. hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
  438. hLumFilter, hLumFilterPos, hLumFilterSize,
  439. formatConvBuffer,
  440. pal, 0);
  441. if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
  442. hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
  443. lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
  444. formatConvBuffer,
  445. pal, 1);
  446. lastInLumBuf++;
  447. DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
  448. lumBufIndex, lastInLumBuf);
  449. }
  450. while(lastInChrBuf < lastChrSrcY) {
  451. const uint8_t *src1[4] = {
  452. src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
  453. src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
  454. src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
  455. src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
  456. };
  457. chrBufIndex++;
  458. assert(chrBufIndex < 2*vChrBufSize);
  459. assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
  460. assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
  461. //FIXME replace parameters through context struct (some at least)
  462. if (c->needs_hcscale)
  463. hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
  464. chrDstW, src1, chrSrcW, chrXInc,
  465. hChrFilter, hChrFilterPos, hChrFilterSize,
  466. formatConvBuffer, pal);
  467. lastInChrBuf++;
  468. DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
  469. chrBufIndex, lastInChrBuf);
  470. }
  471. //wrap buf index around to stay inside the ring buffer
  472. if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
  473. if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
  474. if (!enough_lines)
  475. break; //we can't output a dstY line so let's try with the next slice
  476. #if HAVE_MMX
  477. updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
  478. #endif
  479. if (should_dither) {
  480. c->chrDither8 = dither_8x8_128[chrDstY & 7];
  481. c->lumDither8 = dither_8x8_128[dstY & 7];
  482. }
  483. if (dstY >= dstH-2) {
  484. // hmm looks like we can't use MMX here without overwriting this array's tail
  485. ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
  486. &yuv2packed1, &yuv2packed2, &yuv2packedX);
  487. use_mmx_vfilter= 0;
  488. }
  489. {
  490. const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
  491. const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
  492. const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
  493. const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
  494. int16_t *vLumFilter= c->vLumFilter;
  495. int16_t *vChrFilter= c->vChrFilter;
  496. if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like
  497. const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
  498. vLumFilter += dstY * vLumFilterSize;
  499. vChrFilter += chrDstY * vChrFilterSize;
  500. // av_assert0(use_mmx_vfilter != (
  501. // yuv2planeX == yuv2planeX_10BE_c
  502. // || yuv2planeX == yuv2planeX_10LE_c
  503. // || yuv2planeX == yuv2planeX_9BE_c
  504. // || yuv2planeX == yuv2planeX_9LE_c
  505. // || yuv2planeX == yuv2planeX_16BE_c
  506. // || yuv2planeX == yuv2planeX_16LE_c
  507. // || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
  508. if(use_mmx_vfilter){
  509. vLumFilter= c->lumMmxFilter;
  510. vChrFilter= c->chrMmxFilter;
  511. }
  512. if (vLumFilterSize == 1) {
  513. yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
  514. } else {
  515. yuv2planeX(vLumFilter, vLumFilterSize,
  516. lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
  517. }
  518. if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
  519. if (yuv2nv12cX) {
  520. yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
  521. } else if (vChrFilterSize == 1) {
  522. yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
  523. yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
  524. } else {
  525. yuv2planeX(vChrFilter, vChrFilterSize,
  526. chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
  527. yuv2planeX(vChrFilter, vChrFilterSize,
  528. chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
  529. }
  530. }
  531. if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
  532. if(use_mmx_vfilter){
  533. vLumFilter= c->alpMmxFilter;
  534. }
  535. if (vLumFilterSize == 1) {
  536. yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
  537. } else {
  538. yuv2planeX(vLumFilter, vLumFilterSize,
  539. alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
  540. }
  541. }
  542. } else {
  543. assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
  544. assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
  545. if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize <= 2) { //unscaled RGB
  546. int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
  547. yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
  548. alpPixBuf ? *alpSrcPtr : NULL,
  549. dest[0], dstW, chrAlpha, dstY);
  550. } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
  551. int lumAlpha = vLumFilter[2 * dstY + 1];
  552. int chrAlpha = vChrFilter[2 * dstY + 1];
  553. lumMmxFilter[2] =
  554. lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
  555. chrMmxFilter[2] =
  556. chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
  557. yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
  558. alpPixBuf ? alpSrcPtr : NULL,
  559. dest[0], dstW, lumAlpha, chrAlpha, dstY);
  560. } else { //general RGB
  561. yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
  562. lumSrcPtr, vLumFilterSize,
  563. vChrFilter + dstY * vChrFilterSize,
  564. chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
  565. alpSrcPtr, dest[0], dstW, dstY);
  566. }
  567. }
  568. }
  569. }
  570. if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
  571. fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
  572. #if HAVE_MMX2
  573. if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
  574. __asm__ volatile("sfence":::"memory");
  575. #endif
  576. emms_c();
  577. /* store changed local vars back in the context */
  578. c->dstY= dstY;
  579. c->lumBufIndex= lumBufIndex;
  580. c->chrBufIndex= chrBufIndex;
  581. c->lastInLumBuf= lastInLumBuf;
  582. c->lastInChrBuf= lastInChrBuf;
  583. return dstY - lastDstY;
  584. }
  585. static av_cold void sws_init_swScale_c(SwsContext *c)
  586. {
  587. enum PixelFormat srcFormat = c->srcFormat;
  588. ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
  589. &c->yuv2nv12cX, &c->yuv2packed1,
  590. &c->yuv2packed2, &c->yuv2packedX);
  591. ff_sws_init_input_funcs(c);
  592. if (c->srcBpc == 8) {
  593. if (c->dstBpc <= 10) {
  594. c->hyScale = c->hcScale = hScale8To15_c;
  595. if (c->flags & SWS_FAST_BILINEAR) {
  596. c->hyscale_fast = hyscale_fast_c;
  597. c->hcscale_fast = hcscale_fast_c;
  598. }
  599. } else {
  600. c->hyScale = c->hcScale = hScale8To19_c;
  601. }
  602. } else {
  603. c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
  604. }
  605. if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
  606. if (c->dstBpc <= 10) {
  607. if (c->srcRange) {
  608. c->lumConvertRange = lumRangeFromJpeg_c;
  609. c->chrConvertRange = chrRangeFromJpeg_c;
  610. } else {
  611. c->lumConvertRange = lumRangeToJpeg_c;
  612. c->chrConvertRange = chrRangeToJpeg_c;
  613. }
  614. } else {
  615. if (c->srcRange) {
  616. c->lumConvertRange = lumRangeFromJpeg16_c;
  617. c->chrConvertRange = chrRangeFromJpeg16_c;
  618. } else {
  619. c->lumConvertRange = lumRangeToJpeg16_c;
  620. c->chrConvertRange = chrRangeToJpeg16_c;
  621. }
  622. }
  623. }
  624. if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
  625. srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
  626. c->needs_hcscale = 1;
  627. }
  628. SwsFunc ff_getSwsFunc(SwsContext *c)
  629. {
  630. sws_init_swScale_c(c);
  631. if (HAVE_MMX)
  632. ff_sws_init_swScale_mmx(c);
  633. if (HAVE_ALTIVEC)
  634. ff_sws_init_swScale_altivec(c);
  635. return swScale;
  636. }