You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

637 lines
18KB

  1. /*
  2. * High quality image resampling with polyphase filters
  3. * Copyright (c) 2001 Fabrice Bellard.
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. */
  19. #include "avcodec.h"
  20. #include "dsputil.h"
  21. #ifdef USE_FASTMEMCPY
  22. #include "fastmemcpy.h"
  23. #endif
  24. #define NB_COMPONENTS 3
  25. #define PHASE_BITS 4
  26. #define NB_PHASES (1 << PHASE_BITS)
  27. #define NB_TAPS 4
  28. #define FCENTER 1 /* index of the center of the filter */
  29. //#define TEST 1 /* Test it */
  30. #define POS_FRAC_BITS 16
  31. #define POS_FRAC (1 << POS_FRAC_BITS)
  32. /* 6 bits precision is needed for MMX */
  33. #define FILTER_BITS 8
  34. #define LINE_BUF_HEIGHT (NB_TAPS * 4)
  35. struct ImgReSampleContext {
  36. int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
  37. int h_incr, v_incr;
  38. INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
  39. INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
  40. UINT8 *line_buf;
  41. };
  42. static inline int get_phase(int pos)
  43. {
  44. return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
  45. }
  46. /* This function must be optimized */
  47. static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
  48. int src_start, int src_incr, INT16 *filters)
  49. {
  50. int src_pos, phase, sum, i;
  51. UINT8 *s;
  52. INT16 *filter;
  53. src_pos = src_start;
  54. for(i=0;i<dst_width;i++) {
  55. #ifdef TEST
  56. /* test */
  57. if ((src_pos >> POS_FRAC_BITS) < 0 ||
  58. (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
  59. av_abort();
  60. #endif
  61. s = src + (src_pos >> POS_FRAC_BITS);
  62. phase = get_phase(src_pos);
  63. filter = filters + phase * NB_TAPS;
  64. #if NB_TAPS == 4
  65. sum = s[0] * filter[0] +
  66. s[1] * filter[1] +
  67. s[2] * filter[2] +
  68. s[3] * filter[3];
  69. #else
  70. {
  71. int j;
  72. sum = 0;
  73. for(j=0;j<NB_TAPS;j++)
  74. sum += s[j] * filter[j];
  75. }
  76. #endif
  77. sum = sum >> FILTER_BITS;
  78. if (sum < 0)
  79. sum = 0;
  80. else if (sum > 255)
  81. sum = 255;
  82. dst[0] = sum;
  83. src_pos += src_incr;
  84. dst++;
  85. }
  86. }
  87. /* This function must be optimized */
  88. static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
  89. INT16 *filter)
  90. {
  91. int sum, i;
  92. UINT8 *s;
  93. s = src;
  94. for(i=0;i<dst_width;i++) {
  95. #if NB_TAPS == 4
  96. sum = s[0 * wrap] * filter[0] +
  97. s[1 * wrap] * filter[1] +
  98. s[2 * wrap] * filter[2] +
  99. s[3 * wrap] * filter[3];
  100. #else
  101. {
  102. int j;
  103. UINT8 *s1 = s;
  104. sum = 0;
  105. for(j=0;j<NB_TAPS;j++) {
  106. sum += s1[0] * filter[j];
  107. s1 += wrap;
  108. }
  109. }
  110. #endif
  111. sum = sum >> FILTER_BITS;
  112. if (sum < 0)
  113. sum = 0;
  114. else if (sum > 255)
  115. sum = 255;
  116. dst[0] = sum;
  117. dst++;
  118. s++;
  119. }
  120. }
  121. #ifdef HAVE_MMX
  122. #include "i386/mmx.h"
  123. #define FILTER4(reg) \
  124. {\
  125. s = src + (src_pos >> POS_FRAC_BITS);\
  126. phase = get_phase(src_pos);\
  127. filter = filters + phase * NB_TAPS;\
  128. movq_m2r(*s, reg);\
  129. punpcklbw_r2r(mm7, reg);\
  130. movq_m2r(*filter, mm6);\
  131. pmaddwd_r2r(reg, mm6);\
  132. movq_r2r(mm6, reg);\
  133. psrlq_i2r(32, reg);\
  134. paddd_r2r(mm6, reg);\
  135. psrad_i2r(FILTER_BITS, reg);\
  136. src_pos += src_incr;\
  137. }
  138. #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
  139. /* XXX: do four pixels at a time */
  140. static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
  141. int src_start, int src_incr, INT16 *filters)
  142. {
  143. int src_pos, phase;
  144. UINT8 *s;
  145. INT16 *filter;
  146. mmx_t tmp;
  147. src_pos = src_start;
  148. pxor_r2r(mm7, mm7);
  149. while (dst_width >= 4) {
  150. FILTER4(mm0);
  151. FILTER4(mm1);
  152. FILTER4(mm2);
  153. FILTER4(mm3);
  154. packuswb_r2r(mm7, mm0);
  155. packuswb_r2r(mm7, mm1);
  156. packuswb_r2r(mm7, mm3);
  157. packuswb_r2r(mm7, mm2);
  158. movq_r2m(mm0, tmp);
  159. dst[0] = tmp.ub[0];
  160. movq_r2m(mm1, tmp);
  161. dst[1] = tmp.ub[0];
  162. movq_r2m(mm2, tmp);
  163. dst[2] = tmp.ub[0];
  164. movq_r2m(mm3, tmp);
  165. dst[3] = tmp.ub[0];
  166. dst += 4;
  167. dst_width -= 4;
  168. }
  169. while (dst_width > 0) {
  170. FILTER4(mm0);
  171. packuswb_r2r(mm7, mm0);
  172. movq_r2m(mm0, tmp);
  173. dst[0] = tmp.ub[0];
  174. dst++;
  175. dst_width--;
  176. }
  177. emms();
  178. }
  179. static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
  180. INT16 *filter)
  181. {
  182. int sum, i, v;
  183. UINT8 *s;
  184. mmx_t tmp;
  185. mmx_t coefs[4];
  186. for(i=0;i<4;i++) {
  187. v = filter[i];
  188. coefs[i].uw[0] = v;
  189. coefs[i].uw[1] = v;
  190. coefs[i].uw[2] = v;
  191. coefs[i].uw[3] = v;
  192. }
  193. pxor_r2r(mm7, mm7);
  194. s = src;
  195. while (dst_width >= 4) {
  196. movq_m2r(s[0 * wrap], mm0);
  197. punpcklbw_r2r(mm7, mm0);
  198. movq_m2r(s[1 * wrap], mm1);
  199. punpcklbw_r2r(mm7, mm1);
  200. movq_m2r(s[2 * wrap], mm2);
  201. punpcklbw_r2r(mm7, mm2);
  202. movq_m2r(s[3 * wrap], mm3);
  203. punpcklbw_r2r(mm7, mm3);
  204. pmullw_m2r(coefs[0], mm0);
  205. pmullw_m2r(coefs[1], mm1);
  206. pmullw_m2r(coefs[2], mm2);
  207. pmullw_m2r(coefs[3], mm3);
  208. paddw_r2r(mm1, mm0);
  209. paddw_r2r(mm3, mm2);
  210. paddw_r2r(mm2, mm0);
  211. psraw_i2r(FILTER_BITS, mm0);
  212. packuswb_r2r(mm7, mm0);
  213. movq_r2m(mm0, tmp);
  214. *(UINT32 *)dst = tmp.ud[0];
  215. dst += 4;
  216. s += 4;
  217. dst_width -= 4;
  218. }
  219. while (dst_width > 0) {
  220. sum = s[0 * wrap] * filter[0] +
  221. s[1 * wrap] * filter[1] +
  222. s[2 * wrap] * filter[2] +
  223. s[3 * wrap] * filter[3];
  224. sum = sum >> FILTER_BITS;
  225. if (sum < 0)
  226. sum = 0;
  227. else if (sum > 255)
  228. sum = 255;
  229. dst[0] = sum;
  230. dst++;
  231. s++;
  232. dst_width--;
  233. }
  234. emms();
  235. }
  236. #endif
  237. /* slow version to handle limit cases. Does not need optimisation */
  238. static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
  239. int src_start, int src_incr, INT16 *filters)
  240. {
  241. int src_pos, phase, sum, j, v, i;
  242. UINT8 *s, *src_end;
  243. INT16 *filter;
  244. src_end = src + src_width;
  245. src_pos = src_start;
  246. for(i=0;i<dst_width;i++) {
  247. s = src + (src_pos >> POS_FRAC_BITS);
  248. phase = get_phase(src_pos);
  249. filter = filters + phase * NB_TAPS;
  250. sum = 0;
  251. for(j=0;j<NB_TAPS;j++) {
  252. if (s < src)
  253. v = src[0];
  254. else if (s >= src_end)
  255. v = src_end[-1];
  256. else
  257. v = s[0];
  258. sum += v * filter[j];
  259. s++;
  260. }
  261. sum = sum >> FILTER_BITS;
  262. if (sum < 0)
  263. sum = 0;
  264. else if (sum > 255)
  265. sum = 255;
  266. dst[0] = sum;
  267. src_pos += src_incr;
  268. dst++;
  269. }
  270. }
  271. static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
  272. int src_start, int src_incr, INT16 *filters)
  273. {
  274. int n, src_end;
  275. if (src_start < 0) {
  276. n = (0 - src_start + src_incr - 1) / src_incr;
  277. h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
  278. dst += n;
  279. dst_width -= n;
  280. src_start += n * src_incr;
  281. }
  282. src_end = src_start + dst_width * src_incr;
  283. if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
  284. n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
  285. src_incr;
  286. } else {
  287. n = dst_width;
  288. }
  289. #ifdef HAVE_MMX
  290. if ((mm_flags & MM_MMX) && NB_TAPS == 4)
  291. h_resample_fast4_mmx(dst, n,
  292. src, src_width, src_start, src_incr, filters);
  293. else
  294. #endif
  295. h_resample_fast(dst, n,
  296. src, src_width, src_start, src_incr, filters);
  297. if (n < dst_width) {
  298. dst += n;
  299. dst_width -= n;
  300. src_start += n * src_incr;
  301. h_resample_slow(dst, dst_width,
  302. src, src_width, src_start, src_incr, filters);
  303. }
  304. }
  305. static void component_resample(ImgReSampleContext *s,
  306. UINT8 *output, int owrap, int owidth, int oheight,
  307. UINT8 *input, int iwrap, int iwidth, int iheight)
  308. {
  309. int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
  310. UINT8 *new_line, *src_line;
  311. last_src_y = - FCENTER - 1;
  312. /* position of the bottom of the filter in the source image */
  313. src_y = (last_src_y + NB_TAPS) * POS_FRAC;
  314. ring_y = NB_TAPS; /* position in ring buffer */
  315. for(y=0;y<oheight;y++) {
  316. /* apply horizontal filter on new lines from input if needed */
  317. src_y1 = src_y >> POS_FRAC_BITS;
  318. while (last_src_y < src_y1) {
  319. if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
  320. ring_y = NB_TAPS;
  321. last_src_y++;
  322. /* handle limit conditions : replicate line (slightly
  323. inefficient because we filter multiple times) */
  324. y1 = last_src_y;
  325. if (y1 < 0) {
  326. y1 = 0;
  327. } else if (y1 >= iheight) {
  328. y1 = iheight - 1;
  329. }
  330. src_line = input + y1 * iwrap;
  331. new_line = s->line_buf + ring_y * owidth;
  332. /* apply filter and handle limit cases correctly */
  333. h_resample(new_line, owidth,
  334. src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
  335. &s->h_filters[0][0]);
  336. /* handle ring buffer wraping */
  337. if (ring_y >= LINE_BUF_HEIGHT) {
  338. memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
  339. new_line, owidth);
  340. }
  341. }
  342. /* apply vertical filter */
  343. phase_y = get_phase(src_y);
  344. #ifdef HAVE_MMX
  345. /* desactivated MMX because loss of precision */
  346. if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
  347. v_resample4_mmx(output, owidth,
  348. s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
  349. &s->v_filters[phase_y][0]);
  350. else
  351. #endif
  352. v_resample(output, owidth,
  353. s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
  354. &s->v_filters[phase_y][0]);
  355. src_y += s->v_incr;
  356. output += owrap;
  357. }
  358. }
  359. /* XXX: the following filter is quite naive, but it seems to suffice
  360. for 4 taps */
  361. static void build_filter(INT16 *filter, float factor)
  362. {
  363. int ph, i, v;
  364. float x, y, tab[NB_TAPS], norm, mult;
  365. /* if upsampling, only need to interpolate, no filter */
  366. if (factor > 1.0)
  367. factor = 1.0;
  368. for(ph=0;ph<NB_PHASES;ph++) {
  369. norm = 0;
  370. for(i=0;i<NB_TAPS;i++) {
  371. x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
  372. if (x == 0)
  373. y = 1.0;
  374. else
  375. y = sin(x) / x;
  376. tab[i] = y;
  377. norm += y;
  378. }
  379. /* normalize so that an uniform color remains the same */
  380. mult = (float)(1 << FILTER_BITS) / norm;
  381. for(i=0;i<NB_TAPS;i++) {
  382. v = (int)(tab[i] * mult);
  383. filter[ph * NB_TAPS + i] = v;
  384. }
  385. }
  386. }
  387. ImgReSampleContext *img_resample_init(int owidth, int oheight,
  388. int iwidth, int iheight)
  389. {
  390. return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
  391. }
  392. ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
  393. int iwidth, int iheight,
  394. int topBand, int bottomBand,
  395. int leftBand, int rightBand)
  396. {
  397. ImgReSampleContext *s;
  398. s = av_mallocz(sizeof(ImgReSampleContext));
  399. if (!s)
  400. return NULL;
  401. s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
  402. if (!s->line_buf)
  403. goto fail;
  404. s->owidth = owidth;
  405. s->oheight = oheight;
  406. s->iwidth = iwidth;
  407. s->iheight = iheight;
  408. s->topBand = topBand;
  409. s->bottomBand = bottomBand;
  410. s->leftBand = leftBand;
  411. s->rightBand = rightBand;
  412. s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
  413. s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
  414. build_filter(&s->h_filters[0][0], (float) owidth / (float) (iwidth - leftBand - rightBand));
  415. build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));
  416. return s;
  417. fail:
  418. av_free(s);
  419. return NULL;
  420. }
  421. void img_resample(ImgReSampleContext *s,
  422. AVPicture *output, AVPicture *input)
  423. {
  424. int i, shift;
  425. for(i=0;i<3;i++) {
  426. shift = (i == 0) ? 0 : 1;
  427. component_resample(s, output->data[i], output->linesize[i],
  428. s->owidth >> shift, s->oheight >> shift,
  429. input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
  430. input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
  431. (s->iheight - s->topBand - s->bottomBand) >> shift);
  432. }
  433. }
  434. void img_resample_close(ImgReSampleContext *s)
  435. {
  436. av_free(s->line_buf);
  437. av_free(s);
  438. }
  439. #ifdef TEST
  440. void *av_mallocz(int size)
  441. {
  442. void *ptr;
  443. ptr = malloc(size);
  444. memset(ptr, 0, size);
  445. return ptr;
  446. }
  447. void av_free(void *ptr)
  448. {
  449. /* XXX: this test should not be needed on most libcs */
  450. if (ptr)
  451. free(ptr);
  452. }
  453. /* input */
  454. #define XSIZE 256
  455. #define YSIZE 256
  456. UINT8 img[XSIZE * YSIZE];
  457. /* output */
  458. #define XSIZE1 512
  459. #define YSIZE1 512
  460. UINT8 img1[XSIZE1 * YSIZE1];
  461. UINT8 img2[XSIZE1 * YSIZE1];
  462. void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize)
  463. {
  464. FILE *f;
  465. f=fopen(filename,"w");
  466. fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
  467. fwrite(img,1, xsize * ysize,f);
  468. fclose(f);
  469. }
  470. static void dump_filter(INT16 *filter)
  471. {
  472. int i, ph;
  473. for(ph=0;ph<NB_PHASES;ph++) {
  474. printf("%2d: ", ph);
  475. for(i=0;i<NB_TAPS;i++) {
  476. printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
  477. }
  478. printf("\n");
  479. }
  480. }
  481. #ifdef HAVE_MMX
  482. int mm_flags;
  483. #endif
  484. int main(int argc, char **argv)
  485. {
  486. int x, y, v, i, xsize, ysize;
  487. ImgReSampleContext *s;
  488. float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
  489. char buf[256];
  490. /* build test image */
  491. for(y=0;y<YSIZE;y++) {
  492. for(x=0;x<XSIZE;x++) {
  493. if (x < XSIZE/2 && y < YSIZE/2) {
  494. if (x < XSIZE/4 && y < YSIZE/4) {
  495. if ((x % 10) <= 6 &&
  496. (y % 10) <= 6)
  497. v = 0xff;
  498. else
  499. v = 0x00;
  500. } else if (x < XSIZE/4) {
  501. if (x & 1)
  502. v = 0xff;
  503. else
  504. v = 0;
  505. } else if (y < XSIZE/4) {
  506. if (y & 1)
  507. v = 0xff;
  508. else
  509. v = 0;
  510. } else {
  511. if (y < YSIZE*3/8) {
  512. if ((y+x) & 1)
  513. v = 0xff;
  514. else
  515. v = 0;
  516. } else {
  517. if (((x+3) % 4) <= 1 &&
  518. ((y+3) % 4) <= 1)
  519. v = 0xff;
  520. else
  521. v = 0x00;
  522. }
  523. }
  524. } else if (x < XSIZE/2) {
  525. v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
  526. } else if (y < XSIZE/2) {
  527. v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
  528. } else {
  529. v = ((x + y - XSIZE) * 255) / XSIZE;
  530. }
  531. img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
  532. }
  533. }
  534. save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
  535. for(i=0;i<sizeof(factors)/sizeof(float);i++) {
  536. fact = factors[i];
  537. xsize = (int)(XSIZE * fact);
  538. ysize = (int)((YSIZE - 100) * fact);
  539. s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
  540. printf("Factor=%0.2f\n", fact);
  541. dump_filter(&s->h_filters[0][0]);
  542. component_resample(s, img1, xsize, xsize, ysize,
  543. img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
  544. img_resample_close(s);
  545. sprintf(buf, "/tmp/out%d.pgm", i);
  546. save_pgm(buf, img1, xsize, ysize);
  547. }
  548. /* mmx test */
  549. #ifdef HAVE_MMX
  550. printf("MMX test\n");
  551. fact = 0.72;
  552. xsize = (int)(XSIZE * fact);
  553. ysize = (int)(YSIZE * fact);
  554. mm_flags = MM_MMX;
  555. s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
  556. component_resample(s, img1, xsize, xsize, ysize,
  557. img, XSIZE, XSIZE, YSIZE);
  558. mm_flags = 0;
  559. s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
  560. component_resample(s, img2, xsize, xsize, ysize,
  561. img, XSIZE, XSIZE, YSIZE);
  562. if (memcmp(img1, img2, xsize * ysize) != 0) {
  563. fprintf(stderr, "mmx error\n");
  564. exit(1);
  565. }
  566. printf("MMX OK\n");
  567. #endif
  568. return 0;
  569. }
  570. #endif