You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

615 lines
17KB

  1. /*
  2. * High quality image resampling with polyphase filters
  3. * Copyright (c) 2001 Gerard Lantau.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18. */
  19. #include <stdlib.h>
  20. #include <stdio.h>
  21. #include <string.h>
  22. #include <math.h>
  23. #include "dsputil.h"
  24. #include "avcodec.h"
  25. #define NB_COMPONENTS 3
  26. #define PHASE_BITS 4
  27. #define NB_PHASES (1 << PHASE_BITS)
  28. #define NB_TAPS 4
  29. #define FCENTER 1 /* index of the center of the filter */
  30. #define POS_FRAC_BITS 16
  31. #define POS_FRAC (1 << POS_FRAC_BITS)
  32. /* 6 bits precision is needed for MMX */
  33. #define FILTER_BITS 8
  34. #define LINE_BUF_HEIGHT (NB_TAPS * 4)
  35. struct ImgReSampleContext {
  36. int iwidth, iheight, owidth, oheight;
  37. int h_incr, v_incr;
  38. INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
  39. INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
  40. UINT8 *line_buf;
  41. };
  42. static inline int get_phase(int pos)
  43. {
  44. return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
  45. }
  46. /* This function must be optimized */
  47. static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
  48. int src_start, int src_incr, INT16 *filters)
  49. {
  50. int src_pos, phase, sum, i;
  51. UINT8 *s;
  52. INT16 *filter;
  53. src_pos = src_start;
  54. for(i=0;i<dst_width;i++) {
  55. #ifdef TEST
  56. /* test */
  57. if ((src_pos >> POS_FRAC_BITS) < 0 ||
  58. (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
  59. abort();
  60. #endif
  61. s = src + (src_pos >> POS_FRAC_BITS);
  62. phase = get_phase(src_pos);
  63. filter = filters + phase * NB_TAPS;
  64. #if NB_TAPS == 4
  65. sum = s[0] * filter[0] +
  66. s[1] * filter[1] +
  67. s[2] * filter[2] +
  68. s[3] * filter[3];
  69. #else
  70. {
  71. int j;
  72. sum = 0;
  73. for(j=0;j<NB_TAPS;j++)
  74. sum += s[j] * filter[j];
  75. }
  76. #endif
  77. sum = sum >> FILTER_BITS;
  78. if (sum < 0)
  79. sum = 0;
  80. else if (sum > 255)
  81. sum = 255;
  82. dst[0] = sum;
  83. src_pos += src_incr;
  84. dst++;
  85. }
  86. }
  87. /* This function must be optimized */
  88. static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
  89. INT16 *filter)
  90. {
  91. int sum, i;
  92. UINT8 *s;
  93. s = src;
  94. for(i=0;i<dst_width;i++) {
  95. #if NB_TAPS == 4
  96. sum = s[0 * wrap] * filter[0] +
  97. s[1 * wrap] * filter[1] +
  98. s[2 * wrap] * filter[2] +
  99. s[3 * wrap] * filter[3];
  100. #else
  101. {
  102. int j;
  103. UINT8 *s1 = s;
  104. sum = 0;
  105. for(j=0;j<NB_TAPS;j++) {
  106. sum += s1[0] * filter[j];
  107. s1 += wrap;
  108. }
  109. }
  110. #endif
  111. sum = sum >> FILTER_BITS;
  112. if (sum < 0)
  113. sum = 0;
  114. else if (sum > 255)
  115. sum = 255;
  116. dst[0] = sum;
  117. dst++;
  118. s++;
  119. }
  120. }
  121. #ifdef CONFIG_MMX
  122. #include "i386/mmx.h"
  123. #define FILTER4(reg) \
  124. {\
  125. s = src + (src_pos >> POS_FRAC_BITS);\
  126. phase = get_phase(src_pos);\
  127. filter = filters + phase * NB_TAPS;\
  128. movq_m2r(*s, reg);\
  129. punpcklbw_r2r(mm7, reg);\
  130. movq_m2r(*filter, mm6);\
  131. pmaddwd_r2r(reg, mm6);\
  132. movq_r2r(mm6, reg);\
  133. psrlq_i2r(32, reg);\
  134. paddd_r2r(mm6, reg);\
  135. psrad_i2r(FILTER_BITS, reg);\
  136. src_pos += src_incr;\
  137. }
  138. #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
  139. /* XXX: do four pixels at a time */
  140. static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
  141. int src_start, int src_incr, INT16 *filters)
  142. {
  143. int src_pos, phase;
  144. UINT8 *s;
  145. INT16 *filter;
  146. mmx_t tmp;
  147. src_pos = src_start;
  148. pxor_r2r(mm7, mm7);
  149. while (dst_width >= 4) {
  150. FILTER4(mm0);
  151. FILTER4(mm1);
  152. FILTER4(mm2);
  153. FILTER4(mm3);
  154. packuswb_r2r(mm7, mm0);
  155. packuswb_r2r(mm7, mm1);
  156. packuswb_r2r(mm7, mm3);
  157. packuswb_r2r(mm7, mm2);
  158. movq_r2m(mm0, tmp);
  159. dst[0] = tmp.ub[0];
  160. movq_r2m(mm1, tmp);
  161. dst[1] = tmp.ub[0];
  162. movq_r2m(mm2, tmp);
  163. dst[2] = tmp.ub[0];
  164. movq_r2m(mm3, tmp);
  165. dst[3] = tmp.ub[0];
  166. dst += 4;
  167. dst_width -= 4;
  168. }
  169. while (dst_width > 0) {
  170. FILTER4(mm0);
  171. packuswb_r2r(mm7, mm0);
  172. movq_r2m(mm0, tmp);
  173. dst[0] = tmp.ub[0];
  174. dst++;
  175. dst_width--;
  176. }
  177. emms();
  178. }
  179. static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
  180. INT16 *filter)
  181. {
  182. int sum, i, v;
  183. UINT8 *s;
  184. mmx_t tmp;
  185. mmx_t coefs[4];
  186. for(i=0;i<4;i++) {
  187. v = filter[i];
  188. coefs[i].uw[0] = v;
  189. coefs[i].uw[1] = v;
  190. coefs[i].uw[2] = v;
  191. coefs[i].uw[3] = v;
  192. }
  193. pxor_r2r(mm7, mm7);
  194. s = src;
  195. while (dst_width >= 4) {
  196. movq_m2r(s[0 * wrap], mm0);
  197. punpcklbw_r2r(mm7, mm0);
  198. movq_m2r(s[1 * wrap], mm1);
  199. punpcklbw_r2r(mm7, mm1);
  200. movq_m2r(s[2 * wrap], mm2);
  201. punpcklbw_r2r(mm7, mm2);
  202. movq_m2r(s[3 * wrap], mm3);
  203. punpcklbw_r2r(mm7, mm3);
  204. pmullw_m2r(coefs[0], mm0);
  205. pmullw_m2r(coefs[1], mm1);
  206. pmullw_m2r(coefs[2], mm2);
  207. pmullw_m2r(coefs[3], mm3);
  208. paddw_r2r(mm1, mm0);
  209. paddw_r2r(mm3, mm2);
  210. paddw_r2r(mm2, mm0);
  211. psraw_i2r(FILTER_BITS, mm0);
  212. packuswb_r2r(mm7, mm0);
  213. movq_r2m(mm0, tmp);
  214. *(UINT32 *)dst = tmp.ud[0];
  215. dst += 4;
  216. s += 4;
  217. dst_width -= 4;
  218. }
  219. while (dst_width > 0) {
  220. sum = s[0 * wrap] * filter[0] +
  221. s[1 * wrap] * filter[1] +
  222. s[2 * wrap] * filter[2] +
  223. s[3 * wrap] * filter[3];
  224. sum = sum >> FILTER_BITS;
  225. if (sum < 0)
  226. sum = 0;
  227. else if (sum > 255)
  228. sum = 255;
  229. dst[0] = sum;
  230. dst++;
  231. s++;
  232. dst_width--;
  233. }
  234. emms();
  235. }
  236. #endif
  237. /* slow version to handle limit cases. Does not need optimisation */
  238. static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
  239. int src_start, int src_incr, INT16 *filters)
  240. {
  241. int src_pos, phase, sum, j, v, i;
  242. UINT8 *s, *src_end;
  243. INT16 *filter;
  244. src_end = src + src_width;
  245. src_pos = src_start;
  246. for(i=0;i<dst_width;i++) {
  247. s = src + (src_pos >> POS_FRAC_BITS);
  248. phase = get_phase(src_pos);
  249. filter = filters + phase * NB_TAPS;
  250. sum = 0;
  251. for(j=0;j<NB_TAPS;j++) {
  252. if (s < src)
  253. v = src[0];
  254. else if (s >= src_end)
  255. v = src_end[-1];
  256. else
  257. v = s[0];
  258. sum += v * filter[j];
  259. s++;
  260. }
  261. sum = sum >> FILTER_BITS;
  262. if (sum < 0)
  263. sum = 0;
  264. else if (sum > 255)
  265. sum = 255;
  266. dst[0] = sum;
  267. src_pos += src_incr;
  268. dst++;
  269. }
  270. }
  271. static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
  272. int src_start, int src_incr, INT16 *filters)
  273. {
  274. int n, src_end;
  275. if (src_start < 0) {
  276. n = (0 - src_start + src_incr - 1) / src_incr;
  277. h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
  278. dst += n;
  279. dst_width -= n;
  280. src_start += n * src_incr;
  281. }
  282. src_end = src_start + dst_width * src_incr;
  283. if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
  284. n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
  285. src_incr;
  286. } else {
  287. n = dst_width;
  288. }
  289. #ifdef CONFIG_MMX
  290. if ((mm_flags & MM_MMX) && NB_TAPS == 4)
  291. h_resample_fast4_mmx(dst, n,
  292. src, src_width, src_start, src_incr, filters);
  293. else
  294. #endif
  295. h_resample_fast(dst, n,
  296. src, src_width, src_start, src_incr, filters);
  297. if (n < dst_width) {
  298. dst += n;
  299. dst_width -= n;
  300. src_start += n * src_incr;
  301. h_resample_slow(dst, dst_width,
  302. src, src_width, src_start, src_incr, filters);
  303. }
  304. }
  305. static void component_resample(ImgReSampleContext *s,
  306. UINT8 *output, int owrap, int owidth, int oheight,
  307. UINT8 *input, int iwrap, int iwidth, int iheight)
  308. {
  309. int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
  310. UINT8 *new_line, *src_line;
  311. last_src_y = - FCENTER - 1;
  312. /* position of the bottom of the filter in the source image */
  313. src_y = (last_src_y + NB_TAPS) * POS_FRAC;
  314. ring_y = NB_TAPS; /* position in ring buffer */
  315. for(y=0;y<oheight;y++) {
  316. /* apply horizontal filter on new lines from input if needed */
  317. src_y1 = src_y >> POS_FRAC_BITS;
  318. while (last_src_y < src_y1) {
  319. if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
  320. ring_y = NB_TAPS;
  321. last_src_y++;
  322. /* handle limit conditions : replicate line (slighly
  323. inefficient because we filter multiple times */
  324. y1 = last_src_y;
  325. if (y1 < 0) {
  326. y1 = 0;
  327. } else if (y1 >= iheight) {
  328. y1 = iheight - 1;
  329. }
  330. src_line = input + y1 * iwrap;
  331. new_line = s->line_buf + ring_y * owidth;
  332. /* apply filter and handle limit cases correctly */
  333. h_resample(new_line, owidth,
  334. src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
  335. &s->h_filters[0][0]);
  336. /* handle ring buffer wraping */
  337. if (ring_y >= LINE_BUF_HEIGHT) {
  338. memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
  339. new_line, owidth);
  340. }
  341. }
  342. /* apply vertical filter */
  343. phase_y = get_phase(src_y);
  344. #ifdef CONFIG_MMX
  345. /* desactivated MMX because loss of precision */
  346. if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
  347. v_resample4_mmx(output, owidth,
  348. s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
  349. &s->v_filters[phase_y][0]);
  350. else
  351. #endif
  352. v_resample(output, owidth,
  353. s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
  354. &s->v_filters[phase_y][0]);
  355. src_y += s->v_incr;
  356. output += owrap;
  357. }
  358. }
  359. /* XXX: the following filter is quite naive, but it seems to suffice
  360. for 4 taps */
  361. static void build_filter(INT16 *filter, float factor)
  362. {
  363. int ph, i, v;
  364. float x, y, tab[NB_TAPS], norm, mult;
  365. /* if upsampling, only need to interpolate, no filter */
  366. if (factor > 1.0)
  367. factor = 1.0;
  368. for(ph=0;ph<NB_PHASES;ph++) {
  369. norm = 0;
  370. for(i=0;i<NB_TAPS;i++) {
  371. x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
  372. if (x == 0)
  373. y = 1.0;
  374. else
  375. y = sin(x) / x;
  376. tab[i] = y;
  377. norm += y;
  378. }
  379. /* normalize so that an uniform color remains the same */
  380. mult = (float)(1 << FILTER_BITS) / norm;
  381. for(i=0;i<NB_TAPS;i++) {
  382. v = (int)(tab[i] * mult);
  383. filter[ph * NB_TAPS + i] = v;
  384. }
  385. }
  386. }
  387. ImgReSampleContext *img_resample_init(int owidth, int oheight,
  388. int iwidth, int iheight)
  389. {
  390. ImgReSampleContext *s;
  391. s = av_mallocz(sizeof(ImgReSampleContext));
  392. if (!s)
  393. return NULL;
  394. s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
  395. if (!s->line_buf)
  396. goto fail;
  397. s->owidth = owidth;
  398. s->oheight = oheight;
  399. s->iwidth = iwidth;
  400. s->iheight = iheight;
  401. s->h_incr = (iwidth * POS_FRAC) / owidth;
  402. s->v_incr = (iheight * POS_FRAC) / oheight;
  403. build_filter(&s->h_filters[0][0], (float)owidth / (float)iwidth);
  404. build_filter(&s->v_filters[0][0], (float)oheight / (float)iheight);
  405. return s;
  406. fail:
  407. free(s);
  408. return NULL;
  409. }
  410. void img_resample(ImgReSampleContext *s,
  411. AVPicture *output, AVPicture *input)
  412. {
  413. int i, shift;
  414. for(i=0;i<3;i++) {
  415. shift = (i == 0) ? 0 : 1;
  416. component_resample(s, output->data[i], output->linesize[i],
  417. s->owidth >> shift, s->oheight >> shift,
  418. input->data[i], input->linesize[i],
  419. s->iwidth >> shift, s->iheight >> shift);
  420. }
  421. }
  422. void img_resample_close(ImgReSampleContext *s)
  423. {
  424. free(s->line_buf);
  425. free(s);
  426. }
  427. #ifdef TEST
  428. void *av_mallocz(int size)
  429. {
  430. void *ptr;
  431. ptr = malloc(size);
  432. memset(ptr, 0, size);
  433. return ptr;
  434. }
  435. /* input */
  436. #define XSIZE 256
  437. #define YSIZE 256
  438. UINT8 img[XSIZE * YSIZE];
  439. /* output */
  440. #define XSIZE1 512
  441. #define YSIZE1 512
  442. UINT8 img1[XSIZE1 * YSIZE1];
  443. UINT8 img2[XSIZE1 * YSIZE1];
  444. void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize)
  445. {
  446. FILE *f;
  447. f=fopen(filename,"w");
  448. fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
  449. fwrite(img,1, xsize * ysize,f);
  450. fclose(f);
  451. }
  452. static void dump_filter(INT16 *filter)
  453. {
  454. int i, ph;
  455. for(ph=0;ph<NB_PHASES;ph++) {
  456. printf("%2d: ", ph);
  457. for(i=0;i<NB_TAPS;i++) {
  458. printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
  459. }
  460. printf("\n");
  461. }
  462. }
  463. #ifdef CONFIG_MMX
  464. int mm_flags;
  465. #endif
  466. int main(int argc, char **argv)
  467. {
  468. int x, y, v, i, xsize, ysize;
  469. ImgReSampleContext *s;
  470. float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
  471. char buf[256];
  472. /* build test image */
  473. for(y=0;y<YSIZE;y++) {
  474. for(x=0;x<XSIZE;x++) {
  475. if (x < XSIZE/2 && y < YSIZE/2) {
  476. if (x < XSIZE/4 && y < YSIZE/4) {
  477. if ((x % 10) <= 6 &&
  478. (y % 10) <= 6)
  479. v = 0xff;
  480. else
  481. v = 0x00;
  482. } else if (x < XSIZE/4) {
  483. if (x & 1)
  484. v = 0xff;
  485. else
  486. v = 0;
  487. } else if (y < XSIZE/4) {
  488. if (y & 1)
  489. v = 0xff;
  490. else
  491. v = 0;
  492. } else {
  493. if (y < YSIZE*3/8) {
  494. if ((y+x) & 1)
  495. v = 0xff;
  496. else
  497. v = 0;
  498. } else {
  499. if (((x+3) % 4) <= 1 &&
  500. ((y+3) % 4) <= 1)
  501. v = 0xff;
  502. else
  503. v = 0x00;
  504. }
  505. }
  506. } else if (x < XSIZE/2) {
  507. v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
  508. } else if (y < XSIZE/2) {
  509. v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
  510. } else {
  511. v = ((x + y - XSIZE) * 255) / XSIZE;
  512. }
  513. img[y * XSIZE + x] = v;
  514. }
  515. }
  516. save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
  517. for(i=0;i<sizeof(factors)/sizeof(float);i++) {
  518. fact = factors[i];
  519. xsize = (int)(XSIZE * fact);
  520. ysize = (int)(YSIZE * fact);
  521. s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
  522. printf("Factor=%0.2f\n", fact);
  523. dump_filter(&s->h_filters[0][0]);
  524. component_resample(s, img1, xsize, xsize, ysize,
  525. img, XSIZE, XSIZE, YSIZE);
  526. img_resample_close(s);
  527. sprintf(buf, "/tmp/out%d.pgm", i);
  528. save_pgm(buf, img1, xsize, ysize);
  529. }
  530. /* mmx test */
  531. #ifdef CONFIG_MMX
  532. printf("MMX test\n");
  533. fact = 0.72;
  534. xsize = (int)(XSIZE * fact);
  535. ysize = (int)(YSIZE * fact);
  536. mm_flags = MM_MMX;
  537. s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
  538. component_resample(s, img1, xsize, xsize, ysize,
  539. img, XSIZE, XSIZE, YSIZE);
  540. mm_flags = 0;
  541. s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
  542. component_resample(s, img2, xsize, xsize, ysize,
  543. img, XSIZE, XSIZE, YSIZE);
  544. if (memcmp(img1, img2, xsize * ysize) != 0) {
  545. fprintf(stderr, "mmx error\n");
  546. exit(1);
  547. }
  548. printf("MMX OK\n");
  549. #endif
  550. return 0;
  551. }
  552. #endif