/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include "avcodec.h"
#include "dsputil.h"

#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST 1 /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits of precision are needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)
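
/*
 * Source positions are tracked in 16.16 fixed point (POS_FRAC_BITS
 * fractional bits). The top PHASE_BITS bits of the fractional part
 * select one of NB_PHASES precomputed NB_TAPS-tap filters, whose
 * coefficients are scaled by 1 << FILTER_BITS.
 */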
struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int h_incr, v_incr;
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    uint8_t *line_buf;
};
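
/* extract the filter phase from a 16.16 fixed-point source position */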
static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}

/* This function must be optimized */
static void h_resample_fast(uint8_t *dst, int dst_width, uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
              s[1] * filter[1] +
              s[2] * filter[2] +
              s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

/* This function must be optimized */
static void v_resample(uint8_t *dst, int dst_width, uint8_t *src, int wrap,
                       int16_t *filter)
{
    int sum, i;
    uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
#else
        {
            int j;
            uint8_t *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"
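
/*
 * Compute one horizontally filtered output pixel into an MMX register:
 * load 4 source bytes, widen them to 16 bits, multiply-accumulate them
 * against the 4 filter taps with pmaddwd, reduce the two partial sums
 * to a single 32-bit result, and descale by FILTER_BITS.
 */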
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);

/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, uint8_t *src,
                                 int src_width, int src_start, int src_incr,
                                 int16_t *filters)
{
    int src_pos, phase;
    uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {
        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

static void v_resample4_mmx(uint8_t *dst, int dst_width, uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif

#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(uint8_t *dst, int dst_width, uint8_t *src, int wrap,
                          int16_t *filter)
{
    int sum, i;
    uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;

    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15 - FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);

    /*
       When we're resampling, we'd ideally like both our input and
       output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment,
       so we opt for aligned writes, as unaligned writes have a huge
       overhead. To do this, do enough scalar resamples to get dst
       16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0) sum = 0; else if (sum > 255) sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width >= 16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst += 16;
        s += 16;
        dst_width -= 16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0) sum = 0; else if (sum > 255) sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif

/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(uint8_t *dst, int dst_width, uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, j, v, i;
    uint8_t *s, *src_end;
    int16_t *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}
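
/*
 * Resample one line: output pixels whose filter footprint would read
 * outside the source line go through h_resample_slow() (which clamps
 * to the edge samples), while the interior run takes the fast path.
 */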
static void h_resample(uint8_t *dst, int dst_width, uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}

static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
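    /*
     * Horizontally filtered lines live in a buffer of
     * LINE_BUF_HEIGHT + NB_TAPS line slots; lines landing in the last
     * NB_TAPS slots are also copied to the first NB_TAPS slots, so the
     * vertical filter can always read NB_TAPS consecutive lines
     * without wrapping.
     */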
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX deactivated because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;
        output += owrap;
    }
}

/* XXX: the following filter is quite naive, but it seems to suffice
   for 4 taps */
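/*
 * Build NB_PHASES sets of NB_TAPS coefficients by sampling an
 * unwindowed sinc at fractional offsets of 1/NB_PHASES pixel. Each
 * phase is normalized so that its taps sum to 1 << FILTER_BITS.
 */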
static void build_filter(int16_t *filter, float factor)
{
    int ph, i, v;
    float x, y, tab[NB_TAPS], norm, mult;

    /* if upsampling, only need to interpolate, no filter */
    if (factor > 1.0)
        factor = 1.0;

    for(ph=0;ph<NB_PHASES;ph++) {
        norm = 0;
        for(i=0;i<NB_TAPS;i++) {
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
            if (x == 0)
                y = 1.0;
            else
                y = sin(x) / x;
            tab[i] = y;
            norm += y;
        }

        /* normalize so that a uniform color remains the same */
        mult = (float)(1 << FILTER_BITS) / norm;
        for(i=0;i<NB_TAPS;i++) {
            v = (int)(tab[i] * mult);
            filter[ph * NB_TAPS + i] = v;
        }
    }
}

ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
}

ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                           int iwidth, int iheight,
                                           int topBand, int bottomBand,
                                           int leftBand, int rightBand)
{
    ImgReSampleContext *s;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;
    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;
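
    /* source step per output pixel, in 16.16 fixed point */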
    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;

    build_filter(&s->h_filters[0][0],
                 (float) owidth / (float) (iwidth - leftBand - rightBand));
    build_filter(&s->v_filters[0][0],
                 (float) oheight / (float) (iheight - topBand - bottomBand));
    return s;
fail:
    av_free(s);
    return NULL;
}

void img_resample(ImgReSampleContext *s,
                  AVPicture *output, AVPicture *input)
{
    int i, shift;

    for(i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;
        component_resample(s, output->data[i], output->linesize[i],
                           s->owidth >> shift, s->oheight >> shift,
                           input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
                           input->linesize[i],
                           (s->iwidth - s->leftBand - s->rightBand) >> shift,
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}
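
/*
 * Typical usage (illustrative sketch only; assumes the caller has
 * already set up the AVPicture in/out structures with valid data[]
 * and linesize[] for YUV 4:2:0 frames of the given sizes):
 *
 *     ImgReSampleContext *ctx;
 *     ctx = img_resample_init(out_width, out_height, in_width, in_height);
 *     if (ctx) {
 *         img_resample(ctx, &out_pic, &in_pic);
 *         img_resample_close(ctx);
 *     }
 */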

#ifdef TEST

void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    memset(ptr, 0, size);
    return ptr;
}

void av_free(void *ptr)
{
    /* XXX: this test should not be needed on most libcs */
    if (ptr)
        free(ptr);
}

/* input */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];

void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;
    f = fopen(filename, "wb"); /* binary mode: the PGM payload is raw bytes */
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
}

static void dump_filter(int16_t *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        printf("%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        printf("\n");
    }
}

#ifdef HAVE_MMX
int mm_flags;
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else if (y < XSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
                        if ((y+x) & 1)
                            v = 0xff;
                        else
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            /* flip both axes; the -1 keeps the index inside the buffer */
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);

    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0);
        printf("Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        sprintf(buf, "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }

    /* mmx test */
#ifdef HAVE_MMX
    printf("MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    img_resample_close(s);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    img_resample_close(s);

    if (memcmp(img1, img2, xsize * ysize) != 0) {
        fprintf(stderr, "mmx error\n");
        exit(1);
    }
    printf("MMX OK\n");
#endif
    return 0;
}

#endif