You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

162 lines
3.7KB

  1. /*
  2. * MMX optimized DSP utils
  3. * Copyright (c) 2000, 2001 Gerard Lantau.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18. *
  19. */
  20. #define TESTCPU_MAIN
  21. #include "dsputil.h"
  22. //#include "../libavcodec/dsputil.c"
  23. #include "../libavcodec/i386/cputest.c"
  24. #include "../libavcodec/i386/dsputil_mmx.c"
  25. #undef TESTCPU_MAIN
  26. #define PAD 0x10000
  /*
   * For testing the speed of various routines - should probably be extended
   * into a general-purpose regression test later.
   *
   * Currently only for i386 - FIXME
   */
  /*
   * Initializer generators for the benchmark table.  Each macro expands to one
   * or more `{ "name", function, flags }` entries for the routine family `fn`.
   * The last entry of every family also carries PAD in its flags.
   */

  /* Entries for the _c, _mmx and _mmx2 variants of `fn`. */
  #define PIX_FUNC_C(fn)                              \
      { #fn "_c",     fn ## _c,     0 },              \
      { #fn "_mmx",   fn ## _mmx,   MM_MMX },         \
      { #fn "_mmx2",  fn ## _mmx2,  MM_MMXEXT | PAD }

  /* Entries for the _mmx, _3dnow and _mmx2 variants of `fn`. */
  #define PIX_FUNC(fn)                                \
      { #fn "_mmx",   fn ## _mmx,   MM_MMX },         \
      { #fn "_3dnow", fn ## _3dnow, MM_3DNOW },       \
      { #fn "_mmx2",  fn ## _mmx2,  MM_MMXEXT | PAD }

  /* Single entry for a routine that only has an _mmx variant. */
  #define PIX_FUNC_MMX(fn)                            \
      { #fn "_mmx",   fn ## _mmx,   MM_MMX | PAD }
  43. /*
  44. PIX_FUNC_C(pix_abs16x16),
  45. PIX_FUNC_C(pix_abs16x16_x2),
  46. PIX_FUNC_C(pix_abs16x16_y2),
  47. PIX_FUNC_C(pix_abs16x16_xy2),
  48. PIX_FUNC_C(pix_abs8x8),
  49. PIX_FUNC_C(pix_abs8x8_x2),
  50. PIX_FUNC_C(pix_abs8x8_y2),
  51. PIX_FUNC_C(pix_abs8x8_xy2),
  52. */
  53. static const struct pix_func {
  54. char* name;
  55. op_pixels_func func;
  56. int mm_flags;
  57. } pix_func[] = {
  58. PIX_FUNC_MMX(put_pixels),
  59. #if 1
  60. PIX_FUNC(put_pixels_x2),
  61. PIX_FUNC(put_pixels_y2),
  62. PIX_FUNC_MMX(put_pixels_xy2),
  63. PIX_FUNC(put_no_rnd_pixels_x2),
  64. PIX_FUNC(put_no_rnd_pixels_y2),
  65. PIX_FUNC_MMX(put_no_rnd_pixels_xy2),
  66. PIX_FUNC(avg_pixels),
  67. PIX_FUNC(avg_pixels_x2),
  68. PIX_FUNC(avg_pixels_y2),
  69. PIX_FUNC(avg_pixels_xy2),
  70. #endif
  71. { 0, 0 }
  72. };
  /*
   * Read the CPU time-stamp counter with the x86 `rdtsc` instruction.
   *
   * The instruction returns the low 32 bits in eax and the high 32 bits in
   * edx.  The two halves are captured through separate "=a"/"=d" outputs and
   * combined by hand: the "=A" constraint only denotes the edx:eax pair on
   * 32-bit targets, so on x86-64 it would drop the high half and leave edx
   * clobbered without the compiler knowing.
   *
   * Returns the 64-bit counter value.
   */
  static inline long long rdtsc(void)
  {
      unsigned int lo, hi;

      __asm__ volatile ("rdtsc\n\t"
                        : "=a" (lo), "=d" (hi));

      return (long long)(((unsigned long long)hi << 32) | lo);
  }
  81. static test_speed(int step)
  82. {
  83. const struct pix_func* pix = pix_func;
  84. const int linesize = 720;
  85. char empty[32768];
  86. char* bu =(char*)(((long)empty + 32) & ~0xf);
  87. int sum = 0;
  88. while (pix->name)
  89. {
  90. int i;
  91. uint64_t te, ts;
  92. op_pixels_func func = pix->func;
  93. char* im = bu;
  94. if (!(pix->mm_flags & mm_flags))
  95. continue;
  96. printf("%30s... ", pix->name);
  97. fflush(stdout);
  98. ts = rdtsc();
  99. for(i=0; i<100000; i++){
  100. func(im, im + 1000, linesize, 16);
  101. im += step;
  102. if (im > bu + 20000)
  103. im = bu;
  104. }
  105. te = rdtsc();
  106. emms();
  107. printf("% 9d\n", (int)(te - ts));
  108. sum += (te - ts) / 100000;
  109. if (pix->mm_flags & PAD)
  110. puts("");
  111. pix++;
  112. }
  113. printf("Total sum: %d\n", sum);
  114. }
  115. int main(int argc, char* argv[])
  116. {
  117. int step = 16;
  118. if (argc > 1)
  119. {
  120. // something simple for now
  121. if (argc > 2 && (strcmp("-s", argv[1]) == 0
  122. || strcmp("-step", argv[1]) == 0))
  123. step = atoi(argv[2]);
  124. }
  125. mm_flags = mm_support();
  126. printf("%s: detected CPU flags:", argv[0]);
  127. if (mm_flags & MM_MMX)
  128. printf(" mmx");
  129. if (mm_flags & MM_MMXEXT)
  130. printf(" mmxext");
  131. if (mm_flags & MM_3DNOW)
  132. printf(" 3dnow");
  133. if (mm_flags & MM_SSE)
  134. printf(" sse");
  135. if (mm_flags & MM_SSE2)
  136. printf(" sse2");
  137. printf("\n");
  138. printf("Using step: %d\n", step);
  139. test_speed(step);
  140. }