You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

141 lines
4.8KB

  1. /*
  2. * MMX-optimized avg/put pixel routines
  3. *
  4. * Copyright (c) 2000, 2001 Fabrice Bellard
  5. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  6. *
  7. * This file is part of Libav.
  8. *
  9. * Libav is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * Libav is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with Libav; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. #include <stddef.h>
  24. #include <stdint.h>
  25. #include "config.h"
  26. #include "fpel.h"
  27. #include "inline_asm.h"
  28. #if HAVE_MMX_INLINE
  29. // in case more speed is needed - unrolling would certainly help
  30. void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  31. ptrdiff_t line_size, int h)
  32. {
  33. MOVQ_BFE(mm6);
  34. JUMPALIGN();
  35. do {
  36. __asm__ volatile(
  37. "movq %0, %%mm0 \n\t"
  38. "movq %1, %%mm1 \n\t"
  39. PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  40. "movq %%mm2, %0 \n\t"
  41. :"+m"(*block)
  42. :"m"(*pixels)
  43. :"memory");
  44. pixels += line_size;
  45. block += line_size;
  46. }
  47. while (--h);
  48. }
  49. void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  50. ptrdiff_t line_size, int h)
  51. {
  52. MOVQ_BFE(mm6);
  53. JUMPALIGN();
  54. do {
  55. __asm__ volatile(
  56. "movq %0, %%mm0 \n\t"
  57. "movq %1, %%mm1 \n\t"
  58. PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  59. "movq %%mm2, %0 \n\t"
  60. "movq 8%0, %%mm0 \n\t"
  61. "movq 8%1, %%mm1 \n\t"
  62. PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  63. "movq %%mm2, 8%0 \n\t"
  64. :"+m"(*block)
  65. :"m"(*pixels)
  66. :"memory");
  67. pixels += line_size;
  68. block += line_size;
  69. }
  70. while (--h);
  71. }
  72. void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  73. ptrdiff_t line_size, int h)
  74. {
  75. __asm__ volatile (
  76. "lea (%3, %3), %%"FF_REG_a" \n\t"
  77. ".p2align 3 \n\t"
  78. "1: \n\t"
  79. "movq (%1 ), %%mm0 \n\t"
  80. "movq (%1, %3), %%mm1 \n\t"
  81. "movq %%mm0, (%2) \n\t"
  82. "movq %%mm1, (%2, %3) \n\t"
  83. "add %%"FF_REG_a", %1 \n\t"
  84. "add %%"FF_REG_a", %2 \n\t"
  85. "movq (%1 ), %%mm0 \n\t"
  86. "movq (%1, %3), %%mm1 \n\t"
  87. "movq %%mm0, (%2) \n\t"
  88. "movq %%mm1, (%2, %3) \n\t"
  89. "add %%"FF_REG_a", %1 \n\t"
  90. "add %%"FF_REG_a", %2 \n\t"
  91. "subl $4, %0 \n\t"
  92. "jnz 1b \n\t"
  93. : "+g"(h), "+r"(pixels), "+r"(block)
  94. : "r"((x86_reg)line_size)
  95. : "%"FF_REG_a, "memory"
  96. );
  97. }
  98. void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  99. ptrdiff_t line_size, int h)
  100. {
  101. __asm__ volatile (
  102. "lea (%3, %3), %%"FF_REG_a" \n\t"
  103. ".p2align 3 \n\t"
  104. "1: \n\t"
  105. "movq (%1 ), %%mm0 \n\t"
  106. "movq 8(%1 ), %%mm4 \n\t"
  107. "movq (%1, %3), %%mm1 \n\t"
  108. "movq 8(%1, %3), %%mm5 \n\t"
  109. "movq %%mm0, (%2) \n\t"
  110. "movq %%mm4, 8(%2) \n\t"
  111. "movq %%mm1, (%2, %3) \n\t"
  112. "movq %%mm5, 8(%2, %3) \n\t"
  113. "add %%"FF_REG_a", %1 \n\t"
  114. "add %%"FF_REG_a", %2 \n\t"
  115. "movq (%1 ), %%mm0 \n\t"
  116. "movq 8(%1 ), %%mm4 \n\t"
  117. "movq (%1, %3), %%mm1 \n\t"
  118. "movq 8(%1, %3), %%mm5 \n\t"
  119. "movq %%mm0, (%2) \n\t"
  120. "movq %%mm4, 8(%2) \n\t"
  121. "movq %%mm1, (%2, %3) \n\t"
  122. "movq %%mm5, 8(%2, %3) \n\t"
  123. "add %%"FF_REG_a", %1 \n\t"
  124. "add %%"FF_REG_a", %2 \n\t"
  125. "subl $4, %0 \n\t"
  126. "jnz 1b \n\t"
  127. : "+g"(h), "+r"(pixels), "+r"(block)
  128. : "r"((x86_reg)line_size)
  129. : "%"FF_REG_a, "memory"
  130. );
  131. }
  132. #endif /* HAVE_MMX_INLINE */