You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

96 lines
2.7KB

  1. ;*****************************************************************************
  2. ;* SIMD-optimized MPEG encoding functions
  3. ;*****************************************************************************
  4. ;* Copyright (c) 2000, 2001 Fabrice Bellard
  5. ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  6. ;*
  7. ;* This file is part of Libav.
  8. ;*
  9. ;* Libav is free software; you can redistribute it and/or
  10. ;* modify it under the terms of the GNU Lesser General Public
  11. ;* License as published by the Free Software Foundation; either
  12. ;* version 2.1 of the License, or (at your option) any later version.
  13. ;*
  14. ;* Libav is distributed in the hope that it will be useful,
  15. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. ;* Lesser General Public License for more details.
  18. ;*
  19. ;* You should have received a copy of the GNU Lesser General Public
  20. ;* License along with Libav; if not, write to the Free Software
  21. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. ;*****************************************************************************
  23. %include "libavutil/x86/x86util.asm"
  24. SECTION .text
  25. INIT_MMX mmx
  26. ; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
  27. cglobal pix_sum16, 2, 3
  28. movsxdifnidn r1, r1d
  29. mov r2, r1
  30. neg r2
  31. shl r2, 4
  32. sub r0, r2
  33. pxor m7, m7
  34. pxor m6, m6
  35. .loop:
  36. mova m0, [r0+r2+0]
  37. mova m1, [r0+r2+0]
  38. mova m2, [r0+r2+8]
  39. mova m3, [r0+r2+8]
  40. punpcklbw m0, m7
  41. punpckhbw m1, m7
  42. punpcklbw m2, m7
  43. punpckhbw m3, m7
  44. paddw m1, m0
  45. paddw m3, m2
  46. paddw m3, m1
  47. paddw m6, m3
  48. add r2, r1
  49. js .loop
  50. mova m5, m6
  51. psrlq m6, 32
  52. paddw m6, m5
  53. mova m5, m6
  54. psrlq m6, 16
  55. paddw m6, m5
  56. movd eax, m6
  57. and eax, 0xffff
  58. RET
  59. INIT_MMX mmx
  60. ; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
  61. cglobal pix_norm1, 2, 4
  62. movsxdifnidn r1, r1d
  63. mov r2, 16
  64. pxor m0, m0
  65. pxor m7, m7
  66. .loop:
  67. mova m2, [r0+0]
  68. mova m3, [r0+8]
  69. mova m1, m2
  70. punpckhbw m1, m0
  71. punpcklbw m2, m0
  72. mova m4, m3
  73. punpckhbw m3, m0
  74. punpcklbw m4, m0
  75. pmaddwd m1, m1
  76. pmaddwd m2, m2
  77. pmaddwd m3, m3
  78. pmaddwd m4, m4
  79. paddd m2, m1
  80. paddd m4, m3
  81. paddd m7, m2
  82. add r0, r1
  83. paddd m7, m4
  84. dec r2
  85. jne .loop
  86. mova m1, m7
  87. psrlq m7, 32
  88. paddd m1, m7
  89. movd eax, m1
  90. RET