You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

81 lines
2.5KB

  1. ;******************************************************************************
  2. ;* MMX optimized DSP utils
  3. ;* Copyright (c) 2008 Loren Merritt
  4. ;* Copyright (c) 2003-2013 Michael Niedermayer
  5. ;* Copyright (c) 2013 Daniel Kang
  6. ;*
  7. ;* This file is part of FFmpeg.
  8. ;*
  9. ;* FFmpeg is free software; you can redistribute it and/or
  10. ;* modify it under the terms of the GNU Lesser General Public
  11. ;* License as published by the Free Software Foundation; either
  12. ;* version 2.1 of the License, or (at your option) any later version.
  13. ;*
  14. ;* FFmpeg is distributed in the hope that it will be useful,
  15. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. ;* Lesser General Public License for more details.
  18. ;*
  19. ;* You should have received a copy of the GNU Lesser General Public
  20. ;* License along with FFmpeg; if not, write to the Free Software
  21. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. ;******************************************************************************
  23. %include "libavutil/x86/x86util.asm"
  24. SECTION_RODATA
  25. cextern pb_80
  26. SECTION_TEXT
  27. ;--------------------------------------------------------------------------
  28. ;void ff_put_signed_pixels_clamped(const int16_t *block, uint8_t *pixels,
  29. ; int line_size)
  30. ;--------------------------------------------------------------------------
  ;--------------------------------------------------------------------------
  ; PUT_SIGNED_PIXELS_CLAMPED_HALF byte_offset
  ;
  ; Emits straight-line code that converts 32 consecutive 16-bit values read
  ; from blockq + byte_offset into four 8-byte output rows.
  ;
  ; In:    %1       byte offset added to every load from blockq
  ;        m0       per-byte bias added to each packed row (loaded by caller)
  ;        blockq   source coefficients (loaded with mova)
  ;        pixelsq  address of the first output row
  ;        lsizeq   distance in bytes between output rows
  ;        lsize3q  must already hold 3 * lsizeq (offset of the fourth row)
  ; Clobb: m1, m2, and additionally m3, m4 when mmsize == 8
  ;--------------------------------------------------------------------------
  %macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
  %if mmsize == 8
      ; 8-byte registers: each register ends up holding exactly one row.
      mova     m1, [blockq+%1+0*mmsize]
      mova     m2, [blockq+%1+2*mmsize]
      mova     m3, [blockq+%1+4*mmsize]
      mova     m4, [blockq+%1+6*mmsize]
      ; Narrow words to bytes with signed saturation; the memory operand
      ; supplies the second half of each row.
      packsswb m1, [blockq+%1+1*mmsize]
      packsswb m2, [blockq+%1+3*mmsize]
      packsswb m3, [blockq+%1+5*mmsize]
      packsswb m4, [blockq+%1+7*mmsize]
      ; Apply the byte-wise bias held in m0.
      paddb    m1, m0
      paddb    m2, m0
      paddb    m3, m0
      paddb    m4, m0
      ; One 8-byte store per row.
      movq     [pixelsq],          m1
      movq     [pixelsq+lsizeq],   m2
      movq     [pixelsq+lsizeq*2], m3
      movq     [pixelsq+lsize3q],  m4
  %else
      ; Wider registers: each register holds two rows (low half / high half).
      mova     m1, [blockq+%1+0*mmsize]
      mova     m2, [blockq+%1+2*mmsize]
      ; Narrow words to bytes with signed saturation; the memory operand
      ; becomes the high half, i.e. the odd-numbered row.
      packsswb m1, [blockq+%1+1*mmsize]
      packsswb m2, [blockq+%1+3*mmsize]
      ; Apply the byte-wise bias held in m0.
      paddb    m1, m0
      paddb    m2, m0
      ; movq writes the low 8 bytes, movhps the high 8 bytes.
      movq     [pixelsq],          m1
      movhps   [pixelsq+lsizeq],   m1
      movq     [pixelsq+lsizeq*2], m2
      movhps   [pixelsq+lsize3q],  m2
  %endif
  %endmacro
  ;--------------------------------------------------------------------------
  ; PUT_SIGNED_PIXELS_CLAMPED num_vec_regs
  ;
  ; Defines put_signed_pixels_clamped for whichever instruction set the
  ; preceding INIT_* line selected.
  ;
  ; C view: void ff_put_signed_pixels_clamped(const int16_t *block,
  ;                                           uint8_t *pixels, int line_size)
  ;
  ; In:    %1       forwarded to cglobal as the vector-register count
  ;        block    64 coefficients (8 rows of 8), read with aligned loads
  ;        pixels   destination; 8 rows of 8 bytes, line_size bytes apart
  ;        lsize    line_size
  ; Temp:  lsize3   3 * line_size
  ; Clobb: m0 plus whatever PUT_SIGNED_PIXELS_CLAMPED_HALF clobbers
  ;--------------------------------------------------------------------------
  %macro PUT_SIGNED_PIXELS_CLAMPED 1
  cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
      ; line_size arrives as a 32-bit int, but it is used below as a full-width
      ; address component. Sign-extend it so the upper half of the register
      ; (unspecified by the 64-bit ABIs) cannot leak into the addresses.
      ; Expands to nothing where lsizeq and lsized are the same register.
      movsxdifnidn lsizeq, lsized
      ; m0 = bias added to every packed byte.
      ; NOTE(review): pb_80 is external; presumably 0x80 in every byte, which
      ; would map the signed-saturated range onto 0..255 - confirm.
      mova     m0, [pb_80]
      lea      lsize3q, [lsizeq*3]             ; offset of the 4th row
      PUT_SIGNED_PIXELS_CLAMPED_HALF 0         ; rows 0-3
      lea      pixelsq, [pixelsq+lsizeq*4]     ; advance to row 4
      PUT_SIGNED_PIXELS_CLAMPED_HALF 64        ; rows 4-7 (64 bytes into block)
      RET
  %endmacro

  ; 8-byte-register build; vector-register count given as 0.
  INIT_MMX mmx
  PUT_SIGNED_PIXELS_CLAMPED 0

  ; 16-byte-register build; uses m0-m2, hence 3 vector registers.
  INIT_XMM sse2
  PUT_SIGNED_PIXELS_CLAMPED 3