You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

73 lines
2.6KB

  1. /*
  2. * Copyright (c) 2012 Michael Niedermayer <michaelni@gmx.at>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/x86/asm.h"
  21. #include "libavutil/cpu.h"
  22. #include "libswresample/swresample_internal.h"
  /*
   * COMMON_CORE_DBL_SSE2: SSE2 inner loop that multiplies the doubles starting
   * at src + sample_index element-wise with the doubles starting at filter,
   * sums all products and stores the total (one double) at dst + dst_index.
   *
   * The macro expands to a declaration followed by an asm statement, so it has
   * to be used where c, src, sample_index, filter, dst and dst_index are in
   * scope and where a new local named len may be declared.
   *
   * len starts at -8 * c->filter_length, i.e. a negative byte offset (the
   * packed-double instructions below work on 8-byte elements). Both base
   * pointers are moved forward by -len, so (base, len) addresses the first
   * element and the loop stops once len is no longer negative.
   *
   * Notes:
   *  - Two doubles are consumed per iteration and the loop body runs before
   *    the first test, so this presumes an even, non-zero filter_length
   *    (TODO confirm against the caller).
   *  - mulpd takes its memory operand directly, which requires the filter
   *    address to be 16-byte aligned; the source samples are fetched with
   *    movupd and may be unaligned.
   *  - The asm reads src/filter and writes the result purely through register
   *    operands, which the compiler cannot see. The "memory" clobber makes
   *    those accesses visible so surrounding loads/stores are not cached or
   *    reordered across the statement.
   */
  #define COMMON_CORE_DBL_SSE2 \
      x86_reg len= -8*c->filter_length;\
  __asm__ volatile(\
      "xorpd %%xmm0, %%xmm0 \n\t" /* xmm0 = two running partial sums = 0 */\
      "1: \n\t"\
      "movupd (%1, %0), %%xmm1 \n\t" /* two source samples (unaligned load) */\
      "mulpd (%2, %0), %%xmm1 \n\t" /* times two coefficients (aligned) */\
      "addpd %%xmm1, %%xmm0 \n\t" /* accumulate both lanes */\
      "add $16, %0 \n\t" /* advance by two doubles */\
      " js 1b \n\t" /* loop while the offset is still negative */\
      "movhlps %%xmm0, %%xmm1 \n\t" /* low lane of xmm1 = high lane of xmm0 */\
      "addpd %%xmm1, %%xmm0 \n\t" /* low lane of xmm0 = sum of both lanes */\
      "movsd %%xmm0, (%3) \n\t" /* store the low lane to dst + dst_index */\
      : "+r" (len)\
      : "r" (((uint8_t*)(src+sample_index))-len),\
        "r" (((uint8_t*)filter)-len),\
        "r" (dst+dst_index)\
      : XMM_CLOBBERS("%xmm0", "%xmm1",) "memory"\
  );
  /*
   * LINEAR_CORE_DBL_SSE2: SSE2 inner loop that computes two sums of products
   * over the same run of source doubles starting at src + sample_index:
   *   val = sum(src[i] * filter[i])
   *   v2  = sum(src[i] * (filter + c->filter_alloc)[i])
   *
   * The macro expands to a declaration followed by an asm statement, so it has
   * to be used where c, src, sample_index, filter, val and v2 are in scope and
   * where a new local named len may be declared. val and v2 are written as
   * 8-byte memory operands via movsd.
   *
   * len starts at -8 * c->filter_length, i.e. a negative byte offset (the
   * packed-double instructions below work on 8-byte elements). All three base
   * pointers are moved forward by -len, so (base, len) addresses the first
   * element and the loop stops once len is no longer negative.
   *
   * Notes:
   *  - Two doubles are consumed per iteration and the loop body runs before
   *    the first test, so this presumes an even, non-zero filter_length
   *    (TODO confirm against the caller).
   *  - mulpd takes its memory operand directly, which requires both
   *    coefficient addresses (filter and filter + c->filter_alloc) to be
   *    16-byte aligned; the source samples are fetched with movupd and may be
   *    unaligned.
   *  - The asm reads src and both coefficient rows purely through register
   *    operands, which the compiler cannot see. The "memory" clobber makes
   *    those reads visible so earlier stores to that data are not delayed
   *    past the statement.
   */
  #define LINEAR_CORE_DBL_SSE2 \
      x86_reg len= -8*c->filter_length;\
  __asm__ volatile(\
      "xorpd %%xmm0, %%xmm0 \n\t" /* xmm0 = partial sums for val = 0 */\
      "xorpd %%xmm2, %%xmm2 \n\t" /* xmm2 = partial sums for v2 = 0 */\
      "1: \n\t"\
      "movupd (%3, %0), %%xmm1 \n\t" /* two source samples (unaligned load) */\
      "movapd %%xmm1, %%xmm3 \n\t" /* copy them for the second row */\
      "mulpd (%4, %0), %%xmm1 \n\t" /* times coefficients at filter */\
      "mulpd (%5, %0), %%xmm3 \n\t" /* times coefficients at filter + filter_alloc */\
      "addpd %%xmm1, %%xmm0 \n\t" /* accumulate first row */\
      "addpd %%xmm3, %%xmm2 \n\t" /* accumulate second row */\
      "add $16, %0 \n\t" /* advance by two doubles */\
      " js 1b \n\t" /* loop while the offset is still negative */\
      "movhlps %%xmm0, %%xmm1 \n\t" /* low lane of xmm1 = high lane of xmm0 */\
      "movhlps %%xmm2, %%xmm3 \n\t" /* low lane of xmm3 = high lane of xmm2 */\
      "addpd %%xmm1, %%xmm0 \n\t" /* low lane of xmm0 = first total */\
      "addpd %%xmm3, %%xmm2 \n\t" /* low lane of xmm2 = second total */\
      "movsd %%xmm0, %1 \n\t" /* val = first total */\
      "movsd %%xmm2, %2 \n\t" /* v2 = second total */\
      : "+r" (len),\
        "=m" (val),\
        "=m" (v2)\
      : "r" (((uint8_t*)(src+sample_index))-len),\
        "r" (((uint8_t*)filter)-len),\
        "r" (((uint8_t*)(filter+c->filter_alloc))-len)\
      : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3",) "memory"\
  );