/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
  19. #ifndef LIBAVCODEC_ALPHA_ASM_H
  20. #define LIBAVCODEC_ALPHA_ASM_H
  21. #include <inttypes.h>
  22. #if defined __GNUC__
  23. # define GNUC_PREREQ(maj, min) \
  24. ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
  25. #else
  26. # define GNUC_PREREQ(maj, min) 0
  27. #endif
  28. #if GNUC_PREREQ(2,96)
  29. # define likely(x) __builtin_expect((x) != 0, 1)
  30. # define unlikely(x) __builtin_expect((x) != 0, 0)
  31. #else
  32. # define likely(x) (x)
  33. # define unlikely(x) (x)
  34. #endif
  35. #define AMASK_BWX (1 << 0)
  36. #define AMASK_FIX (1 << 1)
  37. #define AMASK_CIX (1 << 2)
  38. #define AMASK_MVI (1 << 8)
  39. inline static uint64_t BYTE_VEC(uint64_t x)
  40. {
  41. x |= x << 8;
  42. x |= x << 16;
  43. x |= x << 32;
  44. return x;
  45. }
  46. inline static uint64_t WORD_VEC(uint64_t x)
  47. {
  48. x |= x << 16;
  49. x |= x << 32;
  50. return x;
  51. }
  52. #define ldq(p) (*(const uint64_t *) (p))
  53. #define ldl(p) (*(const int32_t *) (p))
  54. #define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0)
  55. #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
  56. #define sextw(x) ((int16_t) (x))
  57. #ifdef __GNUC__
  58. #define ASM_ACCEPT_MVI asm (".arch pca56")
  59. struct unaligned_long { uint64_t l; } __attribute__((packed));
  60. #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
  61. #define uldq(a) (((const struct unaligned_long *) (a))->l)
  62. #if GNUC_PREREQ(3,0)
  63. /* Unfortunately, __builtin_prefetch is slightly buggy on Alpha. The
  64. defines here are kludged so we still get the right
  65. instruction. This needs to be adapted as soon as gcc is fixed. */
  66. # define prefetch(p) __builtin_prefetch((p), 0, 1)
  67. # define prefetch_en(p) __builtin_prefetch((p), 1, 1)
  68. # define prefetch_m(p) __builtin_prefetch((p), 0, 0)
  69. # define prefetch_men(p) __builtin_prefetch((p), 1, 0)
  70. #else
  71. # define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
  72. # define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
  73. # define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
  74. # define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
  75. #endif
  76. #if GNUC_PREREQ(3,3)
  77. #define cmpbge __builtin_alpha_cmpbge
  78. /* Avoid warnings. */
  79. #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
  80. #define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
  81. #define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
  82. #define zap __builtin_alpha_zap
  83. #define zapnot __builtin_alpha_zapnot
  84. #define amask __builtin_alpha_amask
  85. #define implver __builtin_alpha_implver
  86. #define rpcc __builtin_alpha_rpcc
  87. #define minub8 __builtin_alpha_minub8
  88. #define minsb8 __builtin_alpha_minsb8
  89. #define minuw4 __builtin_alpha_minuw4
  90. #define minsw4 __builtin_alpha_minsw4
  91. #define maxub8 __builtin_alpha_maxub8
  92. #define maxsb8 __builtin_alpha_maxsb8
  93. #define maxuw4 __builtin_alpha_maxuw4
  94. #define maxsw4 __builtin_alpha_maxsw4
  95. #define perr __builtin_alpha_perr
  96. #define pklb __builtin_alpha_pklb
  97. #define pkwb __builtin_alpha_pkwb
  98. #define unpkbl __builtin_alpha_unpkbl
  99. #define unpkbw __builtin_alpha_unpkbw
  100. #else
  101. #define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
  102. #define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
  103. #define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
  104. #define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
  105. #define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
  106. #define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
  107. #define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
  108. #define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; })
  109. #define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
  110. #define minub8(a, b) ({ uint64_t __r; asm ("minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
  111. #define minsb8(a, b) ({ uint64_t __r; asm ("minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
  112. #define minuw4(a, b) ({ uint64_t __r; asm ("minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
  113. #define minsw4(a, b) ({ uint64_t __r; asm ("minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
  114. #define maxub8(a, b) ({ uint64_t __r; asm ("maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
  115. #define maxsb8(a, b) ({ uint64_t __r; asm ("maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
  116. #define maxuw4(a, b) ({ uint64_t __r; asm ("maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
  117. #define maxsw4(a, b) ({ uint64_t __r; asm ("maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
  118. #define perr(a, b) ({ uint64_t __r; asm ("perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
  119. #define pklb(a) ({ uint64_t __r; asm ("pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
  120. #define pkwb(a) ({ uint64_t __r; asm ("pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
  121. #define unpkbl(a) ({ uint64_t __r; asm ("unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
  122. #define unpkbw(a) ({ uint64_t __r; asm ("unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
  123. #endif
  124. #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
  125. #include <c_asm.h>
  126. #define ASM_ACCEPT_MVI
  127. #define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
  128. #define uldq(a) (*(const __unaligned uint64_t *) (a))
  129. #define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
  130. #define extql(a, b) asm ("extql %a0,%a1,%v0", a, b)
  131. #define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b)
  132. #define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b)
  133. #define zap(a, b) asm ("zap %a0,%a1,%v0", a, b)
  134. #define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b)
  135. #define amask(a) asm ("amask %a0,%v0", a)
  136. #define implver() asm ("implver %v0")
  137. #define rpcc() asm ("rpcc %v0")
  138. #define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b)
  139. #define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b)
  140. #define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b)
  141. #define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b)
  142. #define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b)
  143. #define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b)
  144. #define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b)
  145. #define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b)
  146. #define perr(a, b) asm ("perr %a0,%a1,%v0", a, b)
  147. #define pklb(a) asm ("pklb %a0,%v0", a)
  148. #define pkwb(a) asm ("pkwb %a0,%v0", a)
  149. #define unpkbl(a) asm ("unpkbl %a0,%v0", a)
  150. #define unpkbw(a) asm ("unpkbw %a0,%v0", a)
  151. #else
  152. #error "Unknown compiler!"
  153. #endif
  154. #endif /* LIBAVCODEC_ALPHA_ASM_H */