You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3893 lines
134KB

  1. /*
  2. * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
  3. *
  4. * This library is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2 of the License, or (at your option) any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with this library; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. */
  18. #include "avcodec.h"
  19. #include "common.h"
  20. #include "dsputil.h"
  21. #include "cabac.h"
  22. #include "mpegvideo.h"
  23. #undef NDEBUG
  24. #include <assert.h>
  /* Size limits and the coefficient type shared by the declarations below. */
  /* Dimension of the per-level arrays in QTree (treedim, tree) and Plane (band). */
  25. #define MAX_DECOMPOSITIONS 8
  /* Dimension of SnowContext.plane[]. */
  26. #define MAX_PLANES 4
  /* Element type of the wavelet buffers (SubBand.buf, the lift() operands). */
  27. #define DWTELEM int
  /* NOTE(review): not used in the visible part of the file; qexp[] has 8 entries
     whose values follow 2^(i/8), so this is presumably the root of that
     quantizer step — confirm against the users of QROOT. */
  28. #define QROOT 8
  /* 256-entry lookup yielding one of 3 levels (-1, 0, 1).
     Indices 0-1 and 255 give 0, 2-127 give 1, 128-254 give -1.
     NOTE(review): the layout suggests the index is a signed 8-bit value
     reinterpreted as unsigned; the users are outside this chunk. */
  29. static const int8_t quant3[256]={
  30. 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  31. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  32. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  33. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  34. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  35. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  36. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  37. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  39. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  40. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  41. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  42. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  43. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  44. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  45. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
  46. };
  /* 3-level lookup (-1, 0, 1) with a narrower zero bin than quant3:
     only index 0 gives 0, 1-127 give 1, 128-255 give -1. */
  47. static const int8_t quant3b[256]={
  48. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  49. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  50. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  51. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  52. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  53. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  54. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  55. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  56. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  57. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  58. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  59. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  60. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  61. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  62. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  63. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  64. };
  /* 256-entry lookup yielding one of 5 levels (-2..2).
     Index 0 gives 0, 1-3 give 1, 4-127 give 2, 128-252 give -2, 253-255 give -1. */
  65. static const int8_t quant5[256]={
  66. 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  67. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  68. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  69. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  70. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  71. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  72. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  73. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  74. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  75. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  76. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  77. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  78. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  79. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  80. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  81. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
  82. };
  /* 256-entry lookup yielding one of 7 levels (-3..3).
     The upper half mirrors the lower half with the sign flipped:
     entry 256-k equals minus entry k for k = 1..127. */
  83. static const int8_t quant7[256]={
  84. 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  85. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  86. 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  87. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  88. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  89. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  90. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  91. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  92. -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
  93. -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
  94. -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
  95. -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
  96. -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
  97. -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
  98. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  99. -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
  100. };
  /* 256-entry lookup yielding one of 9 levels (-4..4); bins widen as the
     index moves away from 0 (and, in the upper half, away from 255). */
  101. static const int8_t quant9[256]={
  102. 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  103. 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  104. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  105. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  106. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  107. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  108. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  109. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  110. -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
  111. -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
  112. -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
  113. -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
  114. -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
  115. -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
  116. -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
  117. -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
  118. };
  /* 256-entry lookup yielding one of 11 levels (-5..5); bins widen as the
     index moves away from 0 (and, in the upper half, away from 255). */
  119. static const int8_t quant11[256]={
  120. 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
  121. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  122. 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  123. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  124. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  125. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  126. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  127. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  128. -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
  129. -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
  130. -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
  131. -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
  132. -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
  133. -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
  134. -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
  135. -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
  136. };
  /* 256-entry lookup yielding one of 13 levels (-6..6); bins widen as the
     index moves away from 0 (and, in the upper half, away from 255). */
  137. static const int8_t quant13[256]={
  138. 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  139. 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  140. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  141. 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  142. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  143. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  144. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  145. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  146. -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
  147. -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
  148. -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
  149. -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
  150. -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
  151. -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
  152. -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
  153. -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
  154. };
  /* Scale of the obmc32/obmc16 weight tables that follow: their variants are
     labelled "64*cubic", "64*linear" and "64*cos", and no entry exceeds 64. */
  155. #define OBMC_MAX 64
  156. #if 0 //64*cubic
  /* 32x32 weight window, "64*cubic" variant. It sits in the `#if 0` branch of
     the enclosing conditional, so it is not compiled. Row-major, symmetric in
     both directions, largest entry 64. */
  157. static const uint8_t obmc32[1024]={
  158. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  159. 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  160. 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
  161. 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
  162. 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
  163. 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
  164. 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
  165. 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
  166. 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
  167. 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
  168. 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
  169. 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
  170. 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
  171. 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
  172. 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
  173. 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
  174. 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
  175. 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
  176. 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
  177. 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
  178. 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
  179. 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
  180. 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
  181. 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
  182. 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
  183. 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
  184. 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
  185. 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
  186. 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
  187. 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
  188. 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  189. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  190. //error:0.000022
  191. };
  /* 16x16 weight window, "64*cubic" variant (inside the `#if 0` branch, so not
     compiled). Row-major, symmetric in both directions, largest entry 62. */
  192. static const uint8_t obmc16[256]={
  193. 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  194. 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
  195. 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
  196. 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
  197. 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
  198. 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
  199. 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
  200. 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
  201. 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
  202. 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
  203. 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
  204. 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
  205. 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
  206. 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
  207. 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
  208. 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  209. //error:0.000033
  210. };
  211. #elif 1 // 64*linear
  /* 32x32 weight window, "64*linear" variant. It sits in the `#elif 1` branch
     of the enclosing conditional, so this is the copy that gets compiled.
     Row-major, symmetric in both directions, largest entry 60. */
  212. static const uint8_t obmc32[1024]={
  213. 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
  214. 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
  215. 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
  216. 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
  217. 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
  218. 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
  219. 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
  220. 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
  221. 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
  222. 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
  223. 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
  224. 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
  225. 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
  226. 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
  227. 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
  228. 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
  229. 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
  230. 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
  231. 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
  232. 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
  233. 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
  234. 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
  235. 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
  236. 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
  237. 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
  238. 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
  239. 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
  240. 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
  241. 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
  242. 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
  243. 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
  244. 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
  245. //error:0.000020
  246. };
  /* 16x16 weight window, "64*linear" variant (inside the `#elif 1` branch, so
     this is the compiled copy). Row-major, symmetric in both directions,
     largest entry 56. */
  247. static const uint8_t obmc16[256]={
  248. 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
  249. 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
  250. 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
  251. 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
  252. 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
  253. 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
  254. 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
  255. 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
  256. 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
  257. 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
  258. 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
  259. 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
  260. 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
  261. 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
  262. 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
  263. 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
  264. //error:0.000015
  265. };
  266. #else //64*cos
  /* 32x32 weight window, "64*cos" variant. It sits in the final `#else` branch,
     which is unreachable while the preceding `#elif 1` is in place. Row-major,
     symmetric in both directions, largest entry 64. */
  267. static const uint8_t obmc32[1024]={
  268. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  269. 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  270. 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
  271. 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
  272. 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
  273. 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
  274. 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
  275. 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
  276. 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
  277. 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
  278. 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
  279. 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
  280. 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
  281. 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
  282. 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
  283. 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
  284. 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
  285. 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
  286. 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
  287. 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
  288. 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
  289. 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
  290. 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
  291. 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
  292. 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
  293. 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
  294. 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
  295. 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
  296. 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
  297. 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
  298. 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  299. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  300. //error:0.000022
  301. };
  /* 16x16 weight window, "64*cos" variant (inside the final `#else` branch,
     unreachable while the preceding `#elif 1` is in place). Row-major,
     symmetric in both directions, largest entry 63. */
  302. static const uint8_t obmc16[256]={
  303. 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  304. 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
  305. 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
  306. 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
  307. 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
  308. 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
  309. 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
  310. 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
  311. 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
  312. 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
  313. 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
  314. 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
  315. 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
  316. 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
  317. 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
  318. 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  319. //error:0.000022
  320. };
  321. #endif
  /* Multi-level byte tree embedded in every SubBand (SubBand.tree).
     NOTE(review): none of these fields is read or written in the visible part
     of the file; the per-field notes below are inferred from names and types
     and need confirming against the code that fills the structure. */
  322. typedef struct QTree{
  /* Two integers per level — presumably the width and height of that level. */
  323. int treedim[MAX_DECOMPOSITIONS][2];
  /* One byte buffer per level; ownership/allocation is not visible here. */
  324. uint8_t *tree[MAX_DECOMPOSITIONS];
  /* Presumably the index of the last populated level. */
  325. int max_level;
  326. int stride;
  327. }QTree;
  /* Descriptor of one subband. Plane holds MAX_DECOMPOSITIONS x 4 of these and
     SnowContext additionally uses the type for mb_band and mv_band[].
     NOTE(review): field semantics other than qlog are inferred from names. */
  328. typedef struct SubBand{
  329. int level;
  /* Presumably the distance, in DWTELEM units, between rows of buf — TODO confirm. */
  330. int stride;
  331. int width;
  332. int height;
  333. int qlog; ///< log(qscale)/log[2^(1/6)]
  /* Coefficient storage; whether this aliases a shared buffer is not visible here. */
  334. DWTELEM *buf;
  335. QTree tree;
  336. struct SubBand *parent;
  /* 519 rows of 32 bytes; the 32-byte row size matches the `state` arrays
     addressed at offsets 0..31 by put_symbol()/get_symbol(). The commented-out
     7*2 is the previous row count. */
  337. uint8_t state[/*7*2*/ 7 + 512][32];
  338. }SubBand;
  /* One image plane: its dimensions plus a grid of subband descriptors with
     MAX_DECOMPOSITIONS rows and 4 entries per row.
     NOTE(review): presumably band[level][orientation] — confirm with users. */
  339. typedef struct Plane{
  340. int width;
  341. int height;
  342. SubBand band[MAX_DECOMPOSITIONS][4];
  343. }Plane;
  /* Top-level codec state: entropy coder, DSP helpers, picture buffers, header
     fields, wavelet work buffers, per-plane subband layout and motion data.
     NOTE(review): nothing in the visible part of the file reads or writes these
     fields, so the grouping comments below are inferred from names and types. */
  344. typedef struct SnowContext{
  345. // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
  346. AVCodecContext *avctx;
  /* Entropy coder instance (same type that put_symbol()/get_symbol() take). */
  347. CABACContext c;
  348. DSPContext dsp;
  /* Picture buffers. */
  349. AVFrame input_picture;
  350. AVFrame current_picture;
  351. AVFrame last_picture;
  352. AVFrame mconly_picture;
  353. // uint8_t q_context[16];
  /* 32 bytes, the same size as the state arrays used by the symbol coders. */
  354. uint8_t header_state[32];
  /* Stream/frame parameters. */
  355. int keyframe;
  356. int version;
  357. int spatial_decomposition_type;
  358. int temporal_decomposition_type;
  359. int spatial_decomposition_count;
  360. int temporal_decomposition_count;
  /* Work buffers of wavelet elements. */
  361. DWTELEM *spatial_dwt_buffer;
  362. DWTELEM *pred_buffer;
  363. int colorspace_type;
  364. int chroma_h_shift;
  365. int chroma_v_shift;
  366. int spatial_scalability;
  367. int qlog;
  368. int mv_scale;
  369. int qbias;
  /* NOTE(review): presumably the fixed-point precision of qbias — confirm. */
  370. #define QBIAS_SHIFT 3
  371. int b_width; //FIXME remove?
  372. int b_height; //FIXME remove?
  373. Plane plane[MAX_PLANES];
  374. SubBand mb_band;
  375. SubBand mv_band[2];
  376. uint16_t *mb_type;
  377. uint8_t *mb_mean;
  378. uint32_t *dummy;
  /* Arrays of 2-component int16 vectors. */
  379. int16_t (*motion_val8)[2];
  380. int16_t (*motion_val16)[2];
  381. MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
  382. }SnowContext;
  /* Fixed-point table of fractional powers of two: the active row equals
     round(2^(i/8) * 2^QEXPSHIFT) for i = 0..7 (128 = 1.0). The commented rows
     are the same curve at smaller scales (64, 32, 16 and 8 for 1.0). */
  383. #define QEXPSHIFT 7 //FIXME try to change this to 0
  384. static const uint8_t qexp[8]={
  385. 128, 140, 152, 166, 181, 197, 215, 235
  386. // 64, 70, 76, 83, 91, 99, 108, 117
  387. // 32, 35, 38, 41, 45, 49, 54, 59
  388. // 16, 17, 19, 21, 23, 25, 27, 29
  389. // 8, 9, 10, 10, 11, 12, 13, 15
  390. };
  /**
   * Reflect an index back towards the range [0, m].
   *
   * Only one reflection is applied, so the result is guaranteed to lie in
   * [0, m] only when -m <= v <= 2*m.
   *
   * @param v index to fold
   * @param m largest valid index
   * @return -v if v is negative, 2*m - v if v exceeds m, otherwise v
   */
  static inline int mirror(int v, int m){
      if(v < 0)
          return -v;
      return v > m ? 2*m - v : v;
  }
  396. static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
  397. int i;
  398. if(v){
  399. const int a= ABS(v);
  400. const int e= av_log2(a);
  401. #if 1
  402. const int el= FFMIN(e, 10);
  403. put_cabac(c, state+0, 0);
  404. for(i=0; i<el; i++){
  405. put_cabac(c, state+1+i, 1); //1..10
  406. }
  407. for(; i<e; i++){
  408. put_cabac(c, state+1+9, 1); //1..10
  409. }
  410. put_cabac(c, state+1+FFMIN(i,9), 0);
  411. for(i=e-1; i>=el; i--){
  412. put_cabac(c, state+22+9, (a>>i)&1); //22..31
  413. }
  414. for(; i>=0; i--){
  415. put_cabac(c, state+22+i, (a>>i)&1); //22..31
  416. }
  417. if(is_signed)
  418. put_cabac(c, state+11 + el, v < 0); //11..21
  419. #else
  420. put_cabac(c, state+0, 0);
  421. if(e<=9){
  422. for(i=0; i<e; i++){
  423. put_cabac(c, state+1+i, 1); //1..10
  424. }
  425. put_cabac(c, state+1+i, 0);
  426. for(i=e-1; i>=0; i--){
  427. put_cabac(c, state+22+i, (a>>i)&1); //22..31
  428. }
  429. if(is_signed)
  430. put_cabac(c, state+11 + e, v < 0); //11..21
  431. }else{
  432. for(i=0; i<e; i++){
  433. put_cabac(c, state+1+FFMIN(i,9), 1); //1..10
  434. }
  435. put_cabac(c, state+1+FFMIN(i,9), 0);
  436. for(i=e-1; i>=0; i--){
  437. put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
  438. }
  439. if(is_signed)
  440. put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
  441. }
  442. #endif
  443. }else{
  444. put_cabac(c, state+0, 1);
  445. }
  446. }
  447. static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
  448. if(get_cabac(c, state+0))
  449. return 0;
  450. else{
  451. int i, e, a, el;
  452. //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
  453. for(e=0; e<10; e++){
  454. if(get_cabac(c, state + 1 + e)==0) // 1..10
  455. break;
  456. }
  457. el= e;
  458. if(e==10){
  459. while(get_cabac(c, state + 1 + 9)) //10
  460. e++;
  461. }
  462. a= 1;
  463. for(i=e-1; i>=el; i--){
  464. a += a + get_cabac(c, state+22+9); //31
  465. }
  466. for(; i>=0; i--){
  467. a += a + get_cabac(c, state+22+i); //22..31
  468. }
  469. if(is_signed && get_cabac(c, state+11 + el)) //11..21
  470. return -a;
  471. else
  472. return a;
  473. }
  474. }
  475. static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
  476. int i;
  477. int e= av_log2(v<<1);
  478. assert(v>=0);
  479. if(v==0) assert(e==0);
  480. while(e > log2){
  481. put_cabac(c, state+log2, 1);
  482. v -= 1<<log2;
  483. assert(v>=0);
  484. e= av_log2(v<<1);
  485. log2++;
  486. }
  487. put_cabac(c, state+log2, 0);
  488. for(i=log2-1; i>=0; i--){
  489. put_cabac(c, state+31-i, (v>>i)&1);
  490. }
  491. assert(!((v>>i)&1));
  492. }
  493. static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
  494. int i;
  495. int v=0;
  496. while(get_cabac(c, state+log2)){
  497. v+= 1<<log2;
  498. log2++;
  499. }
  500. for(i=log2-1; i>=0; i--){
  501. v+= get_cabac(c, state+31-i)<<i;
  502. }
  503. return v;
  504. }
  505. static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
  506. const int mirror_left= !highpass;
  507. const int mirror_right= (width&1) ^ highpass;
  508. const int w= (width>>1) - 1 + (highpass & width);
  509. int i;
  510. #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
  511. if(mirror_left){
  512. dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
  513. dst += dst_step;
  514. src += src_step;
  515. }
  516. for(i=0; i<w; i++){
  517. dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
  518. }
  519. if(mirror_right){
  520. dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
  521. }
  522. }
  523. static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
  524. const int mirror_left= !highpass;
  525. const int mirror_right= (width&1) ^ highpass;
  526. const int w= (width>>1) - 1 + (highpass & width);
  527. int i;
  528. if(mirror_left){
  529. int r= 3*2*ref[0];
  530. r += r>>4;
  531. r += r>>8;
  532. dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
  533. dst += dst_step;
  534. src += src_step;
  535. }
  536. for(i=0; i<w; i++){
  537. int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
  538. r += r>>4;
  539. r += r>>8;
  540. dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
  541. }
  542. if(mirror_right){
  543. int r= 3*2*ref[w*ref_step];
  544. r += r>>4;
  545. r += r>>8;
  546. dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
  547. }
  548. }
  /**
   * Fold an arbitrary index onto [0, len-1] by symmetric reflection about the
   * first and last sample, repeated as often as needed.
   *
   * A single reflection (-idx, or 2*len-2-idx) is only enough while the index
   * is within len-1 of the valid range; with short lines and wide filters it
   * can still land outside, so the index is reduced modulo the reflection
   * period 2*(len-1) first. For indices the single reflection handled
   * correctly the result is unchanged.
   */
  static int lift_reflect_index(int idx, int len){
      int period;

      if(len <= 1)
          return 0;               // a one-sample line mirrors onto itself
      period= 2*(len - 1);
      idx %= period;
      if(idx < 0)
          idx += period;
      return idx < len ? idx : period - idx;
  }

  /**
   * In-place lifting step on one line: every second sample, starting at
   * `start`, gets the rounded, shifted weighted sum of its n neighbours at odd
   * offsets (-(n-1), -(n-3), ..., n-1) added (or subtracted when inverse is set).
   *
   * @param dst     line of `width` samples, modified in place
   * @param width   number of samples
   * @param coeffs  n filter weights (may be NULL when n is 0)
   * @param n       number of taps
   * @param shift   right shift applied to the sum, with rounding offset (1<<shift)/2
   * @param start   index of the first sample to update (0 or 1 in this file)
   * @param inverse non-zero subtracts instead of adds
   */
  static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
      int x, i;

      for(x=start; x<width; x+=2){
          int64_t sum=0;

          for(i=0; i<n; i++){
              const int x2= lift_reflect_index(x + 2*i - n + 1, width);
              sum += coeffs[i]*(int64_t)dst[x2];
          }
          if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
          else        dst[x] += (sum + (1<<shift)/2)>>shift;
      }
  }

  /**
   * Vertical counterpart of inplace_lift(): the same step applied down every
   * column of a width x height block whose rows are `stride` ints apart.
   *
   * @param dst     top-left sample of the block, modified in place
   * @param width   number of columns
   * @param height  number of rows
   * @param stride  distance between rows, in ints
   * @param coeffs  n filter weights (may be NULL when n is 0)
   * @param n       number of taps
   * @param shift   right shift applied to the sum, with rounding offset (1<<shift)/2
   * @param start   index of the first row to update
   * @param inverse non-zero subtracts instead of adds
   */
  static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
      int x, y, i;

      for(y=start; y<height; y+=2){
          for(x=0; x<width; x++){
              int64_t sum=0;

              for(i=0; i<n; i++){
                  const int y2= lift_reflect_index(y + 2*i - n + 1, height);
                  sum += coeffs[i]*(int64_t)dst[x + y2*stride];
              }
              if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
              else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
          }
      }
  }
  /* Lifting-filter selection for horizontal_decomposeX()/horizontal_composeX()
     and spatial_decomposeX().
     For step k = 1..4: Nk is the tap count, SHIFTk the right shift and COEFFSk
     the weights handed to inplace_lift()/inplace_liftV(). LX0 and LX1 are the
     `start` indices passed to those calls, and SCALEX is the factor every
     sample is multiplied by in spatial_decomposeX().
     The chain below is resolved by the first true condition: the `#if 0`
     branch is skipped and the `#elif 1 // 13/7 CRF` branch is taken, so every
     later branch is dead.
     NOTE(review): the dead "3/5" and "11/5" branches redefine LX0/LX1/SCALEX
     without an #undef; enabling one of them as-is redefines macros that were
     already set by the three defaults just below. */
  579. #define SCALEX 1
  580. #define LX0 0
  581. #define LX1 1
  582. #if 0 // more accurate 9/7
  583. #define N1 2
  584. #define SHIFT1 14
  585. #define COEFFS1 (int[]){-25987,-25987}
  586. #define N2 2
  587. #define SHIFT2 19
  588. #define COEFFS2 (int[]){-27777,-27777}
  589. #define N3 2
  590. #define SHIFT3 15
  591. #define COEFFS3 (int[]){28931,28931}
  592. #define N4 2
  593. #define SHIFT4 15
  594. #define COEFFS4 (int[]){14533,14533}
  /* Active branch: two 4-tap steps; steps 3 and 4 have no taps. */
  595. #elif 1 // 13/7 CRF
  596. #define N1 4
  597. #define SHIFT1 4
  598. #define COEFFS1 (int[]){1,-9,-9,1}
  599. #define N2 4
  600. #define SHIFT2 4
  601. #define COEFFS2 (int[]){-1,5,5,-1}
  602. #define N3 0
  603. #define SHIFT3 1
  604. #define COEFFS3 NULL
  605. #define N4 0
  606. #define SHIFT4 1
  607. #define COEFFS4 NULL
  608. #elif 1 // 3/5
  609. #define LX0 1
  610. #define LX1 0
  611. #define SCALEX 0.5
  612. #define N1 2
  613. #define SHIFT1 1
  614. #define COEFFS1 (int[]){1,1}
  615. #define N2 2
  616. #define SHIFT2 2
  617. #define COEFFS2 (int[]){-1,-1}
  618. #define N3 0
  619. #define SHIFT3 0
  620. #define COEFFS3 NULL
  621. #define N4 0
  622. #define SHIFT4 0
  623. #define COEFFS4 NULL
  624. #elif 1 // 11/5
  625. #define N1 0
  626. #define SHIFT1 1
  627. #define COEFFS1 NULL
  628. #define N2 2
  629. #define SHIFT2 2
  630. #define COEFFS2 (int[]){-1,-1}
  631. #define N3 2
  632. #define SHIFT3 0
  633. #define COEFFS3 (int[]){-1,-1}
  634. #define N4 4
  635. #define SHIFT4 7
  636. #define COEFFS4 (int[]){-5,29,29,-5}
  637. #define SCALEX 4
  638. #elif 1 // 9/7 CDF
  639. #define N1 2
  640. #define SHIFT1 7
  641. #define COEFFS1 (int[]){-203,-203}
  642. #define N2 2
  643. #define SHIFT2 12
  644. #define COEFFS2 (int[]){-217,-217}
  645. #define N3 2
  646. #define SHIFT3 7
  647. #define COEFFS3 (int[]){113,113}
  648. #define N4 2
  649. #define SHIFT4 9
  650. #define COEFFS4 (int[]){227,227}
  651. #define SCALEX 1
  652. #elif 1 // 7/5 CDF
  653. #define N1 0
  654. #define SHIFT1 1
  655. #define COEFFS1 NULL
  656. #define N2 2
  657. #define SHIFT2 2
  658. #define COEFFS2 (int[]){-1,-1}
  659. #define N3 2
  660. #define SHIFT3 0
  661. #define COEFFS3 (int[]){-1,-1}
  662. #define N4 2
  663. #define SHIFT4 4
  664. #define COEFFS4 (int[]){3,3}
  665. #elif 1 // 9/7 MN
  666. #define N1 4
  667. #define SHIFT1 4
  668. #define COEFFS1 (int[]){1,-9,-9,1}
  669. #define N2 2
  670. #define SHIFT2 2
  671. #define COEFFS2 (int[]){1,1}
  672. #define N3 0
  673. #define SHIFT3 1
  674. #define COEFFS3 NULL
  675. #define N4 0
  676. #define SHIFT4 1
  677. #define COEFFS4 NULL
  /* Fallback with the same values as the active 13/7 CRF branch above. */
  678. #else // 13/7 CRF
  679. #define N1 4
  680. #define SHIFT1 4
  681. #define COEFFS1 (int[]){1,-9,-9,1}
  682. #define N2 4
  683. #define SHIFT2 4
  684. #define COEFFS2 (int[]){-1,5,5,-1}
  685. #define N3 0
  686. #define SHIFT3 1
  687. #define COEFFS3 NULL
  688. #define N4 0
  689. #define SHIFT4 1
  690. #define COEFFS4 NULL
  691. #endif
  692. static void horizontal_decomposeX(int *b, int width){
  693. int temp[width];
  694. const int width2= width>>1;
  695. const int w2= (width+1)>>1;
  696. int A1,A2,A3,A4, x;
  697. inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
  698. inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
  699. inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
  700. inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
  701. for(x=0; x<width2; x++){
  702. temp[x ]= b[2*x ];
  703. temp[x+w2]= b[2*x + 1];
  704. }
  705. if(width&1)
  706. temp[x ]= b[2*x ];
  707. memcpy(b, temp, width*sizeof(int));
  708. }
  709. static void horizontal_composeX(int *b, int width){
  710. int temp[width];
  711. const int width2= width>>1;
  712. int A1,A2,A3,A4, x;
  713. const int w2= (width+1)>>1;
  714. memcpy(temp, b, width*sizeof(int));
  715. for(x=0; x<width2; x++){
  716. b[2*x ]= temp[x ];
  717. b[2*x + 1]= temp[x+w2];
  718. }
  719. if(width&1)
  720. b[2*x ]= temp[x ];
  721. inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
  722. inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
  723. inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
  724. inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
  725. }
  726. static void spatial_decomposeX(int *buffer, int width, int height, int stride){
  727. int x, y;
  728. for(y=0; y<height; y++){
  729. for(x=0; x<width; x++){
  730. buffer[y*stride + x] *= SCALEX;
  731. }
  732. }
  733. for(y=0; y<height; y++){
  734. horizontal_decomposeX(buffer + y*stride, width);
  735. }
  736. inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
  737. inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
  738. inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
  739. inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
  740. }
  741. static void spatial_composeX(int *buffer, int width, int height, int stride){
  742. int x, y;
  743. inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
  744. inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
  745. inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
  746. inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
  747. for(y=0; y<height; y++){
  748. horizontal_composeX(buffer + y*stride, width);
  749. }
  750. for(y=0; y<height; y++){
  751. for(x=0; x<width; x++){
  752. buffer[y*stride + x] /= SCALEX;
  753. }
  754. }
  755. }
/**
 * Forward horizontal pass of the "53i" integer transform on one row.
 * The row is first de-interleaved into temp (even-indexed samples at
 * temp[0..w2-1], odd-indexed samples from temp[w2] on); the two lift() calls
 * then write the result back into b using the same split layout.
 *
 * @param b     row of width samples, overwritten with the result
 * @param width number of samples in the row
 */
static void horizontal_decompose53i(int *b, int width){
    int temp[width];
    const int width2= width>>1;
    int A1,A2,A3,A4, x;
    const int w2= (width+1)>>1;

    /* split the row into its even and odd samples */
    for(x=0; x<width2; x++){
        temp[x   ]= b[2*x    ];
        temp[x+w2]= b[2*x + 1];
    }
    /* x == width2 here: copy the trailing even sample of an odd-width row */
    if(width&1)
        temp[x   ]= b[2*x    ];
#if 0
    /* disabled hand-unrolled variant; A1..A4 are only used here */
    A2= temp[1       ];
    A4= temp[0       ];
    A1= temp[0+width2];
    A1 -= (A2 + A4)>>1;
    A4 += (A1 + 1)>>1;
    b[0+width2] = A1;
    b[0       ] = A4;
    for(x=1; x+1<width2; x+=2){
        A3= temp[x+width2];
        A4= temp[x+1     ];
        A3 -= (A2 + A4)>>1;
        A2 += (A1 + A3 + 2)>>2;
        b[x+width2] = A3;
        b[x       ] = A2;
        A1= temp[x+1+width2];
        A2= temp[x+2       ];
        A1 -= (A2 + A4)>>1;
        A4 += (A1 + A3 + 2)>>2;
        b[x+1+width2] = A1;
        b[x+1       ] = A4;
    }
    A3= temp[width-1];
    A3 -= A2;
    A2 += (A1 + A3 + 2)>>2;
    b[width -1] = A3;
    b[width2-1] = A2;
#else
    /* active path: first lift() fills the second half of b from temp,
     * second lift() fills the first half of b using that new second half */
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
#endif
}
  799. static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
  800. int i;
  801. for(i=0; i<width; i++){
  802. b1[i] -= (b0[i] + b2[i])>>1;
  803. }
  804. }
  805. static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
  806. int i;
  807. for(i=0; i<width; i++){
  808. b1[i] += (b0[i] + b2[i] + 2)>>2;
  809. }
  810. }
/**
 * One 2-D decomposition level of the "53i" transform, processed as a sliding
 * window of rows: each iteration fetches two new rows (b2, b3), runs the
 * horizontal pass on them and then the two vertical lifting steps on the
 * rows currently in the window.
 *
 * Row indices outside the plane go through mirror(); the pointer comparisons
 * (b1 <= b3, b0 <= b2) gate each step.
 * NOTE(review): presumably they skip work on rows that were only reached by
 * mirroring -- confirm against mirror().
 *
 * @param buffer top-left sample of the plane, transformed in place
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between the starts of consecutive rows, in samples
 */
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
    int x, y;
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;

    /* starts two rows above the plane so the window is primed before row 0 */
    for(y=-2; y<height; y+=2){
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;

        /* horizontal pass on the two rows entering the window */
        {START_TIMER
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
        if(y+2 < height) horizontal_decompose53i(b3, width);
        STOP_TIMER("horizontal_decompose53i")}

        /* vertical lifting: H0 on b2 first, then L0 on b1 which reads the updated b2 */
        {START_TIMER
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
        STOP_TIMER("vertical_decompose53i*")}

        /* slide the window down by two rows */
        b0=b2;
        b1=b3;
    }
}
/*
 * Integer lifting constants for the "97i" transform.
 * For each step X in {A, B, C, D}: W_XM is the multiplier, W_XO the offset
 * added before shifting and W_XS the right-shift amount, as used in
 * (W_XM*(b0[i] + b2[i]) + W_XO) >> W_XS by the vertical_*97i* helpers.
 *
 * lift5 starts out as an alias for lift. The first (active) set undefines it
 * again, which makes the *97iH1 helpers take their shift-and-add branch
 * (#ifdef lift5) instead of multiplying by W_CM.
 */
#define lift5 lift
#if 1
/* active set */
#define W_AM 3
#define W_AO 0
#define W_AS 1
#define W_BM 1
#define W_BO 8
#define W_BS 4
#undef lift5
/* NOTE(review): W_CM is not used by the H1 helpers in this configuration;
 * it is still passed to lift5() by the horizontal passes -- presumably a
 * separate lift5() defined elsewhere ignores it, confirm. */
#define W_CM 9999
#define W_CO 2
#define W_CS 2
#define W_DM 15
#define W_DO 16
#define W_DS 5
#elif 0
/* disabled alternative set */
#define W_AM 55
#define W_AO 16
#define W_AS 5
#define W_BM 3
#define W_BO 32
#define W_BS 6
#define W_CM 127
#define W_CO 64
#define W_CS 7
#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
/* disabled alternative set */
#define W_AM 97
#define W_AO 32
#define W_AS 6
#define W_BM 63
#define W_BO 512
#define W_BS 10
#define W_CM 13
#define W_CO 8
#define W_CS 4
#define W_DM 15
#define W_DO 16
#define W_DS 5
#else
/* disabled set; same multipliers and shifts (203>>7, 217>>12, 113>>7, 227>>9)
 * as the "9/7 CDF" COEFFS table earlier in this file */
#define W_AM 203
#define W_AO 64
#define W_AS 7
#define W_BM 217
#define W_BO 2048
#define W_BS 12
#define W_CM 113
#define W_CO 64
#define W_CS 7
#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
  885. static void horizontal_decompose97i(int *b, int width){
  886. int temp[width];
  887. const int w2= (width+1)>>1;
  888. lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
  889. lift (temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
  890. lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
  891. lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
  892. }
  893. static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
  894. int i;
  895. for(i=0; i<width; i++){
  896. b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
  897. }
  898. }
  899. static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
  900. int i;
  901. for(i=0; i<width; i++){
  902. #ifdef lift5
  903. b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
  904. #else
  905. int r= 3*(b0[i] + b2[i]);
  906. r+= r>>4;
  907. r+= r>>8;
  908. b1[i] += (r+W_CO)>>W_CS;
  909. #endif
  910. }
  911. }
  912. static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
  913. int i;
  914. for(i=0; i<width; i++){
  915. b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
  916. }
  917. }
  918. static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
  919. int i;
  920. for(i=0; i<width; i++){
  921. b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
  922. }
  923. }
/**
 * One 2-D decomposition level of the "97i" transform, processed as a sliding
 * window of six rows (b0..b5). Each iteration fetches two new rows (b4, b5),
 * runs the horizontal pass on them and then the four vertical lifting steps
 * H0, L0, H1, L1 on successively older rows of the window.
 *
 * Row indices outside the plane go through mirror(); the pointer comparisons
 * gate each step.
 * NOTE(review): presumably they skip work on rows that were only reached by
 * mirroring -- confirm against mirror().
 *
 * @param buffer top-left sample of the plane, transformed in place
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between the starts of consecutive rows, in samples
 */
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
    int x, y;
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;

    /* starts four rows above the plane so the window is primed before row 0 */
    for(y=-4; y<height; y+=2){
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;

        /* horizontal pass on the two rows entering the window;
         * timing is only reported for rows wider than 400 samples */
        {START_TIMER
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
        if(y+4 < height) horizontal_decompose97i(b5, width);
        if(width>400){
        STOP_TIMER("horizontal_decompose97i")
        }}

        /* vertical lifting steps; each one reads rows the previous one just updated */
        {START_TIMER
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
        if(width>400){
        STOP_TIMER("vertical_decompose97i")
        }}

        /* slide the window down by two rows */
        b0=b2;
        b1=b3;
        b2=b4;
        b3=b5;
    }
}
  953. static void spatial_dwt(SnowContext *s, int *buffer, int width, int height, int stride){
  954. int level;
  955. for(level=0; level<s->spatial_decomposition_count; level++){
  956. switch(s->spatial_decomposition_type){
  957. case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
  958. case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
  959. case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
  960. }
  961. }
  962. }
/**
 * Inverse horizontal pass of the "53i" integer transform on one row.
 * The two lift() calls (last argument 1) rebuild both halves of the row in
 * temp from the split layout in b; the final loop interleaves temp back into
 * b (first half to even positions, second half to odd positions).
 *
 * @param b     row of width samples in split layout, overwritten with the result
 * @param width number of samples in the row
 */
static void horizontal_compose53i(int *b, int width){
    int temp[width];
    const int width2= width>>1;
    const int w2= (width+1)>>1;
    int A1,A2,A3,A4, x;

#if 0
    /* disabled code: it reads temp before anything has been stored in it,
     * so it could not be enabled as is */
    A2= temp[1       ];
    A4= temp[0       ];
    A1= temp[0+width2];
    A1 -= (A2 + A4)>>1;
    A4 += (A1 + 1)>>1;
    b[0+width2] = A1;
    b[0       ] = A4;
    for(x=1; x+1<width2; x+=2){
        A3= temp[x+width2];
        A4= temp[x+1     ];
        A3 -= (A2 + A4)>>1;
        A2 += (A1 + A3 + 2)>>2;
        b[x+width2] = A3;
        b[x       ] = A2;
        A1= temp[x+1+width2];
        A2= temp[x+2       ];
        A1 -= (A2 + A4)>>1;
        A4 += (A1 + A3 + 2)>>2;
        b[x+1+width2] = A1;
        b[x+1       ] = A4;
    }
    A3= temp[width-1];
    A3 -= A2;
    A2 += (A1 + A3 + 2)>>2;
    b[width -1] = A3;
    b[width2-1] = A2;
#else
    /* active path: same two lift() steps as horizontal_decompose53i, in
     * reverse order and with the last argument set to 1 */
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
#endif
    /* re-interleave the two halves */
    for(x=0; x<width2; x++){
        b[2*x    ]= temp[x   ];
        b[2*x + 1]= temp[x+w2];
    }
    /* x == width2 here: trailing even sample of an odd-width row */
    if(width&1)
        b[2*x    ]= temp[x   ];
}
  1006. static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
  1007. int i;
  1008. for(i=0; i<width; i++){
  1009. b1[i] += (b0[i] + b2[i])>>1;
  1010. }
  1011. }
  1012. static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
  1013. int i;
  1014. for(i=0; i<width; i++){
  1015. b1[i] -= (b0[i] + b2[i] + 2)>>2;
  1016. }
  1017. }
/**
 * One 2-D reconstruction level of the "53i" transform, processed as a
 * sliding window of rows. Each iteration applies the two inverse vertical
 * lifting steps to the rows in the window and then runs the horizontal
 * inverse pass on the two oldest rows (b0, b1) before they leave the window.
 *
 * Row indices outside the plane go through mirror(); the pointer comparisons
 * gate each step.
 * NOTE(review): presumably they skip work on rows that were only reached by
 * mirroring -- confirm against mirror().
 *
 * @param buffer top-left sample of the plane, transformed in place
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between the starts of consecutive rows, in samples
 */
static void spatial_compose53i(int *buffer, int width, int height, int stride){
    int x, y;
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;

    /* note the inclusive bound: the loop runs one step past the last row so
     * the final rows still get their horizontal pass */
    for(y=-1; y<=height; y+=2){
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;

        /* inverse vertical lifting, in the reverse order of the decomposition */
        {START_TIMER
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
        STOP_TIMER("vertical_compose53i*")}

        /* horizontal inverse pass on the two rows about to leave the window */
        {START_TIMER
        if(y-1 >= 0) horizontal_compose53i(b0, width);
        if(b0 <= b2) horizontal_compose53i(b1, width);
        STOP_TIMER("horizontal_compose53i")}

        /* slide the window down by two rows */
        b0=b2;
        b1=b3;
    }
}
  1037. static void horizontal_compose97i(int *b, int width){
  1038. int temp[width];
  1039. const int w2= (width+1)>>1;
  1040. lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
  1041. lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
  1042. lift (b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
  1043. lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
  1044. }
  1045. static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
  1046. int i;
  1047. for(i=0; i<width; i++){
  1048. b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
  1049. }
  1050. }
  1051. static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
  1052. int i;
  1053. for(i=0; i<width; i++){
  1054. #ifdef lift5
  1055. b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
  1056. #else
  1057. int r= 3*(b0[i] + b2[i]);
  1058. r+= r>>4;
  1059. r+= r>>8;
  1060. b1[i] -= (r+W_CO)>>W_CS;
  1061. #endif
  1062. }
  1063. }
  1064. static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
  1065. int i;
  1066. for(i=0; i<width; i++){
  1067. b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
  1068. }
  1069. }
  1070. static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
  1071. int i;
  1072. for(i=0; i<width; i++){
  1073. b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
  1074. }
  1075. }
/**
 * One 2-D reconstruction level of the "97i" transform, processed as a
 * sliding window of six rows (b0..b5). Each iteration applies the four
 * inverse vertical lifting steps L1, H1, L0, H0 to successively older rows
 * and then runs the horizontal inverse pass on the two oldest rows (b0, b1).
 *
 * Row indices outside the plane go through mirror(); the pointer comparisons
 * gate each step.
 * NOTE(review): presumably they skip work on rows that were only reached by
 * mirroring -- confirm against mirror().
 *
 * @param buffer top-left sample of the plane, transformed in place
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between the starts of consecutive rows, in samples
 */
static void spatial_compose97i(int *buffer, int width, int height, int stride){
    int x, y;
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;

    /* note the inclusive bound: the loop runs past the last row so the final
     * rows still get their horizontal pass */
    for(y=-3; y<=height; y+=2){
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;

        /* never executed: the condition ends in "&& 0". When it was live it
         * added a constant bias to each half of the incoming row. */
        if(stride == width && y+4 < height && 0){
            int x;
            for(x=0; x<width/2; x++)
                b5[x] += 64*2;
            for(; x<width; x++)
                b5[x] += 169*2;
        }

        /* inverse vertical lifting, in the reverse order of the decomposition;
         * timing is only reported for rows wider than 400 samples */
        {START_TIMER
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
        if(width>400){
        STOP_TIMER("vertical_compose97i")}}

        /* horizontal inverse pass on the two rows about to leave the window */
        {START_TIMER
        if(y-1>=  0) horizontal_compose97i(b0, width);
        if(b0 <= b2) horizontal_compose97i(b1, width);
        if(width>400 && b0 <= b2){
        STOP_TIMER("horizontal_compose97i")}}

        /* slide the window down by two rows */
        b0=b2;
        b1=b3;
        b2=b4;
        b3=b5;
    }
}
  1110. static void spatial_idwt(SnowContext *s, int *buffer, int width, int height, int stride){
  1111. int level;
  1112. for(level=s->spatial_decomposition_count-1; level>=0; level--){
  1113. switch(s->spatial_decomposition_type){
  1114. case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
  1115. case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
  1116. case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break;
  1117. }
  1118. }
  1119. }
/*
 * Visiting order for the 16 cells of a 4x4 grid: every cell appears exactly
 * once and consecutive entries differ by one step in a single coordinate.
 * NOTE(review): presumably each entry is {x, y} -- confirm against the users
 * of this table.
 */
static const int hilbert[16][2]={
    {0,0}, {1,0}, {1,1}, {0,1},
    {0,2}, {0,3}, {1,3}, {1,2},
    {2,2}, {2,3}, {3,3}, {3,2},
    {3,1}, {2,1}, {2,0}, {3,0},
};
  1126. #if 0
  1127. -o o-
  1128. | |
  1129. o-o
  1130. -o-o o-o-
  1131. | |
  1132. o-o o-o
  1133. | |
  1134. o o-o o
  1135. | | | |
  1136. o-o o-o
  1137. 0112122312232334122323342334
  1138. 0123456789ABCDEF0123456789AB
  1139. RLLRMRRLLRRMRLLMLRRLMLLRRLLM
  1140. 4 B F 14 1B
  1141. 4 11 15 20 27
  1142. -o o-o-o o-o-o o-
  1143. | | | | | |
  1144. o-o o-o o-o o-o
  1145. | |
  1146. o-o o-o o-o o-o
  1147. | | | | | |
  1148. o o-o-o o-o-o o
  1149. | |
  1150. o-o o-o-o-o o-o
  1151. | | | |
  1152. o-o o-o o-o o-o
  1153. | | | |
  1154. o o-o o o o-o o
  1155. | | | | | | | |
  1156. o-o o-o o-o o-o
  1157. #endif
/*
 * Stores the pair (x, y) in a[i][0] and a[i][1], then increments i.
 * Expands to a bare brace block: i must be a modifiable lvalue, it is
 * evaluated several times, and none of the arguments are parenthesized.
 */
#define SVI(a, i, x, y) \
    {\
        a[i][0]= x;\
        a[i][1]= y;\
        i++;\
    }
  1164. static int sig_cmp(const void *a, const void *b){
  1165. const int16_t* da = (const int16_t *) a;
  1166. const int16_t* db = (const int16_t *) b;
  1167. if(da[1] != db[1]) return da[1] - db[1];
  1168. else return da[0] - db[0];
  1169. }
  1170. static int alloc_qtree(QTree *t, int w, int h){
  1171. int lev, x, y, tree_h;
  1172. int w2= w;
  1173. int h2= h;
  1174. t->stride=0;
  1175. t->max_level= av_log2(2*FFMAX(w,h)-1);
  1176. for(lev=t->max_level; lev>=0; lev--){
  1177. if(lev!=t->max_level)
  1178. t->stride += w2;
  1179. t->treedim[lev][0]= w2;
  1180. t->treedim[lev][1]= h2;
  1181. av_log(NULL, AV_LOG_DEBUG, "alloc %p %d %d %d\n", t, w2, h2, t->max_level);
  1182. w2= (w2+1)>>1;
  1183. h2= (h2+1)>>1;
  1184. }
  1185. t->stride= FFMAX(t->stride, w);
  1186. tree_h= h + t->treedim[t->max_level-1][1];
  1187. t->tree[t->max_level]= av_mallocz(t->stride * tree_h);
  1188. t->tree[t->max_level-1]= t->tree[t->max_level] + h*t->stride;
  1189. for(lev=t->max_level-2; lev>=0; lev--){
  1190. t->tree[lev]= t->tree[lev+1] + t->treedim[lev+1][0];
  1191. }
  1192. return 0;
  1193. }
  1194. static void free_qtree(QTree *t){
  1195. if(t && t->tree);
  1196. av_freep(&t->tree[t->max_level]);
  1197. }
/**
 * Fills a quadtree (previously sized by alloc_qtree) from a band of
 * coefficients.
 *
 * The finest level stores min(|coefficient|, 16) per sample. Each coarser
 * node stores (sum of its children + 3, clipped to 0..64) / 4, so a node is
 * nonzero whenever any of its children is nonzero. Nodes on the right or
 * bottom border of an odd-sized level only have two children (or one, in the
 * corner) and are built from those.
 *
 * @param t      tree to fill; w and h must match its finest level (asserted)
 * @param src    top-left coefficient of the band
 * @param w      band width
 * @param h      band height
 * @param stride distance between the starts of consecutive rows of src, in samples
 */
static void init_quandtree(QTree *t, DWTELEM *src, int w, int h, int stride){
    const int max_level= t->max_level;
    const int tree_stride= t->stride;
    uint8_t **tree= t->tree;
    int lev, x, y, tree_h, w2, h2;
    //av_log(NULL, AV_LOG_DEBUG, "init %p %d %d %d %d %d\n", t, w, h, t->max_level, t->treedim[max_level][0], t->treedim[max_level][1]);
    assert(w==t->treedim[max_level][0]);
    assert(h==t->treedim[max_level][1]);

    /* finest level: clipped magnitudes */
    for(y=0; y<h; y++){
        for(x=0; x<w; x++){
            tree[max_level][x + y*tree_stride]= clip(ABS(src[x + y*stride]), 0, 16);
        }
    }

    for(lev=max_level-1; lev>=0; lev--){
        /* w2 x h2 = nodes of this level that have a full 2x2 set of children */
        w2= t->treedim[lev+1][0]>>1;
        h2= t->treedim[lev+1][1]>>1;
        for(y=0; y<h2; y++){
            for(x=0; x<w2; x++){
                tree[lev][x + y*tree_stride]=clip( (tree[lev+1][2*x     +  2*y   *tree_stride])
                                                  + (tree[lev+1][2*x + 1 +  2*y   *tree_stride])
                                                  + (tree[lev+1][2*x     + (2*y+1)*tree_stride])
                                                  + (tree[lev+1][2*x + 1 + (2*y+1)*tree_stride])+3, 0, 64)/4;
            }
        }
        /* extra right-hand column when the child level has odd width */
        if(w2 != t->treedim[lev][0]){
            for(y=0; y<h2; y++){
                tree[lev][w2 + y*tree_stride]=clip( (tree[lev+1][2*w2 +  2*y   *tree_stride])
                                                   +(tree[lev+1][2*w2 + (2*y+1)*tree_stride])+3, 0, 64)/4;
            }
        }
        /* extra bottom row when the child level has odd height */
        if(h2 != t->treedim[lev][1]){
            for(x=0; x<w2; x++){
                tree[lev][x + h2*tree_stride]=clip( (tree[lev+1][2*x     + 2*h2*tree_stride])
                                                   +(tree[lev+1][2*x + 1 + 2*h2*tree_stride])+3, 0, 64)/4;
            }
        }
        /* bottom-right corner with a single child: copied unchanged */
        if(w2 != t->treedim[lev][0] && h2 != t->treedim[lev][1]){
            tree[lev][w2 + h2*tree_stride]= tree[lev+1][2*w2 + 2*h2*tree_stride];
        }
    }
}
/* Global statistics counters. gray_leaf is incremented by encode_branch() for
 * every finest-level coefficient it codes; white_leaf is not modified in this
 * part of the file. Both only appear in a commented-out log statement here. */
int white_leaf, gray_leaf;
/**
 * Recursively codes one quadtree node of subband b and everything below it.
 *
 * For the node at (lev, x, y) a significance flag (!!v) is written with
 * put_cabac() unless it is implied. If the node is significant, the function
 * either codes the coefficient itself (finest level: magnitude-1 through
 * put_symbol(), then the sign) or recurses into the up to four children.
 *
 * @param s      encoder context (provides the CABAC coder s->c)
 * @param b      subband being coded; b->tree and b->parent->tree must be filled
 * @param src    coefficients of the subband
 * @param parent coefficients of the parent subband, or NULL
 * @param stride row stride of src in samples (the parent is read with 2*stride)
 * @param lev    tree level of the node (b->tree.max_level is the finest)
 * @param x      node column at that level
 * @param y      node row at that level
 * @param first  nonzero for the nodes the traversal starts from; such nodes
 *               skip the parent-node check and the position-based context
 */
static void encode_branch(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int lev, int x, int y, int first){
    const int max_level= b->tree.max_level;
    const int pmax_level= b->parent ? b->parent->tree.max_level : 0;
    const int tree_stride= b->tree.stride;
    const int ptree_stride= b->parent ? b->parent->tree.stride : 0;
    int (*treedim)[2]= b->tree.treedim;
    int (*ptreedim)[2]= b->parent ? b->parent->tree.treedim : NULL;
    uint8_t **tree= b->tree.tree;
    uint8_t **ptree= b->parent ? b->parent->tree.tree : NULL;
    // int w2=w, h2=h;
    int l=0, t=0, lt=0, p=0;
    int v= tree[lev][x + y*tree_stride];
    int context, sig;

    /* nothing to code below an insignificant parent node */
    if(!first && !tree[lev-1][x/2 + y/2*tree_stride])
        return;

    /* left, top and top-left neighbours on the same tree level */
    if(x) l= tree[lev][x - 1 + y*tree_stride];
    if(y){
        t= tree[lev][x + (y-1)*tree_stride];
        if(x) lt= tree[lev][x - 1 + (y-1)*tree_stride];
    }
    /* co-located node in the parent band's tree */
    if(lev < max_level && parent && x<ptreedim[lev][0] && y<ptreedim[lev][1])
        p= ptree[lev - max_level + pmax_level + 1][x + y*ptree_stride];

    if(lev != max_level)
        context= lev + 32*av_log2(2*(3*(l) + 2*(t) + (lt) + 2*(p)));
    else{
        /* finest level: the context uses the actual coefficients. These
         * locals shadow the tree-based l/t/lt/p only inside this block. */
        int p=0, l=0, lt=0, t=0, rt=0;
        if(y){
            t= src[x + (y-1)*stride];
            if(x)
                lt= src[x - 1 + (y-1)*stride];
        }
        if(x)
            l= src[x - 1 + y*stride];
        if(parent){
            int px= x>>1;
            int py= y>>1;
            if(px<b->parent->width && py<b->parent->height){
                p= parent[px + py*2*stride];
            }
        }
        context= lev + 32*av_log2(2*(3*ABS(l) + 2*ABS(t) + ABS(lt) + ABS(p)));
    }

    /* sig: an already-coded sibling in the same 2x2 group is significant
     * (uses the tree-based neighbours declared at the top) */
    if( (x&1) && l) sig=1;
    else if((y&1) && t) sig=1;
    else if((x&1) && (y&1) && lt) sig=1;
    else sig=0;
    if(!first){
        if(sig) context+= 8+16;
        else context+= 8*(x&1) + 16*(y&1);
    }

    /* the flag is skipped only for the last child (x and y odd) whose three
     * neighbours are all zero: the parent was significant, so it must be set */
    if(l||t||lt||(x&1)==0||(y&1)==0||first){
        put_cabac(&s->c, &b->state[98][context], !!v);
    }else
        assert(v);

    if(v){
        if(lev==max_level){
            /* leaf: code the coefficient value */
            int p=0;
            int /*ll=0, */l=0, lt=0, t=0;
            int v= src[x + y*stride];
            if(y){
                t= src[x + (y-1)*stride];
                if(x){
                    lt= src[x - 1 + (y-1)*stride];
                }
            }
            if(x){
                l= src[x - 1 + y*stride];
            }
            if(parent){
                int px= x>>1;
                int py= y>>1;
                if(px<b->parent->width && py<b->parent->height){
                    p= parent[px + py*2*stride];
                }
            }
            {
                int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(p)
                                     /*+ 3*(!!r) + 2*(!!d)*/);
                /* magnitude minus one, then the sign with a context built
                 * from the left and top neighbours */
                put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
                put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
                assert(tree[max_level][x + y*tree_stride]);
                assert(tree[max_level-1][x/2 + y/2*tree_stride]);
            }
            gray_leaf++;
        }else{
            /* inner node: descend into the children that exist at lev+1 */
            int r= 2*x+1 < treedim[lev+1][0];
            int d= 2*y+1 < treedim[lev+1][1];
            encode_branch (s, b, src, parent, stride, lev+1, 2*x , 2*y , 0);
            if(r) encode_branch(s, b, src, parent, stride, lev+1, 2*x+1, 2*y , 0);
            if(d) encode_branch(s, b, src, parent, stride, lev+1, 2*x , 2*y+1, 0);
            if(r&&d)encode_branch(s, b, src, parent, stride, lev+1, 2*x+1, 2*y+1, 0);
        }
    }
}
/**
 * Codes one subband with the quadtree coder.
 *
 * Builds the significance trees for the band (and for its parent, when
 * present), picks the coarsest tree level at which fewer than half of the
 * nodes are nonzero (or the finest level if there is none), and starts
 * encode_branch() from every node of that level.
 *
 * @param s           encoder context
 * @param b           subband to code
 * @param src         coefficients of the subband
 * @param parent      coefficients of the parent subband, or NULL
 * @param stride      row stride of src in samples
 * @param orientation not used by the active code
 */
static void encode_subband_qtree(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int level= b->level;
    const int w= b->width;
    const int h= b->height;
    int x, y, i;

    init_quandtree(&b->tree, src, b->width, b->height, stride);
    if(parent){
        init_quandtree(&b->parent->tree, parent, b->parent->width, b->parent->height, 2*stride);
    }

    /* find the start level: stop at the first level where less than half
     * of the nodes are nonzero; i ends at max_level if none qualifies */
    for(i=0; i<b->tree.max_level; i++){
        int count=0;
        for(y=0; y<b->tree.treedim[i][1]; y++){
            for(x=0; x<b->tree.treedim[i][0]; x++){
                if(b->tree.tree[i][x + y*b->tree.stride])
                    count++;
            }
        }
        if(2*count < b->tree.treedim[i][1]*b->tree.treedim[i][0])
            break;
    }
    //FIXME try recursive scan
    for(y=0; y<b->tree.treedim[i][1]; y++){
        for(x=0; x<b->tree.treedim[i][0]; x++){
            encode_branch(s, b, src, parent, stride, i, x, y, 1);
        }
    }
    // encode_branch(s, b, src, parent, stride, 0, 0, 0, 1);
    // av_log(NULL, AV_LOG_DEBUG, "%d %d\n", gray_leaf, white_leaf);
#if 0
    /* disabled level-by-level (non-recursive) variant; it refers to locals
     * (lev, w2, h2, tree, ...) that this function no longer declares */
    for(lev=0; lev<=max_level; lev++){
        w2= treedim[lev][0];
        h2= treedim[lev][1];
        for(y=0; y<h2; y++){
            for(x=0; x<w2; x++){
                int l= 0, t=0, rt=0, lt=0, p=0;
                int v= tree[lev][x + y*tree_stride];
                int context, sig;
                if(lev && !tree[lev-1][x/2 + y/2*tree_stride])
                    continue;
                if(x) l= tree[lev][x - 1 + y*tree_stride];
                if(y){
                    t= tree[lev][x + (y-1)*tree_stride];
                    if(x) lt= tree[lev][x - 1 + (y-1)*tree_stride];
                    if(x+1<w2) rt= tree[lev][x + 1 + (y-1)*tree_stride];
                }
                if(lev < max_level && parent && x<ptreedim[lev][0] && y<ptreedim[lev][1])
                    p= ptree[lev][x + y*ptree_stride];
                context= lev + 32*av_log2(2*(3*l + 2*t + lt + rt + 8*p));
                if( (x&1) && l) sig=1;
                else if((y&1) && t) sig=1;
                else if((x&1) && (y&1) && lt) sig=1;
                else sig=0;
                if(sig) context+= 8+16;
                else context+= 8*(x&1) + 16*(y&1);
                if(l||t||lt||(x&1)==0||(y&1)==0)
                    put_cabac(&s->c, &b->state[98][context], !!v);
                else
                    assert(v);
                if(v && lev==max_level){
                    int p=0;
                    int /*ll=0, */l=0, lt=0, t=0, rt=0;
                    int v= src[x + y*stride];
                    if(y){
                        t= src[x + (y-1)*stride];
                        if(x){
                            lt= src[x - 1 + (y-1)*stride];
                        }
                        if(x + 1 < w){
                            rt= src[x + 1 + (y-1)*stride];
                        }
                    }
                    if(x){
                        l= src[x - 1 + y*stride];
                        /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else ll= src[x - 2 + y*stride];
                        }*/
                    }
                    if(parent){
                        int px= x>>1;
                        int py= y>>1;
                        if(px<b->parent->width && py<b->parent->height){
                            p= parent[px + py*2*stride];
                        }
                    }
                    if(v){
                        int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(p)
                                             /*+ 3*(!!r) + 2*(!!d)*/);
                        put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
                        put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
                        assert(tree[max_level][x + y*tree_stride]);
                        assert(tree[max_level-1][x/2 + y/2*tree_stride]);
                    }else
                        assert(0);
                }
            }
        }
    }
#endif
}
  1434. static int deint(unsigned int a){
  1435. a &= 0x55555555; //0 1 2 3 4 5 6 7 8 9 A B C D E F
  1436. a += a & 0x11111111; // 01 23 45 67 89 AB CD EF
  1437. a += 3*(a & 0x0F0F0F0F);// 0123 4567 89AB CDEF
  1438. a += 15*(a & 0x00FF00FF);// 01234567 89ABCDEF
  1439. a +=255*(a & 0x0000FFFF);// 0123456789ABCDEF
  1440. return a>>15;
  1441. }
/**
 * Codes one subband with zero-run coding, visiting coefficients in
 * bit-interleaved order: x comes from the even bits of the position counter
 * and y from its odd bits (see deint()).
 *
 * A coefficient whose left, top-left, top and parent neighbours are all zero
 * is covered by run lengths; every other coefficient gets an explicit
 * significance flag. Nonzero coefficients are followed by magnitude-1 and
 * sign.
 *
 * Pass 1 collects the run lengths, pass 2 writes the bitstream. Both passes
 * must walk the positions and compute the neighbourhood identically.
 *
 * @param s           encoder context (provides the CABAC coder s->c)
 * @param b           subband to code
 * @param src         coefficients of the subband
 * @param parent      coefficients of the parent subband, or NULL
 * @param stride      row stride of src in samples (the parent is read with 2*stride)
 * @param orientation only referenced from commented-out code
 */
static void encode_subband_z0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int level= b->level;
    const int w= b->width;
    const int h= b->height;
    int x, y, pos;
    if(1){
        int run=0;
        int runs[w*h];  /* worst case: one run per coefficient; lives on the stack */
        int run_index=0;
        int count=0;

        /* pass 1: gather run lengths */
        for(pos=0; ; pos++){
            int x= deint(pos );
            int y= deint(pos>>1);
            int v, p=0, pr=0, pd=0;
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
            /* skip positions outside the band; stop once both coordinates are outside */
            if(x>=w || y>=h){
                if(x>=w && y>=h)
                    break;
                continue;
            }
            count++;
            v= src[x + y*stride];
            if(y){
                t= src[x + (y-1)*stride];
                if(x){
                    lt= src[x - 1 + (y-1)*stride];
                }
                if(x + 1 < w){
                    /*rt= src[x + 1 + (y-1)*stride]*/;
                }
            }
            if(x){
                l= src[x - 1 + y*stride];
                /*if(x > 1){
                if(orientation==1) ll= src[y + (x-2)*stride];
                else ll= src[x - 2 + y*stride];
                }*/
            }
            if(parent){
                int px= x>>1;
                int py= y>>1;
                if(px<b->parent->width && py<b->parent->height){
                    p= parent[px + py*2*stride];
                    /*if(px+1<b->parent->width)
                    pr= parent[px + 1 + py*2*stride];
                    if(py+1<b->parent->height)
                    pd= parent[px + (py+1)*2*stride];*/
                }
            }
            /* only coefficients with an all-zero neighbourhood take part in runs */
            if(!(/*ll|*/l|lt|t|/*rt|*/p)){
                if(v){
                    runs[run_index++]= run;
                    run=0;
                }else{
                    run++;
                }
            }
        }
        /* every coefficient of the band must have been visited */
        assert(count==w*h);
        /* trailing run of zeros after the last nonzero coefficient */
        runs[run_index++]= run;

        /* pass 2: emit the first run, then the coefficients */
        run_index=0;
        run= runs[run_index++];
        put_symbol(&s->c, b->state[1], run, 0);
        for(pos=0; ; pos++){
            int x= deint(pos );
            int y= deint(pos>>1);
            int v, p=0, pr=0, pd=0;
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
            if(x>=w || y>=h){
                if(x>=w && y>=h)
                    break;
                continue;
            }
            v= src[x + y*stride];
            if(y){
                t= src[x + (y-1)*stride];
                if(x){
                    lt= src[x - 1 + (y-1)*stride];
                }
                if(x + 1 < w){
                    // rt= src[x + 1 + (y-1)*stride];
                }
            }
            if(x){
                l= src[x - 1 + y*stride];
                /*if(x > 1){
                if(orientation==1) ll= src[y + (x-2)*stride];
                else ll= src[x - 2 + y*stride];
                }*/
            }
            if(parent){
                int px= x>>1;
                int py= y>>1;
                if(px<b->parent->width && py<b->parent->height){
                    p= parent[px + py*2*stride];
                    /* if(px+1<b->parent->width)
                    pr= parent[px + 1 + py*2*stride];
                    if(py+1<b->parent->height)
                    pd= parent[px + (py+1)*2*stride];*/
                }
            }
            if(/*ll|*/l|lt|t|/*rt|*/p){
                /* nonzero neighbourhood: explicit significance flag */
                int context= av_log2(/*ABS(ll) + */2*(3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p)));
                put_cabac(&s->c, &b->state[0][context], !!v);
            }else{
                /* zero neighbourhood: significance is implied by the runs */
                if(!run){
                    run= runs[run_index++];
                    put_symbol(&s->c, b->state[1], run, 0);
                    assert(v);
                }else{
                    run--;
                    assert(!v);
                }
            }
            if(v){
                /* magnitude minus one, then the sign */
                int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p));
                put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
                put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
            }
        }
    }
}
/**
 * Bit-plane subband coder. The whole implementation is compiled out
 * (#if 0), so calling this function currently writes nothing.
 *
 * The disabled code walks bit planes 24 down to 0. For each plane it masks
 * the coefficient and its neighbours down to the bits at or above that plane
 * (HIDE), collects zero runs in a first pass and, in a second pass, writes
 * per coefficient either a refinement/significance flag or a run length,
 * plus the sign when a coefficient becomes significant on this plane.
 *
 * @param s           encoder context
 * @param b           subband to code
 * @param src         coefficients of the subband
 * @param parent      coefficients of the parent subband, or NULL
 * @param stride      row stride of src in samples
 * @param orientation only referenced from commented-out code
 */
static void encode_subband_bp(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
    const int level= b->level;
    const int w= b->width;
    const int h= b->height;
    int x, y;
#if 0
    int plane;
    for(plane=24; plane>=0; plane--){
        int run=0;
        int runs[w*h];
        int run_index=0;

        /* pass 1: gather run lengths for this plane */
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, lv, p=0;
                int d=0, r=0, rd=0, ld=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];
                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                    if(orientation==1) ll= src[y + (x-2)*stride];
                    else ll= src[x - 2 + y*stride];
                    }*/
                }
                if(y+1<h){
                    d= src[x + (y+1)*stride];
                    if(x) ld= src[x - 1 + (y+1)*stride];
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
                }
                if(x + 1 < w)
                    r= src[x + 1 + y*stride];
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
/* clears the bits of c below the given plane, keeping the sign */
#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
                /* lv = value as known from the planes above this one;
                 * neighbours not yet visited on this plane (r, rd, d, ld)
                 * are likewise limited to the planes above */
                lv=v;
                HIDE( v, plane)
                HIDE(lv, plane+1)
                HIDE( p, plane)
                HIDE( l, plane)
                HIDE(lt, plane)
                HIDE( t, plane)
                HIDE(rt, plane)
                HIDE( r, plane+1)
                HIDE(rd, plane+1)
                HIDE( d, plane+1)
                HIDE(ld, plane+1)
                if(!(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv)){
                    if(v){
                        runs[run_index++]= run;
                        run=0;
                    }else{
                        run++;
                    }
                }
            }
        }
        runs[run_index++]= run;

        /* pass 2: emit the first run, then the per-coefficient symbols */
        run_index=0;
        run= runs[run_index++];
        put_symbol(&s->c, b->state[1], run, 0);
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0, lv;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                int d=0, r=0, rd=0, ld=0;
                v= src[x + y*stride];
                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                    if(orientation==1) ll= src[y + (x-2)*stride];
                    else ll= src[x - 2 + y*stride];
                    }*/
                }
                if(y+1<h){
                    d= src[x + (y+1)*stride];
                    if(x) ld= src[x - 1 + (y+1)*stride];
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
                }
                if(x + 1 < w)
                    r= src[x + 1 + y*stride];
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                lv=v;
                HIDE( v, plane)
                HIDE(lv, plane+1)
                HIDE( p, plane)
                HIDE( l, plane)
                HIDE(lt, plane)
                HIDE( t, plane)
                HIDE(rt, plane)
                HIDE( r, plane+1)
                HIDE(rd, plane+1)
                HIDE( d, plane+1)
                HIDE(ld, plane+1)
                if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
                                         +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
                    /* already significant: refinement bit; otherwise significance flag */
                    if(lv) put_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)], !!(v-lv));
                    else put_cabac(&s->c, &b->state[ 0][context], !!v);
                }else{
                    assert(!lv);
                    if(!run){
                        run= runs[run_index++];
                        put_symbol(&s->c, b->state[1], run, 0);
                        assert(v);
                    }else{
                        run--;
                        assert(!v);
                    }
                }
                /* sign is sent once, on the plane where the coefficient first becomes nonzero */
                if(v && !lv){
                    int context= clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
                                 + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + context], v<0);
                }
            }
        }
    }
    return;
#endif
}
  /**
   * Experimental subband coder; currently a no-op.
   *
   * Everything after the local declarations is compiled out with "#if 0", and
   * the disabled code is additionally guarded by a trailing "&& 0", so calling
   * this function emits no symbols. The locals level/w/h/x/y are therefore
   * unused in the compiled function.
   *
   * The disabled code grows a set of coded positions outwards: it starts from
   * candidates whose parent coefficient is nonzero, codes one significance bit
   * per visited position, queues the 8 neighbours of every significant
   * position in three "boarder" lists (0: left/right, 1: above/below,
   * 2: diagonal), falls back to run-length scanning when the lists are empty,
   * and finally codes the values of all significant positions.
   */
  1711. static void encode_subband_X(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
  1712. const int level= b->level;
  1713. const int w= b->width;
  1714. const int h= b->height;
  1715. int x, y;
  1716. #if 0
  1717. if(orientation==3 && parent && 0){
  1718. int16_t candidate[w*h][2];
  1719. uint8_t state[w*h];
  1720. int16_t boarder[3][w*h*4][2];
  1721. int16_t significant[w*h][2];
  1722. int candidate_count=0;
  1723. int boarder_count[3]={0,0,0};
  1724. int significant_count=0;
  1725. int rle_pos=0;
  1726. int v, last_v;
  // NOTE(review): the enclosing condition requires orientation==3, so primary is always 0 here.
  1727. int primary= orientation==1;
  1728. memset(candidate, 0, sizeof(candidate));
  1729. memset(state, 0, sizeof(state));
  1730. memset(boarder, 0, sizeof(boarder));
  // seed the candidate list with every position whose parent coefficient is nonzero
  1731. for(y=0; y<h; y++){
  1732. for(x=0; x<w; x++){
  1733. if(parent[(x>>1) + (y>>1)*2*stride])
  1734. SVI(candidate, candidate_count, x, y)
  1735. }
  1736. }
  1737. for(;;){
  // phase 1: consume candidates while no neighbour of a significant position is pending
  1738. while(candidate_count && !boarder_count[0] && !boarder_count[1] && !boarder_count[2]){
  1739. candidate_count--;
  1740. x= candidate[ candidate_count][0];
  1741. y= candidate[ candidate_count][1];
  1742. if(state[x + y*w])
  1743. continue;
  1744. state[x + y*w]= 1;
  1745. v= !!src[x + y*stride];
  1746. put_cabac(&s->c, &b->state[0][0], v);
  1747. if(v){
  1748. SVI(significant, significant_count, x,y)
  1749. if(x && !state[x - 1 + y *w]) SVI(boarder[0],boarder_count[0],x-1,y )
  1750. if(y && !state[x + (y-1)*w]) SVI(boarder[1],boarder_count[1],x ,y-1)
  1751. if(x+1<w && !state[x + 1 + y *w]) SVI(boarder[0],boarder_count[0],x+1,y )
  1752. if(y+1<h && !state[x + (y+1)*w]) SVI(boarder[1],boarder_count[1],x ,y+1)
  1753. if(x && y && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
  1754. if(x && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
  1755. if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
  1756. if(x+1<w && y && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
  1757. }
  1758. }
  // phase 2: raster scan over unvisited positions, coding the zero run before the next nonzero one
  1759. while(!boarder_count[0] && !boarder_count[1] && !boarder_count[2] && rle_pos < w*h){
  1760. int run=0;
  1761. for(; rle_pos < w*h;){
  1762. x= rle_pos % w; //FIXME speed
  1763. y= rle_pos / w;
  1764. rle_pos++;
  1765. if(state[x + y*w])
  1766. continue;
  1767. state[x + y*w]= 1;
  1768. v= !!src[x + y*stride];
  1769. if(v){
  1770. put_symbol(&s->c, b->state[1], run, 0);
  1771. SVI(significant, significant_count, x,y)
  1772. if(x && !state[x - 1 + y *w]) SVI(boarder[0],boarder_count[0],x-1,y )
  1773. if(y && !state[x + (y-1)*w]) SVI(boarder[1],boarder_count[1],x ,y-1)
  1774. if(x+1<w && !state[x + 1 + y *w]) SVI(boarder[0],boarder_count[0],x+1,y )
  1775. if(y+1<h && !state[x + (y+1)*w]) SVI(boarder[1],boarder_count[1],x ,y+1)
  1776. if(x && y && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
  1777. if(x && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
  1778. if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
  1779. if(x+1<w && y && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
  1780. break;
  1781. //FIXME note only right & down can be boarders
  1782. }
  1783. run++;
  1784. }
  1785. }
  // nothing pending after candidates and the raster scan: every position has been visited
  1786. if(!boarder_count[0] && !boarder_count[1] && !boarder_count[2])
  1787. break;
  // phase 3: code pending neighbours, preferring list "primary", then the other axis, then diagonals
  1788. while(boarder_count[0] || boarder_count[1] || boarder_count[2]){
  1789. int index;
  1790. if (boarder_count[ primary]) index= primary;
  1791. else if(boarder_count[1-primary]) index=1-primary;
  1792. else index=2;
  1793. boarder_count[index]--;
  1794. x= boarder[index][ boarder_count[index] ][0];
  1795. y= boarder[index][ boarder_count[index] ][1];
  1796. if(state[x + y*w]) //FIXME maybe check earlier
  1797. continue;
  1798. state[x + y*w]= 1;
  1799. v= !!src[x + y*stride];
  1800. put_cabac(&s->c, &b->state[0][index+1], v);
  1801. if(v){
  1802. SVI(significant, significant_count, x,y)
  1803. if(x && !state[x - 1 + y *w]) SVI(boarder[0],boarder_count[0],x-1,y )
  1804. if(y && !state[x + (y-1)*w]) SVI(boarder[1],boarder_count[1],x ,y-1)
  1805. if(x+1<w && !state[x + 1 + y *w]) SVI(boarder[0],boarder_count[0],x+1,y )
  1806. if(y+1<h && !state[x + (y+1)*w]) SVI(boarder[1],boarder_count[1],x ,y+1)
  1807. if(x && y && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
  1808. if(x && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
  1809. if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
  1810. if(x+1<w && y && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
  1811. }
  1812. }
  1813. }
  1814. //FIXME sort significant coeffs maybe
  1815. if(1){
  1816. qsort(significant, significant_count, sizeof(int16_t[2]), sig_cmp);
  1817. }
  // code the values of all significant positions, context selected from the previously coded value
  1818. last_v=1;
  1819. while(significant_count){
  1820. int context= 3 + quant7[last_v&0xFF]; //use significance of surroundings
  1821. significant_count--;
  1822. x= significant[significant_count][0];//FIXME try opposite direction
  1823. y= significant[significant_count][1];
  1824. v= src[x + y*stride];
  1825. put_symbol(&s->c, b->state[context + 2], v, 1); //FIXME try to avoid first bit, try this with the old code too!!
  1826. last_v= v;
  1827. }
  1828. }
  1829. #endif
  1830. }
  1831. static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
  1832. const int level= b->level;
  1833. const int w= b->width;
  1834. const int h= b->height;
  1835. int x, y;
  1836. if(1){
  1837. int run=0;
  1838. int runs[w*h];
  1839. int run_index=0;
  1840. for(y=0; y<h; y++){
  1841. for(x=0; x<w; x++){
  1842. int v, p=0;
  1843. int /*ll=0, */l=0, lt=0, t=0, rt=0;
  1844. v= src[x + y*stride];
  1845. if(y){
  1846. t= src[x + (y-1)*stride];
  1847. if(x){
  1848. lt= src[x - 1 + (y-1)*stride];
  1849. }
  1850. if(x + 1 < w){
  1851. rt= src[x + 1 + (y-1)*stride];
  1852. }
  1853. }
  1854. if(x){
  1855. l= src[x - 1 + y*stride];
  1856. /*if(x > 1){
  1857. if(orientation==1) ll= src[y + (x-2)*stride];
  1858. else ll= src[x - 2 + y*stride];
  1859. }*/
  1860. }
  1861. if(parent){
  1862. int px= x>>1;
  1863. int py= y>>1;
  1864. if(px<b->parent->width && py<b->parent->height)
  1865. p= parent[px + py*2*stride];
  1866. }
  1867. if(!(/*ll|*/l|lt|t|rt|p)){
  1868. if(v){
  1869. runs[run_index++]= run;
  1870. run=0;
  1871. }else{
  1872. run++;
  1873. }
  1874. }
  1875. }
  1876. }
  1877. runs[run_index++]= run;
  1878. run_index=0;
  1879. run= runs[run_index++];
  1880. put_symbol2(&s->c, b->state[1], run, 3);
  1881. for(y=0; y<h; y++){
  1882. for(x=0; x<w; x++){
  1883. int v, p=0;
  1884. int /*ll=0, */l=0, lt=0, t=0, rt=0;
  1885. v= src[x + y*stride];
  1886. if(y){
  1887. t= src[x + (y-1)*stride];
  1888. if(x){
  1889. lt= src[x - 1 + (y-1)*stride];
  1890. }
  1891. if(x + 1 < w){
  1892. rt= src[x + 1 + (y-1)*stride];
  1893. }
  1894. }
  1895. if(x){
  1896. l= src[x - 1 + y*stride];
  1897. /*if(x > 1){
  1898. if(orientation==1) ll= src[y + (x-2)*stride];
  1899. else ll= src[x - 2 + y*stride];
  1900. }*/
  1901. }
  1902. if(parent){
  1903. int px= x>>1;
  1904. int py= y>>1;
  1905. if(px<b->parent->width && py<b->parent->height)
  1906. p= parent[px + py*2*stride];
  1907. }
  1908. if(/*ll|*/l|lt|t|rt|p){
  1909. int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
  1910. put_cabac(&s->c, &b->state[0][context], !!v);
  1911. }else{
  1912. if(!run){
  1913. run= runs[run_index++];
  1914. put_symbol2(&s->c, b->state[1], run, 3);
  1915. assert(v);
  1916. }else{
  1917. run--;
  1918. assert(!v);
  1919. }
  1920. }
  1921. if(v){
  1922. int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
  1923. put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
  1924. put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
  1925. }
  1926. }
  1927. }
  1928. }
  1929. }
  1930. static void encode_subband_dzr(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
  1931. const int level= b->level;
  1932. const int w= b->width;
  1933. const int h= b->height;
  1934. int x, y;
  1935. if(1){
  1936. int run[16]={0};
  1937. int runs[16][w*h]; //FIXME do something about the size
  1938. int run_index[16]={0};
  1939. int positions[2][w];
  1940. int distances[2][w];
  1941. int dist_count=0;
  1942. int i;
  1943. for(y=0; y<h; y++){
  1944. int * pos = positions[ y&1];
  1945. int *last_pos = positions[(y&1)^1];
  1946. int * dist= distances[ y&1];
  1947. int *last_dist= distances[(y&1)^1];
  1948. int dist_index=0;
  1949. int last_dist_index=0;
  1950. for(x=0; x<w; x++){
  1951. int p=0, l=0, lt=0, t=0, rt=0;
  1952. int v= src[x + y*stride];
  1953. if(y){
  1954. t= src[x + (y-1)*stride];
  1955. if(x){
  1956. lt= src[x - 1 + (y-1)*stride];
  1957. }
  1958. if(x + 1 < w){
  1959. rt= src[x + 1 + (y-1)*stride];
  1960. }
  1961. }
  1962. if(x){
  1963. l= src[x - 1 + y*stride];
  1964. }
  1965. if(parent){
  1966. int px= x>>1;
  1967. int py= y>>1;
  1968. if(px<b->parent->width && py<b->parent->height)
  1969. p= parent[px + py*2*stride];
  1970. }
  1971. if(last_dist_index < dist_count && last_pos[last_dist_index] == x){
  1972. if(dist_index==0 || x - pos[dist_index-1] > dist[dist_index-1] - last_dist[last_dist_index]){
  1973. pos[dist_index]= x;
  1974. dist[dist_index++]= last_dist[last_dist_index];
  1975. }
  1976. last_dist_index++;
  1977. }
  1978. if(!(l|lt|t|rt|p)){
  1979. int cur_dist=w>>1;
  1980. int run_class;
  1981. if(last_dist_index < dist_count)
  1982. cur_dist= last_pos[last_dist_index] - x + y - last_dist[last_dist_index];
  1983. if(dist_index)
  1984. cur_dist= FFMIN(cur_dist, x - pos[dist_index-1] + y - dist[dist_index-1]);
  1985. assert(cur_dist>=2);
  1986. run_class= av_log2(cur_dist+62);
  1987. if(v){
  1988. runs[run_class][run_index[run_class]++]= run[run_class];
  1989. run[run_class]=0;
  1990. }else{
  1991. run[run_class]++;
  1992. }
  1993. }
  1994. if(v){
  1995. while(dist_index && x - pos[dist_index-1] <= y - dist[dist_index-1])
  1996. dist_index--;
  1997. pos[dist_index]= x;
  1998. dist[dist_index++]= y;
  1999. }
  2000. }
  2001. dist_count= dist_index;
  2002. }
  2003. for(i=0; i<12; i++){
  2004. runs[i][run_index[i]++]= run[i];
  2005. run_index[i]=0;
  2006. run[i]=0;
  2007. }
  2008. dist_count=0;
  2009. for(y=0; y<h; y++){
  2010. int * pos = positions[ y&1];
  2011. int *last_pos = positions[(y&1)^1];
  2012. int * dist= distances[ y&1];
  2013. int *last_dist= distances[(y&1)^1];
  2014. int dist_index=0;
  2015. int last_dist_index=0;
  2016. for(x=0; x<w; x++){
  2017. int p=0, l=0, lt=0, t=0, rt=0;
  2018. int v= src[x + y*stride];
  2019. if(y){
  2020. t= src[x + (y-1)*stride];
  2021. if(x){
  2022. lt= src[x - 1 + (y-1)*stride];
  2023. }
  2024. if(x + 1 < w){
  2025. rt= src[x + 1 + (y-1)*stride];
  2026. }
  2027. }
  2028. if(x){
  2029. l= src[x - 1 + y*stride];
  2030. }
  2031. if(parent){
  2032. int px= x>>1;
  2033. int py= y>>1;
  2034. if(px<b->parent->width && py<b->parent->height)
  2035. p= parent[px + py*2*stride];
  2036. }
  2037. if(last_dist_index < dist_count && last_pos[last_dist_index] == x){
  2038. if(dist_index==0 || x - pos[dist_index-1] > dist[dist_index-1] - last_dist[last_dist_index]){
  2039. pos[dist_index]= x;
  2040. dist[dist_index++]= last_dist[last_dist_index];
  2041. }
  2042. last_dist_index++;
  2043. }
  2044. if(l|lt|t|rt|p){
  2045. int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
  2046. put_cabac(&s->c, &b->state[0][context], !!v);
  2047. }else{
  2048. int cur_dist=w>>1;
  2049. int run_class;
  2050. if(last_dist_index < dist_count)
  2051. cur_dist= last_pos[last_dist_index] - x + y - last_dist[last_dist_index];
  2052. if(dist_index)
  2053. cur_dist= FFMIN(cur_dist, x - pos[dist_index-1] + y - dist[dist_index-1]);
  2054. assert(cur_dist>=2);
  2055. assert(!dist_index || (pos[dist_index-1] >= 0 && pos[dist_index-1] <w));
  2056. assert(last_dist_index >= dist_count || (last_pos[last_dist_index] >= 0 && last_pos[last_dist_index] <w));
  2057. assert(!dist_index || dist[dist_index-1] <= y);
  2058. assert(last_dist_index >= dist_count || last_dist[last_dist_index] < y);
  2059. assert(cur_dist <= y + FFMAX(x, w-x-1));
  2060. run_class= av_log2(cur_dist+62);
  2061. if(!run_index[run_class]){
  2062. run[run_class]= runs[run_class][run_index[run_class]++];
  2063. put_symbol(&s->c, b->state[run_class+1], run[run_class], 0);
  2064. }
  2065. if(!run[run_class]){
  2066. run[run_class]= runs[run_class][run_index[run_class]++];
  2067. put_symbol(&s->c, b->state[run_class+1], run[run_class], 0);
  2068. assert(v);
  2069. }else{
  2070. run[run_class]--;
  2071. assert(!v);
  2072. }
  2073. }
  2074. if(v){
  2075. int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
  2076. put_symbol(&s->c, b->state[context + 16], ABS(v)-1, 0);
  2077. put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
  2078. while(dist_index && x - pos[dist_index-1] <= y - dist[dist_index-1])
  2079. dist_index--;
  2080. pos[dist_index]= x;
  2081. dist[dist_index++]= y;
  2082. }
  2083. }
  2084. dist_count= dist_index;
  2085. }
  2086. }
  2087. }
  2088. static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
  2089. // encode_subband_qtree(s, b, src, parent, stride, orientation);
  2090. // encode_subband_z0run(s, b, src, parent, stride, orientation);
  2091. encode_subband_c0run(s, b, src, parent, stride, orientation);
  2092. // encode_subband_dzr(s, b, src, parent, stride, orientation);
  2093. }
  /**
   * Decodes the coefficients of one subband into src.
   *
   * Only the last "if(1)" section is compiled; it reads the symbols in the
   * order encode_subband_c0run() writes them (zero runs via get_symbol2(.., 3),
   * significance bits, magnitude-1, sign) and uses the same context formulas.
   * The two "#if 0" sections before it are disabled decoders for other
   * experimental coders.
   *
   * @param s           codec context; symbols are read from s->c
   * @param b           subband descriptor (width, height, level, parent, state)
   * @param src         destination coefficients; the band area is cleared first
   * @param parent      coefficients of the parent subband, or NULL if none
   * @param stride      element distance between two rows of src; the parent is
   *                    addressed with a row distance of 2*stride
   * @param orientation not used by the active decoder
   */
  2094. static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
  2095. const int level= b->level;
  2096. const int w= b->width;
  2097. const int h= b->height;
  2098. int x,y;
  2099. START_TIMER
  // disabled: bit-plane decoder, refines every coefficient by one bit per plane from plane 24 down to 0
  2100. #if 0
  2101. for(y=0; y<b->height; y++)
  2102. memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
  2103. int plane;
  2104. for(plane=24; plane>=0; plane--){
  2105. int run;
  2106. run= get_symbol(&s->c, b->state[1], 0);
  2107. #define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
  2108. for(y=0; y<h; y++){
  2109. for(x=0; x<w; x++){
  2110. int v, p=0, lv;
  2111. int /*ll=0, */l=0, lt=0, t=0, rt=0;
  2112. int d=0, r=0, rd=0, ld=0;
  2113. lv= src[x + y*stride];
  2114. if(y){
  2115. t= src[x + (y-1)*stride];
  2116. if(x){
  2117. lt= src[x - 1 + (y-1)*stride];
  2118. }
  2119. if(x + 1 < w){
  2120. rt= src[x + 1 + (y-1)*stride];
  2121. }
  2122. }
  2123. if(x){
  2124. l= src[x - 1 + y*stride];
  2125. /*if(x > 1){
  2126. if(orientation==1) ll= src[y + (x-2)*stride];
  2127. else ll= src[x - 2 + y*stride];
  2128. }*/
  2129. }
  2130. if(y+1<h){
  2131. d= src[x + (y+1)*stride];
  2132. if(x) ld= src[x - 1 + (y+1)*stride];
  2133. if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
  2134. }
  2135. if(x + 1 < w)
  2136. r= src[x + 1 + y*stride];
  2137. if(parent){
  2138. int px= x>>1;
  2139. int py= y>>1;
  2140. if(px<b->parent->width && py<b->parent->height)
  2141. p= parent[px + py*2*stride];
  2142. }
  2143. HIDE( p, plane)
  2144. if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
  2145. int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
  2146. +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
  2147. if(lv){
  2148. assert(context + 8*av_log2(ABS(lv)) < 512 - 100);
  2149. if(get_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)])){
  2150. if(lv<0) v= lv - (1<<plane);
  2151. else v= lv + (1<<plane);
  2152. }else
  2153. v=lv;
  2154. }else{
  2155. v= get_cabac(&s->c, &b->state[ 0][context]) << plane;
  2156. }
  2157. }else{
  2158. assert(!lv);
  2159. if(!run){
  2160. run= get_symbol(&s->c, b->state[1], 0);
  2161. v= 1<<plane;
  2162. }else{
  2163. run--;
  2164. v=0;
  2165. }
  2166. }
  2167. if(v && !lv){
  2168. int context= clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
  2169. + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
  2170. if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + context]))
  2171. v= -v;
  2172. }
  2173. src[x + y*stride]= v;
  2174. }
  2175. }
  2176. }
  2177. return;
  2178. #endif
  // disabled: quadtree decoder, reads a significance tree level by level and then the values of significant leaves
  2179. #if 0
  2180. int tree[10][w*h]; //FIXME space waste ...
  2181. int treedim[10][2];
  2182. int lev;
  2183. const int max_level= av_log2(2*FFMAX(w,h)-1);
  2184. int w2=w, h2=h;
  2185. memset(tree, 0, sizeof(tree));
  2186. // assert(w%2==0 && h%2==0);
  2187. for(lev=max_level; lev>=0; lev--){
  2188. treedim[lev][0]= w2;
  2189. treedim[lev][1]= h2;
  2190. w2= (w2+1)>>1;
  2191. h2= (h2+1)>>1;
  2192. }
  2193. for(lev=0; lev<=max_level; lev++){
  2194. w2= treedim[lev][0];
  2195. h2= treedim[lev][1];
  2196. for(y=0; y<h2; y++){
  2197. for(x=0; x<w2; x++){
  2198. int l= 0, t=0;
  2199. int context;
  2200. if(lev && !tree[lev-1][x/2 + y/2*w])
  2201. continue;
  2202. if(x) l= tree[lev][x - 1 + y*w];
  2203. if(y) t= tree[lev][x + (y-1)*w];
  2204. context= lev + 8*(!!l) + 16*(!!t);
  2205. tree[lev][x + y*w]= get_cabac(&s->c, &b->state[98][context]);
  2206. }
  2207. }
  2208. }
  2209. if(1){
  2210. for(y=0; y<b->height; y++)
  2211. memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
  2212. for(y=0; y<h; y++){
  2213. for(x=0; x<w; x++){
  2214. int v, p=0;
  2215. int /*ll=0, */l=0, lt=0, t=0, rt=0;
  2216. if(y){
  2217. t= src[x + (y-1)*stride];
  2218. if(x){
  2219. lt= src[x - 1 + (y-1)*stride];
  2220. }
  2221. if(x + 1 < w){
  2222. rt= src[x + 1 + (y-1)*stride];
  2223. }
  2224. }
  2225. if(x){
  2226. l= src[x - 1 + y*stride];
  2227. /*if(x > 1){
  2228. if(orientation==1) ll= src[y + (x-2)*stride];
  2229. else ll= src[x - 2 + y*stride];
  2230. }*/
  2231. }
  2232. if(parent){
  2233. int px= x>>1;
  2234. int py= y>>1;
  2235. if(px<b->parent->width && py<b->parent->height)
  2236. p= parent[px + py*2*stride];
  2237. }
  2238. if(tree[max_level][x + y*w]){
  2239. int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
  2240. v= get_symbol(&s->c, b->state[context + 2], 0) + 1;
  2241. if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
  2242. v= -v;
  2243. src[x + y*stride]= v;
  2244. }
  2245. }
  2246. }
  2247. if(level+1 == s->spatial_decomposition_count){
  2248. STOP_TIMER("decode_subband")
  2249. }
  2250. return;
  2251. }
  2252. #endif
  // active decoder: counterpart of encode_subband_c0run()
  2253. if(1){
  2254. int run;
  // clear the band so that neighbours not yet decoded read as 0 and zero coefficients need no store
  2255. for(y=0; y<b->height; y++)
  2256. memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
  // length of the first zero run
  2257. run= get_symbol2(&s->c, b->state[1], 3);
  2258. for(y=0; y<h; y++){
  2259. for(x=0; x<w; x++){
  2260. int v, p=0;
  2261. int /*ll=0, */l=0, lt=0, t=0, rt=0;
  // gather the already decoded neighbours (top, top-left, top-right, left) and the parent coefficient
  2262. if(y){
  2263. t= src[x + (y-1)*stride];
  2264. if(x){
  2265. lt= src[x - 1 + (y-1)*stride];
  2266. }
  2267. if(x + 1 < w){
  2268. rt= src[x + 1 + (y-1)*stride];
  2269. }
  2270. }
  2271. if(x){
  2272. l= src[x - 1 + y*stride];
  2273. /*if(x > 1){
  2274. if(orientation==1) ll= src[y + (x-2)*stride];
  2275. else ll= src[x - 2 + y*stride];
  2276. }*/
  2277. }
  2278. if(parent){
  2279. int px= x>>1;
  2280. int py= y>>1;
  2281. if(px<b->parent->width && py<b->parent->height)
  2282. p= parent[px + py*2*stride];
  2283. }
  // nonzero context: read an explicit significance bit; zero context: position is covered by the run coding
  2284. if(/*ll|*/l|lt|t|rt|p){
  2285. int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
  2286. v=get_cabac(&s->c, &b->state[0][context]);
  2287. }else{
  2288. if(!run){
  // run exhausted: this coefficient is nonzero and the next run length follows in the stream
  2289. run= get_symbol2(&s->c, b->state[1], 3);
  2290. //FIXME optimize this here
  2291. //FIXME try to store a more naive run
  2292. v=1;
  2293. }else{
  2294. run--;
  2295. v=0;
  2296. }
  2297. }
  // significant coefficient: read magnitude-1, then the sign bit
  2298. if(v){
  2299. int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
  2300. v= get_symbol(&s->c, b->state[context + 2], 0) + 1;
  2301. if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
  2302. v= -v;
  2303. src[x + y*stride]= v;
  2304. }
  2305. }
  2306. }
  // the timer started by START_TIMER is only stopped for bands of the last decomposition level
  2307. if(level+1 == s->spatial_decomposition_count){
  2308. STOP_TIMER("decode_subband")
  2309. }
  2310. return;
  2311. }
  2312. }
  2313. static void reset_contexts(SnowContext *s){
  2314. int plane_index, level, orientation;
  2315. for(plane_index=0; plane_index<2; plane_index++){
  2316. for(level=0; level<s->spatial_decomposition_count; level++){
  2317. for(orientation=level ? 1:0; orientation<4; orientation++){
  2318. memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
  2319. }
  2320. }
  2321. }
  2322. memset(s->mb_band.state, 0, sizeof(s->mb_band.state));
  2323. memset(s->mv_band[0].state, 0, sizeof(s->mv_band[0].state));
  2324. memset(s->mv_band[1].state, 0, sizeof(s->mv_band[1].state));
  2325. memset(s->header_state, 0, sizeof(s->header_state));
  2326. }
  /** Saturates an interpolated sample to the 0..255 range of a uint8_t. */
  static inline int mc_clip_uint8(int v){
      return v < 0 ? 0 : (v > 255 ? 255 : v);
  }

  /**
   * Separable sub-pixel interpolation of a b_w x b_h block.
   *
   * A 6-tap filter (1,-5,20,20,-5,1), whose taps sum to 32, yields the
   * half-sample value; the result is a linear blend between that value and
   * the nearer full sample, selected by dx / dy in sixteenths of a sample
   * (0..15). The horizontal pass writes b_h+5 rows to tmp, the vertical pass
   * writes the final block to dst.
   *
   * Both passes saturate to 0..255 before storing. The filter has negative
   * taps, so its output can leave that range near sharp edges; storing it
   * unclipped into a uint8_t would wrap around (e.g. 319 -> 63).
   *
   * @param dst    output block, b_w x b_h samples
   * @param src    top-left of the input window; b_w+5 columns and b_h+5 rows
   *               are read, and output sample (x,y) is centred on
   *               src[x+2 + (y+2)*stride]
   * @param tmp    scratch buffer receiving b_h+5 rows of b_w samples
   * @param stride row distance shared by dst, src and tmp
   * @param b_w    block width
   * @param b_h    block height
   * @param dx     horizontal fraction, 0..15
   * @param dy     vertical fraction, 0..15
   */
  static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
      int x, y;

      /* horizontal pass, including the 5 extra rows the vertical filter needs */
      for(y=0; y < b_h+5; y++){
          for(x=0; x < b_w; x++){
              int a0= src[x     + y*stride];
              int a1= src[x + 1 + y*stride];
              int a2= src[x + 2 + y*stride];
              int a3= src[x + 3 + y*stride];
              int a4= src[x + 4 + y*stride];
              int a5= src[x + 5 + y*stride];
              int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
              int v;

              if(dx<8) v= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
              else     v= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
              tmp[x + y*stride]= mc_clip_uint8(v);
          }
      }

      /* vertical pass over the horizontally filtered rows */
      for(y=0; y < b_h; y++){
          for(x=0; x < b_w; x++){
              int a0= tmp[x +  y     *stride];
              int a1= tmp[x + (y + 1)*stride];
              int a2= tmp[x + (y + 2)*stride];
              int a3= tmp[x + (y + 3)*stride];
              int a4= tmp[x + (y + 4)*stride];
              int a5= tmp[x + (y + 5)*stride];
              int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
              int v;

              if(dy<8) v= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
              else     v= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
              dst[x + y*stride]= mc_clip_uint8(v);
          }
      }
  }
  /*
   * mcb(dx,dy,b_w) defines mc_block<dx><dy>(dst, src, stride): a fixed-fraction
   * wrapper around mc_block() for a b_w x b_w block. It supplies a local
   * scratch buffer of b_w+5 rows and moves src 2 samples left and 2 rows up,
   * which is where mc_block() expects its input window to start.
   */
  #define mcb(dx,dy,b_w)\
  static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
      uint8_t scratch[stride*(b_w+5)];\
      mc_block(dst, src-2-2*stride, scratch, stride, b_w, b_w, dx, dy);\
  }

  /* 16x16 variants for every combination of dx, dy in {0, 4, 8, 12} */
  mcb( 0, 0,16)
  mcb( 4, 0,16)
  mcb( 8, 0,16)
  mcb(12, 0,16)

  mcb( 0, 4,16)
  mcb( 4, 4,16)
  mcb( 8, 4,16)
  mcb(12, 4,16)

  mcb( 0, 8,16)
  mcb( 4, 8,16)
  mcb( 8, 8,16)
  mcb(12, 8,16)

  mcb( 0,12,16)
  mcb( 4,12,16)
  mcb( 8,12,16)
  mcb(12,12,16)
  /*
   * mca(dx,dy,b_w) defines mc_block_hpel<dx><dy>(dst, src, stride, h): like the
   * mcb() wrappers but with an additional height argument, which must equal
   * b_w (checked by assert) because the block is always processed as
   * b_w x b_w.
   */
  #define mca(dx,dy,b_w)\
  static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
      uint8_t scratch[stride*(b_w+5)];\
      assert(h==b_w);\
      mc_block(dst, src-2-2*stride, scratch, stride, b_w, b_w, dx, dy);\
  }

  /* 16x16 variants for dx, dy in {0, 8} */
  mca( 0, 0,16)
  mca( 8, 0,16)
  mca( 0, 8,16)
  mca( 8, 8,16)
  2404. static void add_xblock(DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add){
  2405. uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
  2406. int x,y;
  2407. if(s_x<0){
  2408. obmc -= s_x;
  2409. b_w += s_x;
  2410. s_x=0;
  2411. }else if(s_x + b_w > w){
  2412. b_w = w - s_x;
  2413. }
  2414. if(s_y<0){
  2415. obmc -= s_y*obmc_stride;
  2416. b_h += s_y;
  2417. s_y=0;
  2418. }else if(s_y + b_h> h){
  2419. b_h = h - s_y;
  2420. }
  2421. if(b_w<=0 || b_h<=0) return;
  2422. dst += s_x + s_y*dst_stride;
  2423. if(mb_type==1){
  2424. src += s_x + s_y*src_stride;
  2425. for(y=0; y < b_h; y++){
  2426. for(x=0; x < b_w; x++){
  2427. if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
  2428. else dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
  2429. }
  2430. }
  2431. }else{
  2432. int dx= mv_x&15;
  2433. int dy= mv_y&15;
  2434. // int dxy= (mv_x&1) + 2*(mv_y&1);
  2435. s_x += (mv_x>>4) - 2;
  2436. s_y += (mv_y>>4) - 2;
  2437. src += s_x + s_y*src_stride;
  2438. //use dsputil
  2439. if( (unsigned)s_x >= w - b_w - 4
  2440. || (unsigned)s_y >= h - b_h - 4){
  2441. ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
  2442. src= tmp + 32;
  2443. }
  2444. if(mb_type==0){
  2445. mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
  2446. }else{
  2447. int sum=0;
  2448. for(y=0; y < b_h; y++){
  2449. for(x=0; x < b_w; x++){
  2450. sum += src[x+ y*src_stride];
  2451. }
  2452. }
  2453. sum= (sum + b_h*b_w/2) / (b_h*b_w);
  2454. for(y=0; y < b_h; y++){
  2455. for(x=0; x < b_w; x++){
  2456. tmp[x + y*src_stride]= sum;
  2457. }
  2458. }
  2459. }
  2460. for(y=0; y < b_h; y++){
  2461. for(x=0; x < b_w; x++){
  2462. if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
  2463. else dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
  2464. }
  2465. }
  2466. }
  2467. }
  2468. static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
  2469. Plane *p= &s->plane[plane_index];
  2470. const int mb_w= s->mb_band.width;
  2471. const int mb_h= s->mb_band.height;
  2472. const int mb_stride= s->mb_band.stride;
  2473. int x, y, mb_x, mb_y;
  2474. int scale = plane_index ? s->mv_scale : 2*s->mv_scale;
  2475. int block_w = plane_index ? 8 : 16;
  2476. uint8_t *obmc = plane_index ? obmc16 : obmc32;
  2477. int obmc_stride= plane_index ? 16 : 32;
  2478. int ref_stride= s->last_picture.linesize[plane_index];
  2479. uint8_t *ref = s->last_picture.data[plane_index];
  2480. int w= p->width;
  2481. int h= p->height;
  2482. if(s->avctx->debug&512){
  2483. for(y=0; y<h; y++){
  2484. for(x=0; x<w; x++){
  2485. if(add) buf[x + y*w]+= 128*256;
  2486. else buf[x + y*w]-= 128*256;
  2487. }
  2488. }
  2489. return;
  2490. }
  2491. for(mb_y=-1; mb_y<=mb_h; mb_y++){
  2492. for(mb_x=-1; mb_x<=mb_w; mb_x++){
  2493. int index= clip(mb_x, 0, mb_w-1) + clip(mb_y, 0, mb_h-1)*mb_stride;
  2494. add_xblock(buf, ref, obmc,
  2495. block_w*mb_x - block_w/2,
  2496. block_w*mb_y - block_w/2,
  2497. 2*block_w, 2*block_w,
  2498. s->mv_band[0].buf[index]*scale, s->mv_band[1].buf[index]*scale,
  2499. w, h,
  2500. w, ref_stride, obmc_stride,
  2501. s->mb_band.buf[index], add);
  2502. }
  2503. }
  2504. }
  2505. static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
  2506. const int level= b->level;
  2507. const int w= b->width;
  2508. const int h= b->height;
  2509. const int qlog= clip(s->qlog + b->qlog, 0, 128);
  2510. const int qmul= qexp[qlog&7]<<(qlog>>3);
  2511. int x,y, thres1, thres2;
  2512. START_TIMER
  2513. assert(QROOT==8);
  2514. bias= bias ? 0 : (3*qmul)>>3;
  2515. thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
  2516. thres2= 2*thres1;
  2517. if(!bias){
  2518. for(y=0; y<h; y++){
  2519. for(x=0; x<w; x++){
  2520. int i= src[x + y*stride];
  2521. if((unsigned)(i+thres1) > thres2){
  2522. if(i>=0){
  2523. i<<= QEXPSHIFT;
  2524. i/= qmul; //FIXME optimize
  2525. src[x + y*stride]= i;
  2526. }else{
  2527. i= -i;
  2528. i<<= QEXPSHIFT;
  2529. i/= qmul; //FIXME optimize
  2530. src[x + y*stride]= -i;
  2531. }
  2532. }else
  2533. src[x + y*stride]= 0;
  2534. }
  2535. }
  2536. }else{
  2537. for(y=0; y<h; y++){
  2538. for(x=0; x<w; x++){
  2539. int i= src[x + y*stride];
  2540. if((unsigned)(i+thres1) > thres2){
  2541. if(i>=0){
  2542. i<<= QEXPSHIFT;
  2543. i= (i + bias) / qmul; //FIXME optimize
  2544. src[x + y*stride]= i;
  2545. }else{
  2546. i= -i;
  2547. i<<= QEXPSHIFT;
  2548. i= (i + bias) / qmul; //FIXME optimize
  2549. src[x + y*stride]= -i;
  2550. }
  2551. }else
  2552. src[x + y*stride]= 0;
  2553. }
  2554. }
  2555. }
  2556. if(level+1 == s->spatial_decomposition_count){
  2557. // STOP_TIMER("quantize")
  2558. }
  2559. }
  2560. static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
  2561. const int level= b->level;
  2562. const int w= b->width;
  2563. const int h= b->height;
  2564. const int qlog= clip(s->qlog + b->qlog, 0, 128);
  2565. const int qmul= qexp[qlog&7]<<(qlog>>3);
  2566. const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
  2567. int x,y;
  2568. assert(QROOT==8);
  2569. for(y=0; y<h; y++){
  2570. for(x=0; x<w; x++){
  2571. int i= src[x + y*stride];
  2572. if(i<0){
  2573. src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
  2574. }else if(i>0){
  2575. src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
  2576. }
  2577. }
  2578. }
  2579. }
  2580. static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
  2581. const int w= b->width;
  2582. const int h= b->height;
  2583. int x,y;
  2584. for(y=h-1; y>=0; y--){
  2585. for(x=w-1; x>=0; x--){
  2586. int i= x + y*stride;
  2587. if(x){
  2588. if(use_median){
  2589. if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
  2590. else src[i] -= src[i - 1];
  2591. }else{
  2592. if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
  2593. else src[i] -= src[i - 1];
  2594. }
  2595. }else{
  2596. if(y) src[i] -= src[i - stride];
  2597. }
  2598. }
  2599. }
  2600. }
  2601. static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
  2602. const int w= b->width;
  2603. const int h= b->height;
  2604. int x,y;
  2605. for(y=0; y<h; y++){
  2606. for(x=0; x<w; x++){
  2607. int i= x + y*stride;
  2608. if(x){
  2609. if(use_median){
  2610. if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
  2611. else src[i] += src[i - 1];
  2612. }else{
  2613. if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
  2614. else src[i] += src[i - 1];
  2615. }
  2616. }else{
  2617. if(y) src[i] += src[i - stride];
  2618. }
  2619. }
  2620. }
  2621. }
  2622. static void encode_header(SnowContext *s){
  2623. int plane_index, level, orientation;
  2624. put_cabac(&s->c, s->header_state, s->keyframe); // state clearing stuff?
  2625. if(s->keyframe){
  2626. put_symbol(&s->c, s->header_state, s->version, 0);
  2627. put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
  2628. put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
  2629. put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
  2630. put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
  2631. put_symbol(&s->c, s->header_state, s->b_width, 0);
  2632. put_symbol(&s->c, s->header_state, s->b_height, 0);
  2633. put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
  2634. put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
  2635. put_cabac(&s->c, s->header_state, s->spatial_scalability);
  2636. // put_cabac(&s->c, s->header_state, s->rate_scalability);
  2637. for(plane_index=0; plane_index<2; plane_index++){
  2638. for(level=0; level<s->spatial_decomposition_count; level++){
  2639. for(orientation=level ? 1:0; orientation<4; orientation++){
  2640. if(orientation==2) continue;
  2641. put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
  2642. }
  2643. }
  2644. }
  2645. }
  2646. put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
  2647. put_symbol(&s->c, s->header_state, s->qlog, 1);
  2648. put_symbol(&s->c, s->header_state, s->mv_scale, 0);
  2649. put_symbol(&s->c, s->header_state, s->qbias, 1);
  2650. }
  2651. static int decode_header(SnowContext *s){
  2652. int plane_index, level, orientation;
  2653. s->keyframe= get_cabac(&s->c, s->header_state);
  2654. if(s->keyframe){
  2655. s->version= get_symbol(&s->c, s->header_state, 0);
  2656. if(s->version>0){
  2657. av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
  2658. return -1;
  2659. }
  2660. s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
  2661. s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
  2662. s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
  2663. s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
  2664. s->b_width= get_symbol(&s->c, s->header_state, 0);
  2665. s->b_height= get_symbol(&s->c, s->header_state, 0);
  2666. s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
  2667. s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
  2668. s->spatial_scalability= get_cabac(&s->c, s->header_state);
  2669. // s->rate_scalability= get_cabac(&s->c, s->header_state);
  2670. for(plane_index=0; plane_index<3; plane_index++){
  2671. for(level=0; level<s->spatial_decomposition_count; level++){
  2672. for(orientation=level ? 1:0; orientation<4; orientation++){
  2673. int q;
  2674. if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
  2675. else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
  2676. else q= get_symbol(&s->c, s->header_state, 1);
  2677. s->plane[plane_index].band[level][orientation].qlog= q;
  2678. }
  2679. }
  2680. }
  2681. }
  2682. s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
  2683. if(s->spatial_decomposition_type > 2){
  2684. av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
  2685. return -1;
  2686. }
  2687. s->qlog= get_symbol(&s->c, s->header_state, 1);
  2688. s->mv_scale= get_symbol(&s->c, s->header_state, 0);
  2689. s->qbias= get_symbol(&s->c, s->header_state, 1);
  2690. return 0;
  2691. }
  2692. static int common_init(AVCodecContext *avctx){
  2693. SnowContext *s = avctx->priv_data;
  2694. int width, height;
  2695. int level, orientation, plane_index, dec;
  2696. s->avctx= avctx;
  2697. dsputil_init(&s->dsp, avctx);
  2698. #define mcf(dx,dy)\
  2699. s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
  2700. s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
  2701. mc_block ## dx ## dy;
  2702. mcf( 0, 0)
  2703. mcf( 4, 0)
  2704. mcf( 8, 0)
  2705. mcf(12, 0)
  2706. mcf( 0, 4)
  2707. mcf( 4, 4)
  2708. mcf( 8, 4)
  2709. mcf(12, 4)
  2710. mcf( 0, 8)
  2711. mcf( 4, 8)
  2712. mcf( 8, 8)
  2713. mcf(12, 8)
  2714. mcf( 0,12)
  2715. mcf( 4,12)
  2716. mcf( 8,12)
  2717. mcf(12,12)
  2718. #define mcfh(dx,dy)\
  2719. s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
  2720. s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
  2721. mc_block_hpel ## dx ## dy;
  2722. mcfh(0, 0)
  2723. mcfh(8, 0)
  2724. mcfh(0, 8)
  2725. mcfh(8, 8)
  2726. dec= s->spatial_decomposition_count= 5;
  2727. s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
  2728. s->chroma_h_shift= 1; //FIXME XXX
  2729. s->chroma_v_shift= 1;
  2730. // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
  2731. s->b_width = (s->avctx->width +(1<<dec)-1)>>dec;
  2732. s->b_height= (s->avctx->height+(1<<dec)-1)>>dec;
  2733. s->spatial_dwt_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
  2734. s->pred_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
  2735. s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
  2736. for(plane_index=0; plane_index<3; plane_index++){
  2737. int w= s->avctx->width;
  2738. int h= s->avctx->height;
  2739. if(plane_index){
  2740. w>>= s->chroma_h_shift;
  2741. h>>= s->chroma_v_shift;
  2742. }
  2743. s->plane[plane_index].width = w;
  2744. s->plane[plane_index].height= h;
  2745. av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
  2746. for(level=s->spatial_decomposition_count-1; level>=0; level--){
  2747. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  2748. SubBand *b= &s->plane[plane_index].band[level][orientation];
  2749. b->buf= s->spatial_dwt_buffer;
  2750. b->level= level;
  2751. b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
  2752. b->width = (w + !(orientation&1))>>1;
  2753. b->height= (h + !(orientation>1))>>1;
  2754. if(orientation&1) b->buf += (w+1)>>1;
  2755. if(orientation>1) b->buf += b->stride>>1;
  2756. // alloc_qtree(&b->tree, b->width, b->height);
  2757. if(level)
  2758. b->parent= &s->plane[plane_index].band[level-1][orientation];
  2759. }
  2760. w= (w+1)>>1;
  2761. h= (h+1)>>1;
  2762. }
  2763. }
  2764. //FIXME init_subband() ?
  2765. s->mb_band.stride= s->mv_band[0].stride= s->mv_band[1].stride=
  2766. s->mb_band.width = s->mv_band[0].width = s->mv_band[1].width = (s->avctx->width + 15)>>4;
  2767. s->mb_band.height= s->mv_band[0].height= s->mv_band[1].height= (s->avctx->height+ 15)>>4;
  2768. s->mb_band .buf= av_mallocz(s->mb_band .stride * s->mb_band .height*sizeof(DWTELEM));
  2769. s->mv_band[0].buf= av_mallocz(s->mv_band[0].stride * s->mv_band[0].height*sizeof(DWTELEM));
  2770. s->mv_band[1].buf= av_mallocz(s->mv_band[1].stride * s->mv_band[1].height*sizeof(DWTELEM));
  2771. /* alloc_qtree(&s->mb_band .tree, s->mb_band .width, s->mb_band .height); //FIXME free these 3
  2772. alloc_qtree(&s->mv_band[0].tree, s->mv_band[0].width, s->mv_band[0].height);
  2773. alloc_qtree(&s->mv_band[1].tree, s->mv_band[0].width, s->mv_band[1].height);*/
  2774. reset_contexts(s);
  2775. /*
  2776. width= s->width= avctx->width;
  2777. height= s->height= avctx->height;
  2778. assert(width && height);
  2779. */
  2780. s->avctx->get_buffer(s->avctx, &s->mconly_picture);
  2781. return 0;
  2782. }
  2783. static void calculate_vissual_weight(SnowContext *s, Plane *p){
  2784. int width = p->width;
  2785. int height= p->height;
  2786. int i, level, orientation, x, y;
  2787. for(level=0; level<s->spatial_decomposition_count; level++){
  2788. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  2789. SubBand *b= &p->band[level][orientation];
  2790. DWTELEM *buf= b->buf;
  2791. int64_t error=0;
  2792. memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
  2793. buf[b->width/2 + b->height/2*b->stride]= 256*256;
  2794. spatial_idwt(s, s->spatial_dwt_buffer, width, height, width);
  2795. for(y=0; y<height; y++){
  2796. for(x=0; x<width; x++){
  2797. int64_t d= s->spatial_dwt_buffer[x + y*width];
  2798. error += d*d;
  2799. }
  2800. }
  2801. b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
  2802. av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
  2803. }
  2804. }
  2805. }
  2806. static int encode_init(AVCodecContext *avctx)
  2807. {
  2808. SnowContext *s = avctx->priv_data;
  2809. int i;
  2810. int level, orientation, plane_index;
  2811. if(avctx->strict_std_compliance >= 0){
  2812. av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
  2813. "use vstrict=-1 to use it anyway\n");
  2814. return -1;
  2815. }
  2816. common_init(avctx);
  2817. s->version=0;
  2818. s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
  2819. s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
  2820. s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
  2821. s->mb_type = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int16_t));
  2822. s->mb_mean = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int8_t ));
  2823. s->dummy = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int32_t));
  2824. h263_encode_init(&s->m); //mv_penalty
  2825. for(plane_index=0; plane_index<3; plane_index++){
  2826. calculate_vissual_weight(s, &s->plane[plane_index]);
  2827. }
  2828. avctx->coded_frame= &s->current_picture;
  2829. switch(avctx->pix_fmt){
  2830. // case PIX_FMT_YUV444P:
  2831. // case PIX_FMT_YUV422P:
  2832. case PIX_FMT_YUV420P:
  2833. case PIX_FMT_GRAY8:
  2834. // case PIX_FMT_YUV411P:
  2835. // case PIX_FMT_YUV410P:
  2836. s->colorspace_type= 0;
  2837. break;
  2838. /* case PIX_FMT_RGBA32:
  2839. s->colorspace= 1;
  2840. break;*/
  2841. default:
  2842. av_log(avctx, AV_LOG_ERROR, "format not supported\n");
  2843. return -1;
  2844. }
  2845. // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
  2846. s->chroma_h_shift= 1;
  2847. s->chroma_v_shift= 1;
  2848. return 0;
  2849. }
  2850. static int frame_start(SnowContext *s){
  2851. AVFrame tmp;
  2852. if(s->keyframe)
  2853. reset_contexts(s);
  2854. tmp= s->last_picture;
  2855. s->last_picture= s->current_picture;
  2856. s->current_picture= tmp;
  2857. s->current_picture.reference= 1;
  2858. if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
  2859. av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
  2860. return -1;
  2861. }
  2862. return 0;
  2863. }
  2864. static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
  2865. SnowContext *s = avctx->priv_data;
  2866. CABACContext * const c= &s->c;
  2867. AVFrame *pict = data;
  2868. const int width= s->avctx->width;
  2869. const int height= s->avctx->height;
  2870. int used_count= 0;
  2871. int log2_threshold, level, orientation, plane_index, i;
  2872. ff_init_cabac_encoder(c, buf, buf_size);
  2873. ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
  2874. s->input_picture = *pict;
  2875. memset(s->header_state, 0, sizeof(s->header_state));
  2876. s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
  2877. pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
  2878. s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
  2879. //<64 >60
  2880. s->qlog += 61;
  2881. for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
  2882. s->mb_band.buf[i]= s->keyframe;
  2883. }
  2884. frame_start(s);
  2885. if(pict->pict_type == P_TYPE){
  2886. int block_width = (width +15)>>4;
  2887. int block_height= (height+15)>>4;
  2888. int stride= s->current_picture.linesize[0];
  2889. uint8_t *src_plane= s->input_picture.data[0];
  2890. int src_stride= s->input_picture.linesize[0];
  2891. int x,y;
  2892. assert(s->current_picture.data[0]);
  2893. assert(s->last_picture.data[0]);
  2894. s->m.avctx= s->avctx;
  2895. s->m.current_picture.data[0]= s->current_picture.data[0];
  2896. s->m. last_picture.data[0]= s-> last_picture.data[0];
  2897. s->m. new_picture.data[0]= s-> input_picture.data[0];
  2898. s->m.current_picture_ptr= &s->m.current_picture;
  2899. s->m. last_picture_ptr= &s->m. last_picture;
  2900. s->m.linesize=
  2901. s->m. last_picture.linesize[0]=
  2902. s->m. new_picture.linesize[0]=
  2903. s->m.current_picture.linesize[0]= stride;
  2904. s->m.width = width;
  2905. s->m.height= height;
  2906. s->m.mb_width = block_width;
  2907. s->m.mb_height= block_height;
  2908. s->m.mb_stride= s->m.mb_width+1;
  2909. s->m.b8_stride= 2*s->m.mb_width+1;
  2910. s->m.f_code=1;
  2911. s->m.pict_type= pict->pict_type;
  2912. s->m.me_method= s->avctx->me_method;
  2913. s->m.me.scene_change_score=0;
  2914. s->m.flags= s->avctx->flags;
  2915. s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
  2916. s->m.out_format= FMT_H263;
  2917. s->m.unrestricted_mv= 1;
  2918. s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
  2919. s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
  2920. s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
  2921. if(!s->motion_val8){
  2922. s->motion_val8 = av_mallocz(s->m.b8_stride*block_height*2*2*sizeof(int16_t));
  2923. s->motion_val16= av_mallocz(s->m.mb_stride*block_height*2*sizeof(int16_t));
  2924. }
  2925. s->m.mb_type= s->mb_type;
  2926. //dummies, to avoid segfaults
  2927. s->m.current_picture.mb_mean = s->mb_mean;
  2928. s->m.current_picture.mb_var = (int16_t*)s->dummy;
  2929. s->m.current_picture.mc_mb_var= (int16_t*)s->dummy;
  2930. s->m.current_picture.mb_type = s->dummy;
  2931. s->m.current_picture.motion_val[0]= s->motion_val8;
  2932. s->m.p_mv_table= s->motion_val16;
  2933. s->m.dsp= s->dsp; //move
  2934. ff_init_me(&s->m);
  2935. s->m.me.pre_pass=1;
  2936. s->m.me.dia_size= s->avctx->pre_dia_size;
  2937. s->m.first_slice_line=1;
  2938. for(y= block_height-1; y >= 0; y--) {
  2939. uint8_t src[stride*16];
  2940. s->m.new_picture.data[0]= src - y*16*stride; //ugly
  2941. s->m.mb_y= y;
  2942. for(i=0; i<16 && i + 16*y<height; i++){
  2943. memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
  2944. for(x=width; x<16*block_width; x++)
  2945. src[i*stride+x]= src[i*stride+x-1];
  2946. }
  2947. for(; i<16 && i + 16*y<16*block_height; i++)
  2948. memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
  2949. for(x=block_width-1; x >=0 ;x--) {
  2950. s->m.mb_x= x;
  2951. ff_init_block_index(&s->m);
  2952. ff_update_block_index(&s->m);
  2953. ff_pre_estimate_p_frame_motion(&s->m, x, y);
  2954. }
  2955. s->m.first_slice_line=0;
  2956. }
  2957. s->m.me.pre_pass=0;
  2958. s->m.me.dia_size= s->avctx->dia_size;
  2959. s->m.first_slice_line=1;
  2960. for (y = 0; y < block_height; y++) {
  2961. uint8_t src[stride*16];
  2962. s->m.new_picture.data[0]= src - y*16*stride; //ugly
  2963. s->m.mb_y= y;
  2964. assert(width <= stride);
  2965. assert(width <= 16*block_width);
  2966. for(i=0; i<16 && i + 16*y<height; i++){
  2967. memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
  2968. for(x=width; x<16*block_width; x++)
  2969. src[i*stride+x]= src[i*stride+x-1];
  2970. }
  2971. for(; i<16 && i + 16*y<16*block_height; i++)
  2972. memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
  2973. for (x = 0; x < block_width; x++) {
  2974. int mb_xy= x + y*(s->mb_band.stride);
  2975. s->m.mb_x= x;
  2976. ff_init_block_index(&s->m);
  2977. ff_update_block_index(&s->m);
  2978. ff_estimate_p_frame_motion(&s->m, x, y);
  2979. s->mb_band .buf[mb_xy]= (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER)
  2980. ? 0 : 2;
  2981. s->mv_band[0].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][0];
  2982. s->mv_band[1].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][1];
  2983. if(s->mb_band .buf[x + y*(s->mb_band.stride)]==2 && 0){
  2984. int dc0=128, dc1=128, dc, dc2, dir;
  2985. int offset= (s->avctx->flags & CODEC_FLAG_QPEL) ? 64 : 32;
  2986. dc =s->mb_mean[x + y *s->m.mb_stride ];
  2987. if(x) dc0=s->mb_mean[x + y *s->m.mb_stride - 1];
  2988. if(y) dc1=s->mb_mean[x + (y-1)*s->m.mb_stride ];
  2989. dc2= (dc0+dc1)>>1;
  2990. #if 0
  2991. if (ABS(dc0 - dc) < ABS(dc1 - dc) && ABS(dc0 - dc) < ABS(dc2 - dc))
  2992. dir= 1;
  2993. else if(ABS(dc0 - dc) >=ABS(dc1 - dc) && ABS(dc1 - dc) < ABS(dc2 - dc))
  2994. dir=-1;
  2995. else
  2996. dir=0;
  2997. #endif
  2998. if(ABS(dc0 - dc) < ABS(dc1 - dc) && x){
  2999. s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + y*(s->mb_band.stride)-1] - offset;
  3000. s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + y*(s->mb_band.stride)-1];
  3001. s->mb_mean[x + y *s->m.mb_stride ]= dc0;
  3002. }else if(y){
  3003. s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + (y-1)*(s->mb_band.stride)];
  3004. s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + (y-1)*(s->mb_band.stride)] - offset;
  3005. s->mb_mean[x + y *s->m.mb_stride ]= dc1;
  3006. }
  3007. }
  3008. // s->mb_band .buf[x + y*(s->mb_band.stride)]=1; //FIXME intra only test
  3009. }
  3010. s->m.first_slice_line=0;
  3011. }
  3012. assert(s->m.pict_type == P_TYPE);
  3013. if(s->m.me.scene_change_score > s->avctx->scenechange_threshold){
  3014. s->m.pict_type=
  3015. pict->pict_type =I_TYPE;
  3016. for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
  3017. s->mb_band.buf[i]= 1;
  3018. s->mv_band[0].buf[i]=
  3019. s->mv_band[1].buf[i]= 0;
  3020. }
  3021. //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
  3022. }
  3023. }
  3024. s->m.first_slice_line=1;
  3025. s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
  3026. encode_header(s);
  3027. decorrelate(s, &s->mb_band , s->mb_band .buf, s->mb_band .stride, 0, 1);
  3028. decorrelate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 0, 1);
  3029. decorrelate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 0 ,1);
  3030. encode_subband(s, &s->mb_band , s->mb_band .buf, NULL, s->mb_band .stride, 0);
  3031. encode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
  3032. encode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
  3033. //FIXME avoid this
  3034. correlate(s, &s->mb_band , s->mb_band .buf, s->mb_band .stride, 1, 1);
  3035. correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
  3036. correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
  3037. for(plane_index=0; plane_index<3; plane_index++){
  3038. Plane *p= &s->plane[plane_index];
  3039. int w= p->width;
  3040. int h= p->height;
  3041. int x, y;
  3042. int bits= put_bits_count(&s->c.pb);
  3043. //FIXME optimize
  3044. #if QPRED
  3045. memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
  3046. predict_plane(s, s->pred_buffer, plane_index, 1);
  3047. spatial_dwt(s, s->pred_buffer, w, h, w);
  3048. for(level=0; level<s->spatial_decomposition_count; level++){
  3049. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  3050. SubBand *b= &p->band[level][orientation];
  3051. int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
  3052. quantize (s, b, b->buf + delta, b->stride, s->qbias);
  3053. dequantize(s, b, b->buf + delta, b->stride);
  3054. }
  3055. }
  3056. for(y=0; y<h; y++){
  3057. for(x=0; x<w; x++){
  3058. s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
  3059. }
  3060. }
  3061. spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
  3062. for(y=0; y<h; y++){
  3063. for(x=0; x<w; x++){
  3064. s->spatial_dwt_buffer[y*w + x]-= s->pred_buffer[y*w + x];
  3065. }
  3066. }
  3067. #else
  3068. if(pict->data[plane_index]) //FIXME gray hack
  3069. for(y=0; y<h; y++){
  3070. for(x=0; x<w; x++){
  3071. s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
  3072. }
  3073. }
  3074. predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
  3075. spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
  3076. #endif
  3077. for(level=0; level<s->spatial_decomposition_count; level++){
  3078. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  3079. SubBand *b= &p->band[level][orientation];
  3080. quantize(s, b, b->buf, b->stride, s->qbias);
  3081. if(orientation==0)
  3082. decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
  3083. encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
  3084. assert(b->parent==NULL || b->parent->stride == b->stride*2);
  3085. if(orientation==0)
  3086. correlate(s, b, b->buf, b->stride, 1, 0);
  3087. }
  3088. }
  3089. // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
  3090. for(level=0; level<s->spatial_decomposition_count; level++){
  3091. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  3092. SubBand *b= &p->band[level][orientation];
  3093. dequantize(s, b, b->buf, b->stride);
  3094. }
  3095. }
  3096. #if QPRED
  3097. for(y=0; y<h; y++){
  3098. for(x=0; x<w; x++){
  3099. s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
  3100. }
  3101. }
  3102. spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
  3103. #else
  3104. spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
  3105. predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
  3106. #endif
  3107. //FIXME optimize
  3108. for(y=0; y<h; y++){
  3109. for(x=0; x<w; x++){
  3110. int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
  3111. if(v&(~255)) v= ~(v>>31);
  3112. s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
  3113. }
  3114. }
  3115. if(s->avctx->flags&CODEC_FLAG_PSNR){
  3116. int64_t error= 0;
  3117. if(pict->data[plane_index]) //FIXME gray hack
  3118. for(y=0; y<h; y++){
  3119. for(x=0; x<w; x++){
  3120. int d= s->spatial_dwt_buffer[y*w + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]*256;
  3121. error += d*d;
  3122. }
  3123. }
  3124. error= (error + 128*256)>>16;
  3125. s->avctx->error[plane_index] += error;
  3126. s->avctx->error[3] += error;
  3127. }
  3128. }
  3129. if(s->last_picture.data[0])
  3130. avctx->release_buffer(avctx, &s->last_picture);
  3131. emms_c();
  3132. return put_cabac_terminate(c, 1);
  3133. }
  3134. static void common_end(SnowContext *s){
  3135. int plane_index, level, orientation;
  3136. av_freep(&s->spatial_dwt_buffer);
  3137. av_freep(&s->mb_band.buf);
  3138. av_freep(&s->mv_band[0].buf);
  3139. av_freep(&s->mv_band[1].buf);
  3140. av_freep(&s->m.me.scratchpad);
  3141. av_freep(&s->m.me.map);
  3142. av_freep(&s->m.me.score_map);
  3143. av_freep(&s->mb_type);
  3144. av_freep(&s->mb_mean);
  3145. av_freep(&s->dummy);
  3146. av_freep(&s->motion_val8);
  3147. av_freep(&s->motion_val16);
  3148. /*
  3149. for(plane_index=0; plane_index<3; plane_index++){
  3150. for(level=s->spatial_decomposition_count-1; level>=0; level--){
  3151. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  3152. SubBand *b= &s->plane[plane_index].band[level][orientation];
  3153. free_qtree(&b->tree);
  3154. }
  3155. }
  3156. }*/
  3157. }
  3158. static int encode_end(AVCodecContext *avctx)
  3159. {
  3160. SnowContext *s = avctx->priv_data;
  3161. common_end(s);
  3162. return 0;
  3163. }
  3164. static int decode_init(AVCodecContext *avctx)
  3165. {
  3166. // SnowContext *s = avctx->priv_data;
  3167. common_init(avctx);
  3168. return 0;
  3169. }
  3170. static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
  3171. SnowContext *s = avctx->priv_data;
  3172. CABACContext * const c= &s->c;
  3173. const int width= s->avctx->width;
  3174. const int height= s->avctx->height;
  3175. int bytes_read;
  3176. AVFrame *picture = data;
  3177. int log2_threshold, level, orientation, plane_index;
  3178. /* no supplementary picture */
  3179. if (buf_size == 0)
  3180. return 0;
  3181. ff_init_cabac_decoder(c, buf, buf_size);
  3182. ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
  3183. memset(s->header_state, 0, sizeof(s->header_state));
  3184. s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
  3185. decode_header(s);
  3186. frame_start(s);
  3187. //keyframe flag dupliaction mess FIXME
  3188. if(avctx->debug&FF_DEBUG_PICT_INFO)
  3189. av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
  3190. decode_subband(s, &s->mb_band , s->mb_band .buf, NULL, s->mb_band .stride, 0);
  3191. decode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
  3192. decode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
  3193. correlate(s, &s->mb_band , s->mb_band .buf, s->mb_band .stride, 1, 1);
  3194. correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
  3195. correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
  3196. for(plane_index=0; plane_index<3; plane_index++){
  3197. Plane *p= &s->plane[plane_index];
  3198. int w= p->width;
  3199. int h= p->height;
  3200. int x, y;
  3201. if(s->avctx->debug&2048){
  3202. memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
  3203. predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
  3204. for(y=0; y<h; y++){
  3205. for(x=0; x<w; x++){
  3206. int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
  3207. if(v&(~255)) v= ~(v>>31);
  3208. s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
  3209. }
  3210. }
  3211. }
  3212. for(level=0; level<s->spatial_decomposition_count; level++){
  3213. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  3214. SubBand *b= &p->band[level][orientation];
  3215. decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
  3216. if(orientation==0)
  3217. correlate(s, b, b->buf, b->stride, 1, 0);
  3218. }
  3219. }
  3220. if(!(s->avctx->debug&1024))
  3221. for(level=0; level<s->spatial_decomposition_count; level++){
  3222. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  3223. SubBand *b= &p->band[level][orientation];
  3224. dequantize(s, b, b->buf, b->stride);
  3225. }
  3226. }
  3227. #if QPRED
  3228. memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
  3229. predict_plane(s, s->pred_buffer, plane_index, 1);
  3230. spatial_dwt(s, s->pred_buffer, w, h, w);
  3231. for(level=0; level<s->spatial_decomposition_count; level++){
  3232. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  3233. SubBand *b= &p->band[level][orientation];
  3234. int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
  3235. quantize (s, b, b->buf + delta, b->stride, s->qbias);
  3236. dequantize(s, b, b->buf + delta, b->stride);
  3237. }
  3238. }
  3239. for(y=0; y<h; y++){
  3240. for(x=0; x<w; x++){
  3241. s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
  3242. }
  3243. }
  3244. spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
  3245. #else
  3246. spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
  3247. predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
  3248. #endif
  3249. //FIXME optimize
  3250. for(y=0; y<h; y++){
  3251. for(x=0; x<w; x++){
  3252. int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
  3253. if(v&(~255)) v= ~(v>>31);
  3254. s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
  3255. }
  3256. }
  3257. }
  3258. emms_c();
  3259. if(s->last_picture.data[0])
  3260. avctx->release_buffer(avctx, &s->last_picture);
  3261. if(!(s->avctx->debug&2048))
  3262. *picture= s->current_picture;
  3263. else
  3264. *picture= s->mconly_picture;
  3265. *data_size = sizeof(AVFrame);
  3266. bytes_read= get_cabac_terminate(c);
  3267. if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
  3268. return bytes_read;
  3269. }
  3270. static int decode_end(AVCodecContext *avctx)
  3271. {
  3272. SnowContext *s = avctx->priv_data;
  3273. common_end(s);
  3274. return 0;
  3275. }
  3276. AVCodec snow_decoder = {
  3277. "snow",
  3278. CODEC_TYPE_VIDEO,
  3279. CODEC_ID_SNOW,
  3280. sizeof(SnowContext),
  3281. decode_init,
  3282. NULL,
  3283. decode_end,
  3284. decode_frame,
  3285. 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
  3286. NULL
  3287. };
  3288. AVCodec snow_encoder = {
  3289. "snow",
  3290. CODEC_TYPE_VIDEO,
  3291. CODEC_ID_SNOW,
  3292. sizeof(SnowContext),
  3293. encode_init,
  3294. encode_frame,
  3295. encode_end,
  3296. };
  3297. #if 0
  3298. #undef malloc
  3299. #undef free
  3300. #undef printf
  3301. int main(){
  3302. int width=256;
  3303. int height=256;
  3304. int buffer[2][width*height];
  3305. SnowContext s;
  3306. int i;
  3307. s.spatial_decomposition_count=6;
  3308. s.spatial_decomposition_type=1;
  3309. printf("testing 5/3 DWT\n");
  3310. for(i=0; i<width*height; i++)
  3311. buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
  3312. spatial_dwt(&s, buffer[0], width, height, width);
  3313. spatial_idwt(&s, buffer[0], width, height, width);
  3314. for(i=0; i<width*height; i++)
  3315. if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
  3316. printf("testing 9/7 DWT\n");
  3317. s.spatial_decomposition_type=0;
  3318. for(i=0; i<width*height; i++)
  3319. buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
  3320. spatial_dwt(&s, buffer[0], width, height, width);
  3321. spatial_idwt(&s, buffer[0], width, height, width);
  3322. for(i=0; i<width*height; i++)
  3323. if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
  3324. printf("testing AC coder\n");
  3325. memset(s.header_state, 0, sizeof(s.header_state));
  3326. ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
  3327. ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
  3328. for(i=-256; i<256; i++){
  3329. START_TIMER
  3330. put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
  3331. STOP_TIMER("put_symbol")
  3332. }
  3333. put_cabac_terminate(&s.c, 1);
  3334. memset(s.header_state, 0, sizeof(s.header_state));
  3335. ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
  3336. ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
  3337. for(i=-256; i<256; i++){
  3338. int j;
  3339. START_TIMER
  3340. j= get_symbol(&s.c, s.header_state, 1);
  3341. STOP_TIMER("get_symbol")
  3342. if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
  3343. }
  3344. {
  3345. int level, orientation, x, y;
  3346. int64_t errors[8][4];
  3347. int64_t g=0;
  3348. memset(errors, 0, sizeof(errors));
  3349. s.spatial_decomposition_count=3;
  3350. s.spatial_decomposition_type=0;
  3351. for(level=0; level<s.spatial_decomposition_count; level++){
  3352. for(orientation=level ? 1 : 0; orientation<4; orientation++){
  3353. int w= width >> (s.spatial_decomposition_count-level);
  3354. int h= height >> (s.spatial_decomposition_count-level);
  3355. int stride= width << (s.spatial_decomposition_count-level);
  3356. DWTELEM *buf= buffer[0];
  3357. int64_t error=0;
  3358. if(orientation&1) buf+=w;
  3359. if(orientation>1) buf+=stride>>1;
  3360. memset(buffer[0], 0, sizeof(int)*width*height);
  3361. buf[w/2 + h/2*stride]= 256*256;
  3362. spatial_idwt(&s, buffer[0], width, height, width);
  3363. for(y=0; y<height; y++){
  3364. for(x=0; x<width; x++){
  3365. int64_t d= buffer[0][x + y*width];
  3366. error += d*d;
  3367. if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
  3368. }
  3369. if(ABS(height/2-y)<9 && level==2) printf("\n");
  3370. }
  3371. error= (int)(sqrt(error)+0.5);
  3372. errors[level][orientation]= error;
  3373. if(g) g=ff_gcd(g, error);
  3374. else g= error;
  3375. }
  3376. }
  3377. printf("static int const visual_weight[][4]={\n");
  3378. for(level=0; level<s.spatial_decomposition_count; level++){
  3379. printf(" {");
  3380. for(orientation=0; orientation<4; orientation++){
  3381. printf("%8lld,", errors[level][orientation]/g);
  3382. }
  3383. printf("},\n");
  3384. }
  3385. printf("};\n");
  3386. {
  3387. int level=2;
  3388. int orientation=3;
  3389. int w= width >> (s.spatial_decomposition_count-level);
  3390. int h= height >> (s.spatial_decomposition_count-level);
  3391. int stride= width << (s.spatial_decomposition_count-level);
  3392. DWTELEM *buf= buffer[0];
  3393. int64_t error=0;
  3394. buf+=w;
  3395. buf+=stride>>1;
  3396. memset(buffer[0], 0, sizeof(int)*width*height);
  3397. #if 1
  3398. for(y=0; y<height; y++){
  3399. for(x=0; x<width; x++){
  3400. int tab[4]={0,2,3,1};
  3401. buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
  3402. }
  3403. }
  3404. spatial_dwt(&s, buffer[0], width, height, width);
  3405. #else
  3406. for(y=0; y<h; y++){
  3407. for(x=0; x<w; x++){
  3408. buf[x + y*stride ]=169;
  3409. buf[x + y*stride-w]=64;
  3410. }
  3411. }
  3412. spatial_idwt(&s, buffer[0], width, height, width);
  3413. #endif
  3414. for(y=0; y<height; y++){
  3415. for(x=0; x<width; x++){
  3416. int64_t d= buffer[0][x + y*width];
  3417. error += d*d;
  3418. if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
  3419. }
  3420. if(ABS(height/2-y)<9) printf("\n");
  3421. }
  3422. }
  3423. }
  3424. return 0;
  3425. }
  3426. #endif