You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

441 lines
13KB

  1. /*
  2. * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
  3. *
  4. * Triangular with Noise Shaping is based on opusfile.
  5. * Copyright (c) 1994-2012 by the Xiph.Org Foundation and contributors
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * Dithered Audio Sample Quantization
  26. *
  27. * Converts from dbl, flt, or s32 to s16 using dithering.
  28. */
  29. #include <math.h>
  30. #include <stdint.h>
  31. #include "libavutil/attributes.h"
  32. #include "libavutil/common.h"
  33. #include "libavutil/lfg.h"
  34. #include "libavutil/mem.h"
  35. #include "libavutil/samplefmt.h"
  36. #include "audio_convert.h"
  37. #include "dither.h"
  38. #include "internal.h"
  39. typedef struct DitherState {
  40. int mute;
  41. unsigned int seed;
  42. AVLFG lfg;
  43. float *noise_buf;
  44. int noise_buf_size;
  45. int noise_buf_ptr;
  46. float dither_a[4];
  47. float dither_b[4];
  48. } DitherState;
  49. struct DitherContext {
  50. DitherDSPContext ddsp;
  51. enum AVResampleDitherMethod method;
  52. int apply_map;
  53. ChannelMapInfo *ch_map_info;
  54. int mute_dither_threshold; // threshold for disabling dither
  55. int mute_reset_threshold; // threshold for resetting noise shaping
  56. const float *ns_coef_b; // noise shaping coeffs
  57. const float *ns_coef_a; // noise shaping coeffs
  58. int channels;
  59. DitherState *state; // dither states for each channel
  60. AudioData *flt_data; // input data in fltp
  61. AudioData *s16_data; // dithered output in s16p
  62. AudioConvert *ac_in; // converter for input to fltp
  63. AudioConvert *ac_out; // converter for s16p to s16 (if needed)
  64. void (*quantize)(int16_t *dst, const float *src, float *dither, int len);
  65. int samples_align;
  66. };
  /**
   * Length of silence, in seconds, after which dithering is muted.
   */
  #define MUTE_THRESHOLD_SEC 0.000333

  /**
   * Scale factor for 16-bit output.
   * It is slightly below full scale so that the signal is attenuated a little
   * to avoid clipping.
   */
  #define S16_SCALE 32753.0f

  /**
   * Scale that maps LFG output from INT_MIN/INT_MAX to -0.5/0.5.
   */
  #define LFG_SCALE (1.0f / (2.0f * INT32_MAX))
  /* noise shaping filter coefficients, selected for 48000 Hz */
  static const float ns_48_coef_b[4] = { 2.2374f, -0.7339f, -0.1251f, -0.6033f };
  static const float ns_48_coef_a[4] = { 0.9030f,  0.0116f, -0.5853f, -0.2571f };

  /* noise shaping filter coefficients, selected for 44100 Hz */
  static const float ns_44_coef_b[4] = { 2.2061f, -0.4707f, -0.2534f, -0.6213f };
  static const float ns_44_coef_a[4] = { 1.0587f,  0.0676f, -0.6054f, -0.2738f };
  87. static void dither_int_to_float_rectangular_c(float *dst, int *src, int len)
  88. {
  89. int i;
  90. for (i = 0; i < len; i++)
  91. dst[i] = src[i] * LFG_SCALE;
  92. }
  93. static void dither_int_to_float_triangular_c(float *dst, int *src0, int len)
  94. {
  95. int i;
  96. int *src1 = src0 + len;
  97. for (i = 0; i < len; i++) {
  98. float r = src0[i] * LFG_SCALE;
  99. r += src1[i] * LFG_SCALE;
  100. dst[i] = r;
  101. }
  102. }
  103. static void quantize_c(int16_t *dst, const float *src, float *dither, int len)
  104. {
  105. int i;
  106. for (i = 0; i < len; i++)
  107. dst[i] = av_clip_int16(lrintf(src[i] * S16_SCALE + dither[i]));
  108. }
  #define SQRT_1_6 0.40824829046386301723f

  /**
   * High-pass filter the noise in place.
   *
   * The filter (taken from libswresample in FFmpeg) looks two samples ahead,
   * so the last two samples of the buffer are left unfiltered.
   *
   * @param src  noise buffer, filtered in place
   * @param len  number of samples in the buffer
   */
  static void dither_highpass_filter(float *src, int len)
  {
      const int last = len - 2;
      int n = 0;

      while (n < last) {
          const float cur   = src[n];
          const float next1 = src[n + 1];
          const float next2 = src[n + 2];

          src[n] = (-cur + 2 * next1 - next2) * SQRT_1_6;
          n++;
      }
  }
  117. static int generate_dither_noise(DitherContext *c, DitherState *state,
  118. int min_samples)
  119. {
  120. int i;
  121. int nb_samples = FFALIGN(min_samples, 16) + 16;
  122. int buf_samples = nb_samples *
  123. (c->method == AV_RESAMPLE_DITHER_RECTANGULAR ? 1 : 2);
  124. unsigned int *noise_buf_ui;
  125. av_freep(&state->noise_buf);
  126. state->noise_buf_size = state->noise_buf_ptr = 0;
  127. state->noise_buf = av_malloc(buf_samples * sizeof(*state->noise_buf));
  128. if (!state->noise_buf)
  129. return AVERROR(ENOMEM);
  130. state->noise_buf_size = FFALIGN(min_samples, 16);
  131. noise_buf_ui = (unsigned int *)state->noise_buf;
  132. av_lfg_init(&state->lfg, state->seed);
  133. for (i = 0; i < buf_samples; i++)
  134. noise_buf_ui[i] = av_lfg_get(&state->lfg);
  135. c->ddsp.dither_int_to_float(state->noise_buf, noise_buf_ui, nb_samples);
  136. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_HP)
  137. dither_highpass_filter(state->noise_buf, nb_samples);
  138. return 0;
  139. }
  140. static void quantize_triangular_ns(DitherContext *c, DitherState *state,
  141. int16_t *dst, const float *src,
  142. int nb_samples)
  143. {
  144. int i, j;
  145. float *dither = &state->noise_buf[state->noise_buf_ptr];
  146. if (state->mute > c->mute_reset_threshold)
  147. memset(state->dither_a, 0, sizeof(state->dither_a));
  148. for (i = 0; i < nb_samples; i++) {
  149. float err = 0;
  150. float sample = src[i] * S16_SCALE;
  151. for (j = 0; j < 4; j++) {
  152. err += c->ns_coef_b[j] * state->dither_b[j] -
  153. c->ns_coef_a[j] * state->dither_a[j];
  154. }
  155. for (j = 3; j > 0; j--) {
  156. state->dither_a[j] = state->dither_a[j - 1];
  157. state->dither_b[j] = state->dither_b[j - 1];
  158. }
  159. state->dither_a[0] = err;
  160. sample -= err;
  161. if (state->mute > c->mute_dither_threshold) {
  162. dst[i] = av_clip_int16(lrintf(sample));
  163. state->dither_b[0] = 0;
  164. } else {
  165. dst[i] = av_clip_int16(lrintf(sample + dither[i]));
  166. state->dither_b[0] = av_clipf(dst[i] - sample, -1.5f, 1.5f);
  167. }
  168. state->mute++;
  169. if (src[i])
  170. state->mute = 0;
  171. }
  172. }
  173. static int convert_samples(DitherContext *c, int16_t **dst, float * const *src,
  174. int channels, int nb_samples)
  175. {
  176. int ch, ret;
  177. int aligned_samples = FFALIGN(nb_samples, 16);
  178. for (ch = 0; ch < channels; ch++) {
  179. DitherState *state = &c->state[ch];
  180. if (state->noise_buf_size < aligned_samples) {
  181. ret = generate_dither_noise(c, state, nb_samples);
  182. if (ret < 0)
  183. return ret;
  184. } else if (state->noise_buf_size - state->noise_buf_ptr < aligned_samples) {
  185. state->noise_buf_ptr = 0;
  186. }
  187. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  188. quantize_triangular_ns(c, state, dst[ch], src[ch], nb_samples);
  189. } else {
  190. c->quantize(dst[ch], src[ch],
  191. &state->noise_buf[state->noise_buf_ptr],
  192. FFALIGN(nb_samples, c->samples_align));
  193. }
  194. state->noise_buf_ptr += aligned_samples;
  195. }
  196. return 0;
  197. }
  198. int ff_convert_dither(DitherContext *c, AudioData *dst, AudioData *src)
  199. {
  200. int ret;
  201. AudioData *flt_data;
  202. /* output directly to dst if it is planar */
  203. if (dst->sample_fmt == AV_SAMPLE_FMT_S16P)
  204. c->s16_data = dst;
  205. else {
  206. /* make sure s16_data is large enough for the output */
  207. ret = ff_audio_data_realloc(c->s16_data, src->nb_samples);
  208. if (ret < 0)
  209. return ret;
  210. }
  211. if (src->sample_fmt != AV_SAMPLE_FMT_FLTP || c->apply_map) {
  212. /* make sure flt_data is large enough for the input */
  213. ret = ff_audio_data_realloc(c->flt_data, src->nb_samples);
  214. if (ret < 0)
  215. return ret;
  216. flt_data = c->flt_data;
  217. }
  218. if (src->sample_fmt != AV_SAMPLE_FMT_FLTP) {
  219. /* convert input samples to fltp and scale to s16 range */
  220. ret = ff_audio_convert(c->ac_in, flt_data, src);
  221. if (ret < 0)
  222. return ret;
  223. } else if (c->apply_map) {
  224. ret = ff_audio_data_copy(flt_data, src, c->ch_map_info);
  225. if (ret < 0)
  226. return ret;
  227. } else {
  228. flt_data = src;
  229. }
  230. /* check alignment and padding constraints */
  231. if (c->method != AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  232. int ptr_align = FFMIN(flt_data->ptr_align, c->s16_data->ptr_align);
  233. int samples_align = FFMIN(flt_data->samples_align, c->s16_data->samples_align);
  234. int aligned_len = FFALIGN(src->nb_samples, c->ddsp.samples_align);
  235. if (!(ptr_align % c->ddsp.ptr_align) && samples_align >= aligned_len) {
  236. c->quantize = c->ddsp.quantize;
  237. c->samples_align = c->ddsp.samples_align;
  238. } else {
  239. c->quantize = quantize_c;
  240. c->samples_align = 1;
  241. }
  242. }
  243. ret = convert_samples(c, (int16_t **)c->s16_data->data,
  244. (float * const *)flt_data->data, src->channels,
  245. src->nb_samples);
  246. if (ret < 0)
  247. return ret;
  248. c->s16_data->nb_samples = src->nb_samples;
  249. /* interleave output to dst if needed */
  250. if (dst->sample_fmt == AV_SAMPLE_FMT_S16) {
  251. ret = ff_audio_convert(c->ac_out, dst, c->s16_data);
  252. if (ret < 0)
  253. return ret;
  254. } else
  255. c->s16_data = NULL;
  256. return 0;
  257. }
  258. void ff_dither_free(DitherContext **cp)
  259. {
  260. DitherContext *c = *cp;
  261. int ch;
  262. if (!c)
  263. return;
  264. ff_audio_data_free(&c->flt_data);
  265. ff_audio_data_free(&c->s16_data);
  266. ff_audio_convert_free(&c->ac_in);
  267. ff_audio_convert_free(&c->ac_out);
  268. for (ch = 0; ch < c->channels; ch++)
  269. av_free(c->state[ch].noise_buf);
  270. av_free(c->state);
  271. av_freep(cp);
  272. }
  273. static av_cold void dither_init(DitherDSPContext *ddsp,
  274. enum AVResampleDitherMethod method)
  275. {
  276. ddsp->quantize = quantize_c;
  277. ddsp->ptr_align = 1;
  278. ddsp->samples_align = 1;
  279. if (method == AV_RESAMPLE_DITHER_RECTANGULAR)
  280. ddsp->dither_int_to_float = dither_int_to_float_rectangular_c;
  281. else
  282. ddsp->dither_int_to_float = dither_int_to_float_triangular_c;
  283. if (ARCH_X86)
  284. ff_dither_init_x86(ddsp, method);
  285. }
  286. DitherContext *ff_dither_alloc(AVAudioResampleContext *avr,
  287. enum AVSampleFormat out_fmt,
  288. enum AVSampleFormat in_fmt,
  289. int channels, int sample_rate, int apply_map)
  290. {
  291. AVLFG seed_gen;
  292. DitherContext *c;
  293. int ch;
  294. if (av_get_packed_sample_fmt(out_fmt) != AV_SAMPLE_FMT_S16 ||
  295. av_get_bytes_per_sample(in_fmt) <= 2) {
  296. av_log(avr, AV_LOG_ERROR, "dithering %s to %s is not supported\n",
  297. av_get_sample_fmt_name(in_fmt), av_get_sample_fmt_name(out_fmt));
  298. return NULL;
  299. }
  300. c = av_mallocz(sizeof(*c));
  301. if (!c)
  302. return NULL;
  303. c->apply_map = apply_map;
  304. if (apply_map)
  305. c->ch_map_info = &avr->ch_map_info;
  306. if (avr->dither_method == AV_RESAMPLE_DITHER_TRIANGULAR_NS &&
  307. sample_rate != 48000 && sample_rate != 44100) {
  308. av_log(avr, AV_LOG_WARNING, "sample rate must be 48000 or 44100 Hz "
  309. "for triangular_ns dither. using triangular_hp instead.\n");
  310. avr->dither_method = AV_RESAMPLE_DITHER_TRIANGULAR_HP;
  311. }
  312. c->method = avr->dither_method;
  313. dither_init(&c->ddsp, c->method);
  314. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  315. if (sample_rate == 48000) {
  316. c->ns_coef_b = ns_48_coef_b;
  317. c->ns_coef_a = ns_48_coef_a;
  318. } else {
  319. c->ns_coef_b = ns_44_coef_b;
  320. c->ns_coef_a = ns_44_coef_a;
  321. }
  322. }
  323. /* Either s16 or s16p output format is allowed, but s16p is used
  324. internally, so we need to use a temp buffer and interleave if the output
  325. format is s16 */
  326. if (out_fmt != AV_SAMPLE_FMT_S16P) {
  327. c->s16_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_S16P,
  328. "dither s16 buffer");
  329. if (!c->s16_data)
  330. goto fail;
  331. c->ac_out = ff_audio_convert_alloc(avr, out_fmt, AV_SAMPLE_FMT_S16P,
  332. channels, sample_rate, 0);
  333. if (!c->ac_out)
  334. goto fail;
  335. }
  336. if (in_fmt != AV_SAMPLE_FMT_FLTP || c->apply_map) {
  337. c->flt_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_FLTP,
  338. "dither flt buffer");
  339. if (!c->flt_data)
  340. goto fail;
  341. }
  342. if (in_fmt != AV_SAMPLE_FMT_FLTP) {
  343. c->ac_in = ff_audio_convert_alloc(avr, AV_SAMPLE_FMT_FLTP, in_fmt,
  344. channels, sample_rate, c->apply_map);
  345. if (!c->ac_in)
  346. goto fail;
  347. }
  348. c->state = av_mallocz(channels * sizeof(*c->state));
  349. if (!c->state)
  350. goto fail;
  351. c->channels = channels;
  352. /* calculate thresholds for turning off dithering during periods of
  353. silence to avoid replacing digital silence with quiet dither noise */
  354. c->mute_dither_threshold = lrintf(sample_rate * MUTE_THRESHOLD_SEC);
  355. c->mute_reset_threshold = c->mute_dither_threshold * 4;
  356. /* initialize dither states */
  357. av_lfg_init(&seed_gen, 0xC0FFEE);
  358. for (ch = 0; ch < channels; ch++) {
  359. DitherState *state = &c->state[ch];
  360. state->mute = c->mute_reset_threshold + 1;
  361. state->seed = av_lfg_get(&seed_gen);
  362. generate_dither_noise(c, state, FFMAX(32768, sample_rate / 2));
  363. }
  364. return c;
  365. fail:
  366. ff_dither_free(&c);
  367. return NULL;
  368. }