You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

233 lines
7.2KB

  1. #!/usr/bin/python2.6
  2. #
  3. # Author: Olivier Gillet (ol.gillet@gmail.com)
  4. """Class and functions to read and write numpy array from and to audio files."""
  5. import copy
  6. import logging
  7. import numpy
  8. import struct
  9. import sys
  10. sys.path.append('.')
# Scale factors used when converting between unsigned char and float samples.
# Different values are used for the two directions to avoid clipping.
_UNSIGNED_CHAR_TO_FLOAT_SCALE = 128.0
_FLOAT_TO_UNSIGNED_CHAR_SCALE = 127.0
# Sizes, in bytes, of the pieces of a .wav file that WriteWavFile adds up to
# compute the size field of the RIFF header.
_DATA_CHUNK_HEADER_SIZE = 8  # 'data' chunk ID plus its 32-bit length field.
_FMT_CHUNK_DATA_SIZE = 16  # Payload of the 'fmt ' chunk.
_FMT_CHUNK_HEADER_SIZE = 8  # 'fmt ' chunk ID plus its 32-bit length field.
_RIFF_FORMAT_DESCRIPTOR_SIZE = 4  # The 'WAVE' format tag.
  19. class AudioIoException(Exception):
  20. """An error indicating a failure in audio file reading/writing."""
  21. def __init__(self, message):
  22. """Initializes an AudioIoException object."""
  23. Exception.__init__(self, 'Audio IO error: %s' % message)
  24. def _ReadBytesOrFail(file_object, num_bytes, error_message):
  25. """Read a given number of bytes from the file or raise an error.
  26. Args:
  27. file_object: file object.
  28. num_bytes: int. number of bytes to read.
  29. error_message: string. text message of the exception thrown when the number
  30. of bytes could not be read (for example, identifying which section the
  31. caller attempted to read.)
  32. Returns:
  33. String with the bytes read from the file.
  34. Raises:
  35. AudioIoException:
  36. - The required number of bytes could not be read from the file.
  37. """
  38. read = file_object.read(num_bytes)
  39. if len(read) < num_bytes:
  40. raise AudioIoException(error_message)
  41. return read
  42. def _GoToIffChunk(file_object, iff_chunk_id):
  43. """Jump to a named chunk in a (R)IFF file.
  44. Args:
  45. file_object: file object.
  46. iff_chunk_id: 4 chars ID of the chunk.
  47. Returns:
  48. length of the chunk in bytes. -1 if the chunk has not been found.
  49. If the chunk is found, file_object is positioned at the beginning of the
  50. chunk. Otherwise, it is positioned at the end of the file.
  51. """
  52. while True:
  53. chunk_id = file_object.read(4)
  54. if len(chunk_id) < 4:
  55. return -1
  56. chunk_size = file_object.read(4)
  57. if len(chunk_size) < 4:
  58. return -1
  59. chunk_size = struct.unpack('<L', chunk_size)
  60. if iff_chunk_id == chunk_id:
  61. return chunk_size[0]
  62. else:
  63. file_object.seek(chunk_size, 1)
  64. def ReadWavFile(file_name, scale=True):
  65. """Read a .wav file into a numpy array.
  66. Note: the FFmpeg based AudioDecoder is more generic, use this only as a
  67. low-level alternative to AudioDecoder.
  68. Args:
  69. file_name: string. name of the local file to load.
  70. scale: boolean. if True, returns float data in the [-1, 1] range instead
  71. of integers.
  72. Returns:
  73. 2-dimensional numpy array of size (num_samples, num_channels)
  74. Raises:
  75. AudioIoException:
  76. - The file header is corrupted.
  77. - The file uses an unsupported sampling rate, bitdepth or codec.
  78. """
  79. f = file(file_name, 'r')
  80. header = f.read(12)
  81. if len(header) < 12 or header[:4] != 'RIFF' or header[8:] != 'WAVE':
  82. raise AudioIoException('Corrupted header')
  83. format_header_size = _GoToIffChunk(f, 'fmt ')
  84. if format_header_size < 0 or format_header_size != 16:
  85. raise AudioIoException('Invalid header size')
  86. format_header = _ReadBytesOrFail(f, 16, 'Corrupted header')
  87. compression, num_channels, sample_rate, _, _, bitdepth = struct.unpack(
  88. '<HHLLHH', format_header)
  89. if compression != 1:
  90. raise AudioIoException('Unknown .wav codec: %d' % compression)
  91. if not num_channels:
  92. raise AudioIoException('Wrong number of channels')
  93. if sample_rate < 1000 or sample_rate > 96000:
  94. raise AudioIoException('Invalid sample rate')
  95. if bitdepth != 8 and bitdepth != 16:
  96. raise AudioIoException('Unsupported bit depth')
  97. sample_data_size = _GoToIffChunk(f, 'data')
  98. num_samples = sample_data_size / (bitdepth / 8)
  99. # Make sure we are reading a number of samples which is a multiple of the
  100. # number of channels. Some corrupted stereo .wav files may contain 5 samples!
  101. num_samples -= num_samples % num_channels
  102. if bitdepth == 8:
  103. samples = numpy.fromfile(f, dtype=numpy.ubyte, count=num_samples)
  104. if scale:
  105. samples = (samples / _UNSIGNED_CHAR_TO_FLOAT_SCALE) - 1.0
  106. else:
  107. bytes = bitdepth / 8
  108. samples = numpy.fromfile(f, dtype='<i%d' % bytes, count=num_samples)
  109. if scale:
  110. # Semantics of x = x / y and x /= y are different when x and y are
  111. # numpy arrays of a different type. x /= y casts to y's type, while
  112. # x = x / y casts to x's type.
  113. # pylint: disable-msg=C6407
  114. samples = samples / float(1 << (bitdepth - 1))
  115. return (samples.reshape(-1, num_channels), sample_rate)
  116. def Quantize(signal, bitdepth, normalize=True):
  117. """Convert an array of float to an array of integers.
  118. Args:
  119. signal: numpy array. source signal.
  120. bitdepth: int. size of the integer in bits.
  121. normalize: boolean. whether samples should be scaled to use all the
  122. available dynamic range.
  123. Returns:
  124. array of integers.
  125. """
  126. norm = numpy.abs(signal).max()
  127. # Normalization or clipping.
  128. if normalize and norm > 0:
  129. scaled_signal = signal / norm
  130. else:
  131. scaled_signal = copy.copy(signal)
  132. if norm > 1.0:
  133. logging.warning('Some samples will be clipped.')
  134. # Clip samples above 1 and below -1.
  135. scaled_signal[scaled_signal < -1] = -1
  136. scaled_signal[scaled_signal > 1] = 1
  137. if bitdepth == 8:
  138. scaled_signal = (scaled_signal + 1.0) * _FLOAT_TO_UNSIGNED_CHAR_SCALE
  139. scaled_signal = numpy.array(scaled_signal, dtype=numpy.uint8)
  140. else:
  141. scale = (1 << (bitdepth - 1)) - 1
  142. # pylint: disable-msg=C6407
  143. scaled_signal = scaled_signal * scale
  144. scaled_signal = numpy.array(scaled_signal, dtype='i%d' % (bitdepth / 8))
  145. return scaled_signal
  146. def WriteWavFile(signal, sample_rate, file_name, bitdepth=16, normalize=True):
  147. """Write a .wav file from a numpy array.
  148. Args:
  149. signal: 2-dimensional numpy array, of size (num_samples, num_channels).
  150. sample_rate: int. sample rate of the signal in Hz.
  151. file_name: string. name of the destination file.
  152. bitdepth: int. bitdepth in bits (default 16).
  153. normalize: boolean. if set to True, scale the data to the [-1, 1] range
  154. before writing.
  155. """
  156. if signal.dtype == numpy.uint8 or signal.dtype == numpy.int16:
  157. bitdepth = signal.dtype.itemsize * 8
  158. scaled_signal = signal
  159. else:
  160. scaled_signal = Quantize(signal, bitdepth, normalize=normalize)
  161. if scaled_signal.ndim == 1:
  162. num_channels = 1
  163. else:
  164. num_channels = scaled_signal.shape[1]
  165. # Compute the total size of the output .wav file, minus the size of the
  166. # first two fields of the RIFF header.
  167. # RIFF Format.
  168. total_size = _RIFF_FORMAT_DESCRIPTOR_SIZE
  169. # 'fmt ' chunk.
  170. total_size += _FMT_CHUNK_HEADER_SIZE + _FMT_CHUNK_DATA_SIZE
  171. # 'data' chunk.
  172. total_size += _DATA_CHUNK_HEADER_SIZE + scaled_signal.nbytes
  173. f = file(file_name, 'w')
  174. try:
  175. f.write('RIFF')
  176. f.write(struct.pack('<L', total_size))
  177. f.write('WAVEfmt ')
  178. bitrate = sample_rate * num_channels * (bitdepth / 8)
  179. bits_per_sample = num_channels * (bitdepth / 8)
  180. f.write(struct.pack('<LHHLLHH', 16, 1, num_channels, sample_rate, bitrate,
  181. bits_per_sample, bitdepth))
  182. f.write('data')
  183. f.write(struct.pack('<L', scaled_signal.nbytes))
  184. scaled_signal.tofile(f)
  185. finally:
  186. f.close()