You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

96 lines
3.2KB

  1. #!/usr/bin/python2.5
  2. #
  3. # Copyright 2009 Olivier Gillet.
  4. #
  5. # Author: Olivier Gillet (ol.gillet@gmail.com)
  6. #
  7. # This program is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, either version 3 of the License, or
  10. # (at your option) any later version.
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. #
  18. # -----------------------------------------------------------------------------
  19. #
  20. # Self-organizing map.
  21. import numpy
  22. import random
  class SOM(object):
      """Self-organizing map laid out on a square lattice.

      The map holds ``grid_size * grid_size`` codewords (one per lattice node).
      Codewords are created by `train` or loaded by `resume`; until one of
      those has been called `_codewords` is None and `classify` cannot be used.
      """

      def __init__(self, grid_size, radius, learning_rate):
          """Set up the lattice geometry and the initial training parameters.

          Args:
            grid_size: number of nodes along each side of the square lattice.
            radius: neighborhood radius used at the first training iteration.
            learning_rate: learning rate used at the first training iteration.
          """
          self._grid_size = grid_size
          # Lattice coordinates of every node, with nodes numbered row by row.
          node_index = numpy.arange(0, grid_size * grid_size)
          self._grid_x = node_index % grid_size
          # Floor division keeps the row coordinate integral even when `/`
          # means true division (Python 3, or `from __future__ import division`).
          self._grid_y = node_index // grid_size
          self._radius = radius
          self._learning_rate = learning_rate
          self._codewords = None
          self._error_history = []

      @staticmethod
      def standardize(x, std_power=1, axis=0, regularization=0.0):
          """Center `x` and scale it by its standard deviation along `axis`.

          Args:
            x: numpy array to standardize.
            std_power: exponent applied to the standard deviation before dividing.
            axis: axis along which mean and standard deviation are computed;
              None uses the statistics of the whole array.
            regularization: constant added to the divisor. With the default of
              0.0, a slice with zero standard deviation divides by zero.

          Returns:
            A new array ``(x - mean) / (std ** std_power + regularization)``.
          """
          mean = x.mean(axis=axis)
          std = x.std(axis=axis)
          if axis is not None:
              # Put the reduced axis back so the statistics broadcast against
              # `x` for any axis, not only axis 0.
              mean = numpy.expand_dims(mean, axis)
              std = numpy.expand_dims(std, axis)
          return (x - mean) / (std ** std_power + regularization)

      def classify(self, data):
          """Find the codeword closest to a single vector.

          Args:
            data: vector with one entry per codeword dimension.

          Returns:
            A tuple ``(bmu, distances)``: the index of the nearest codeword and
            the squared Euclidean distance from `data` to every codeword.
          """
          distances = ((self._codewords - data) ** 2).sum(axis=1)
          return distances.argmin(), distances

      def train(self, data, iterations=10000, seed=42):
          """Fit the codewords to `data` by stochastic updates.

          Reseeds the global `numpy.random` and `random` generators with `seed`,
          draws fresh random codewords, then repeatedly picks a random row of
          `data` and pulls the codewords around its best matching unit toward
          it. Over the run the radius shrinks by a factor of 4 and the learning
          rate by a factor of 128. A progress line is printed at iterations
          2, 4, 8, ...

          Args:
            data: 2-D numpy array, one sample per row.
            iterations: number of update steps.
            seed: seed for both random generators.

          Returns:
            The list of squared distances between each drawn sample and its
            best matching unit, one entry per iteration.
          """
          _, d = data.shape
          nodes = self._grid_size
          numpy.random.seed(seed)
          random.seed(seed)
          self._codewords = numpy.random.randn(nodes * nodes, d)
          self._error_history = []
          try:
              steps = xrange(iterations)  # Lazy counter on Python 2.
          except NameError:
              steps = range(iterations)  # Python 3: range is already lazy.
          milestone = 2
          for i in steps:
              if i == milestone:
                  # Same text the Python 2 print statement produced.
                  print('iteration %s of %s \t%.1f %%' % (
                      i, iterations, round(1000.0 * i / iterations) * 0.1))
                  milestone <<= 1
              radius = self._radius * 2 ** (-2 * float(i) / iterations)
              learning_rate = self._learning_rate * 2 ** (-7 * float(i) / iterations)
              # Pick a random vector.
              x = random.choice(data)
              # Find best matching unit.
              bmu, distances = self.classify(x)
              self._error_history.append(distances[bmu])
              # Compute neighborhood update function.
              delta_x = self._grid_x[bmu] - self._grid_x
              delta_y = self._grid_y[bmu] - self._grid_y
              rbf = numpy.exp(-(delta_x ** 2 + delta_y ** 2) / (radius * radius))
              # Broadcasting replaces the explicit tiling of `rbf` and `x`.
              update = rbf[:, numpy.newaxis] * (x - self._codewords)
              self._codewords += learning_rate * update
          return self._error_history

      def checkpoint(self, filename='weights.npy'):
          """Save the codewords to `filename` in numpy's .npy format."""
          numpy.save(filename, self._codewords)

      def resume(self, filename='weights.npy'):
          """Load the codewords previously saved to `filename`."""
          self._codewords = numpy.load(filename)

      def plot(self, x):
          """Save one response image per row of `x`.

          For row ``i`` the value ``exp(-squared distance)`` of every node is
          drawn on the lattice and written to ``response_<i>.pdf``.

          Args:
            x: 2-D numpy array, one sample per row.
          """
          import pylab
          for i, sample in enumerate(x):
              _, d = self.classify(sample)
              d = numpy.exp(-d)
              figure = pylab.figure()
              pylab.imshow(
                  d.reshape((self._grid_size, self._grid_size)),
                  interpolation='nearest')
              pylab.savefig('response_%d.pdf' % i)
              # Release the figure; otherwise one stays open per sample.
              pylab.close(figure)