| 
							- #!/usr/bin/python2.5
 - #
 - # Copyright 2009 Olivier Gillet.
 - #
 - # Author: Olivier Gillet (ol.gillet@gmail.com)
 - #
 - # This program is free software: you can redistribute it and/or modify
 - # it under the terms of the GNU General Public License as published by
 - # the Free Software Foundation, either version 3 of the License, or
 - # (at your option) any later version.
 - # This program is distributed in the hope that it will be useful,
 - # but WITHOUT ANY WARRANTY; without even the implied warranty of
 - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 - # GNU General Public License for more details.
 - # You should have received a copy of the GNU General Public License
 - # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 - #
 - # -----------------------------------------------------------------------------
 - #
 - # Self-organizing map.
 - 
 - import numpy
 - import random
 - 
 - 
 - class SOM(object):
 -   
 -   def __init__(self, grid_size, radius, learning_rate):
 -     self._grid_size = grid_size
 -     self._grid_x = numpy.arange(0, grid_size * grid_size) % grid_size
 -     self._grid_y = numpy.arange(0, grid_size * grid_size) / grid_size
 -     self._radius = radius
 -     self._learning_rate = learning_rate
 -     self._codewords = None
 - 
 -   @staticmethod
 -   def standardize(x, std_power=1, axis=0, regularization=0.0):
 -     t, n = x.shape
 -     mean = x.mean(axis=axis)
 -     std = x.std(axis=axis) ** std_power + regularization
 -     return (x - mean) / std
 -     
 -   def classify(self, data):
 -     distances = ((self._codewords - data) ** 2).sum(axis=1)
 -     return distances.argmin(), distances
 -     
 -   def train(self, data, iterations=10000, seed=42):
 -     n, d = data.shape
 -     nodes = self._grid_size
 -     numpy.random.seed(seed)
 -     random.seed(seed)
 -     self._codewords = numpy.random.randn(nodes * nodes, d)
 -     self._error_history = []
 -     milestone = 2
 -     for i in xrange(iterations):
 -       if i == milestone:
 -         print 'iteration', i, 'of', iterations, '\t', \
 -             round(1000.0 * i / iterations) * 0.1, '%'
 -         milestone <<= 1
 -       radius = self._radius * 2 ** (-2 * float(i) / iterations)
 -       learning_rate = self._learning_rate * 2 ** (-7 * float(i) / iterations)
 -       
 -       # Pick a random vector.
 -       x = random.choice(data)
 -       
 -       # Find best matching unit.
 -       bmu, distances = self.classify(x)
 -       self._error_history.append(distances[bmu])
 -       
 -       # Compute neighborhood update function.
 -       delta_x = self._grid_x[bmu] - self._grid_x
 -       delta_y = self._grid_y[bmu] - self._grid_y
 -       rbf = numpy.exp(-(delta_x ** 2 + delta_y ** 2) / (radius * radius))
 -       rbf = numpy.tile(rbf.reshape((nodes * nodes, 1)), (1, d))
 -       update = rbf * (numpy.tile(x, (nodes * nodes, 1)) - self._codewords)
 -       self._codewords += learning_rate * update
 -     return self._error_history
 -     
 -   def checkpoint(self):
 -     numpy.save('weights', self._codewords)
 - 
 -   def resume(self):
 -     self._codewords = numpy.load('weights.npy')
 - 
 -   def plot(self, x):
 -     import pylab
 - 
 -     for i in xrange(x.shape[0]):
 -       _, d = self.classify(x[i, :])
 -       d = numpy.exp(-d)
 -       pylab.figure()
 -       pylab.imshow(
 -           d.reshape((self._grid_size, self._grid_size)),
 -           interpolation='nearest')
 -       pylab.savefig('response_%d.pdf' % i)
 
 
  |