PythonWise

If it won't be simple, it simply won't be. [Hire me, source code] by Miki Tebeka, CEO, 353Solutions

Friday, July 03, 2009

Little Genetics

#!/usr/bin/env python
'''Playing with genetic algorithms, see
http://en.wikipedia.org/wiki/Genetic_programming.

The main idea is that the "chromosome" represents the variables in our algorithm
and we have a fitness function to check how good it is. For each generation we
keep the best chromosomes and then mutate and cross over some of them.

Since the best chromosomes move from generation to generation, we cache the
fitness function results.

I'm pretty sure I got the basis for this from somewhere on the net, I just don't
remember where :)
'''

from itertools import starmap
from random import random, randint, choice
from sys import stdout

# Chance (0.0 - 1.0) that mutate_gene() replaces a given gene with a new value.
MUTATE_PROBABILITY = 0.1

def mutate_gene(n, range):
    '''Return gene `n`, possibly replaced by a different value from `range`.

    n     -- current value of the gene
    range -- inclusive (low, high) pair, as accepted by random.randint

    With probability MUTATE_PROBABILITY the gene is replaced by a random value
    from `range` that is guaranteed to differ from `n`; otherwise `n` is
    returned unchanged.
    '''
    low, high = range

    # A one-value range that already holds `n` has no other value to offer;
    # without this guard the retry loop below would never terminate.
    if low == high == n:
        return n

    if random() > MUTATE_PROBABILITY:
        return n

    while True:
        # Make sure we mutated something
        new = randint(low, high)
        if new != n:
            return new

def mutate(chromosome, ranges):
    '''Return a mutated copy of `chromosome` as a tuple.

    chromosome -- sequence of gene values
    ranges     -- one inclusive (low, high) pair per gene

    Every gene is passed through mutate_gene() and the whole pass is repeated
    until the result differs from `chromosome`. When no gene can possibly
    change (empty chromosome, or every range holds only the gene's current
    value) the chromosome is returned as is instead of retrying forever.
    '''
    pairs = list(zip(chromosome, ranges))

    # Nothing can change: bail out rather than spin in the retry loop.
    if all(low == high == gene for gene, (low, high) in pairs):
        return tuple(chromosome)

    while True:
        new = tuple(starmap(mutate_gene, pairs))
        if new != chromosome:
            return new

def crossover(chromosome1, chromosome2):
    '''Build a child chromosome by picking each gene at random from one of
    the two parents (position by position).'''
    return tuple(choice(pair) for pair in zip(chromosome1, chromosome2))

def make_chromosome(ranges):
    '''Create a random chromosome: one random integer gene per entry in
    `ranges`, each entry being the argument pair handed to randint.'''
    return tuple(randint(*bounds) for bounds in ranges)

def breed(population, size, ranges):
    '''Grow `population` to `size` chromosomes.

    The existing chromosomes are kept; the rest is filled, two at a time, with
    a crossover of two random members followed by a mutation of a random
    member. The result is trimmed to exactly `size` entries.
    '''
    offspring = population[:]
    while len(offspring) < size:
        first_parent = choice(population)
        second_parent = choice(population)
        offspring.append(crossover(first_parent, second_parent))
        offspring.append(mutate(choice(population), ranges))

    return offspring[:size]

def evaluate(fitness, chromosome, data, cache):
    '''Return the fitness of `chromosome`, computing it with
    fitness(chromosome, data) only if it is not already stored in `cache`.'''
    if chromosome in cache:
        return cache[chromosome]

    score = fitness(chromosome, data)
    cache[chromosome] = score
    return score

def update_score_cache(population, fitness, data, cache):
    '''Make sure every chromosome in `population` has a score in `cache`,
    calling fitness(chromosome, data) only for the ones that are missing.'''
    for candidate in population:
        if candidate not in cache:
            cache[candidate] = fitness(candidate, data)

def find_solution(fitness, data, ranges, popsize, nruns, verbose=0):
    '''Evolve a random population and return the fittest chromosome found.

    fitness -- callable(chromosome, data) returning a score; higher is better
    data    -- handed untouched to `fitness`
    ranges  -- one inclusive (low, high) pair per gene
    popsize -- number of chromosomes in every generation
    nruns   -- number of generations to run
    verbose -- when true, print the best chromosome of every generation
               (the report format assumes two-gene chromosomes)

    Returns a (chromosome, score) tuple. Raises ValueError if popsize < 1.
    '''
    if popsize < 1:
        raise ValueError("popsize must be at least 1")

    score_cache = {}
    population = [make_chromosome(ranges) for _ in range(popsize)]

    # Keep the top quarter of each generation, but never fewer than one
    # chromosome: breed() needs something to pick parents from.
    keep = max(1, popsize // 4)

    for generation in range(nruns):
        update_score_cache(population, fitness, data, score_cache)
        population.sort(key=score_cache.get, reverse=True)
        if verbose:
            best = population[0]
            err = score_cache[best]
            print("%s: a=%s, b=%s, err=%s" % (generation, best[0], best[1], err))

        population = breed(population[:keep], popsize, ranges)

    # The chromosomes bred last have no score yet; score them before the final
    # ranking so they can compete (and so the sort key is never None).
    update_score_cache(population, fitness, data, score_cache)
    population.sort(key=score_cache.get, reverse=True)
    best = population[0]
    return best, score_cache[best]

def test(show_graph=1):
    '''Try to find a linear equation a*x + b that is closest to
    100 * log(x + 1) for x in 0..99, print it and optionally plot it.

    show_graph -- when true, plot the target data and the fitted line with
                  pylab
    '''
    from math import log

    # Real lists (not lazy iterators) so they can be indexed, zipped and
    # plotted more than once.
    xs = list(range(100))
    data = [log(x + 1) * 100 for x in xs]

    def fitness(chromosome, data):
        '''Return 1 / (average absolute error) of a*x + b against `data`.'''
        a, b = chromosome
        diffs = [abs(a * x + b - y) for x, y in zip(xs, data)]

        # We want minimal error so return 1/error
        return 1 / (sum(diffs) / len(diffs))

    (a, b), err = find_solution(fitness, data, ((0, 100), (0, 100)), 10, 100, 1)
    # NOTE: `err` is the fitness score returned by the solver, i.e. 1/error.
    print("best: a=%s, b=%s (error=%s)" % (a, b, err))

    if not show_graph:
        return

    # Show a nice plot
    data2 = [a * x + b for x in xs]

    import pylab
    l1, l2 = pylab.plot(xs, data, xs, data2)
    pylab.legend((l1, l2), ("log(x+1)", "%s * x + %s" % (a, b)))
    pylab.show()

# Run the demo (with the plot) when executed as a script.
if __name__ == "__main__":
    test()