Author: Morvan
Visualize Genetic Algorithm to match the target phrase.
Visit tutorial website for more: https://morvanzhou.github.io/tutorials/git
Absolutely, it's satisfying.
With only 24 generations(Maybe a little more or less, randomly.), the sentence matches perfectly.github
The whole code is consists of three parts.
Some const variables , the class GA and the main function.web
TARGET_PHRASE = 'I love you!' # target DNA POP_SIZE = 300 # population size CROSS_RATE = 0.4 # mating probability (DNA crossover) MUTATION_RATE = 0.01 # mutation probability N_GENERATIONS = 1000 # max generation DNA_SIZE = len(TARGET_PHRASE) # the size of DNA # target ASCII indices of the target string, convert string to number TARGET_ASCII = np.fromstring(TARGET_PHRASE, dtype=np.uint8) ASCII_BOUND = [32, 126] # the bounds of the ASCII charactors
Firstly, we constuct a GA object ,named ga with the const variables above.app
ga = GA(DNA_size=DNA_SIZE, DNA_bound=ASCII_BOUND, cross_rate=CROSS_RATE, mutation_rate=MUTATION_RATE, pop_size=POP_SIZE)
Then, execute a for loop in the max generationless
for generation in range(N_GENERATIONS): fitness = ga.get_fitness() # get the fitness of the current population best_DNA = ga.pop[np.argmax(fitness)] # best matching DNA best_phrase = ga.translateDNA(best_DNA) # translate the DNA to string we can read print('Gen', generation, ': ', best_phrase) # print the string if best_phrase == TARGET_PHRASE: # as if the string matches perfectly, break the loop break ga.evolve() # if not, evolve to the next generation
def __init__(self, DNA_size, DNA_bound, cross_rate, mutation_rate, pop_size): self.DNA_size = DNA_size # the size of DNA DNA_bound[1] += 1 self.DNA_bound = DNA_bound # ASCII Bound self.cross_rate = cross_rate self.mutate_rate = mutation_rate self.pop_size = pop_size # population # define the population of first generation self.pop = np.random.randint(*DNA_bound, size=(pop_size, DNA_size)).astype(np.int8) # int8 for convert to ASCII
To initialize the class with const variables abovedom
def translateDNA(self, DNA): # convert to readable string return DNA.tostring().decode('ascii')
Such as translate
[126, 34, 45, 125, 46, 101, 32, 119, 38, 126, 33] to '~"-}.e w&~!'oop
def get_fitness(self): # count how many character matches match_count = (self.pop == TARGET_ASCII).sum(axis=1) return match_count
To calculate the fitness of every DNA in current generation.
List of array convert to list of int , int means match numbersui
def select(self): fitness = self.get_fitness() + 1e-4 # add a small amount to avoid all zero fitness idx = np.random.choice(np.arange(self.pop_size), size=self.pop_size, replace=True, p=fitness/fitness.sum()) return self.pop[idx]
To select better fit DNA
Every new array element depends on the probabilities of old array elements
(probability = element.fitness/fitness.sum())
The probability is higher, the more it will appear in the new arraythis
def crossover(self, parent, pop): if np.random.rand() < self.cross_rate: i_ = np.random.randint(0, self.pop_size, size=1) # select another individual from pop cross_points = np.random.randint(0, 2, self.DNA_size).astype(np.bool) # choose crossover points parent[cross_points] = pop[i_, cross_points] # mating and produce one child return parent
To cross this parent with another random parent in current generationcode
def mutate(self, child): for point in range(self.DNA_size): if np.random.rand() < self.mutate_rate: child[point] = np.random.randint(*self.DNA_bound) # choose a random ASCII index return child
Traverse all the elements in a DNA
Randomly convert a element to a random ASCII charactor index
def evolve(self): pop = self.select() pop_copy = pop.copy() for parent in pop: # for every parent child = self.crossover(parent, pop_copy) child = self.mutate(child) parent[:] = child self.pop = pop
To evolve into the next generation.
Firstly, select the better fitting ones
Then, creat a copy of current generation
Then, traverse everyone in current generation,
cross it with another random one in current generation
mutate randomly.
Finally, upgrade to the next generation
import numpy as np TARGET_PHRASE = 'I love you!' # target DNA POP_SIZE = 300 # population size CROSS_RATE = 0.4 # mating probability (DNA crossover) MUTATION_RATE = 0.01 # mutation probability N_GENERATIONS = 1000 DNA_SIZE = len(TARGET_PHRASE) TARGET_ASCII = np.fromstring(TARGET_PHRASE, dtype=np.uint8) # convert string to number ASCII_BOUND = [32, 126] class GA(object): def __init__(self, DNA_size, DNA_bound, cross_rate, mutation_rate, pop_size): self.DNA_size = DNA_size DNA_bound[1] += 1 self.DNA_bound = DNA_bound self.cross_rate = cross_rate self.mutate_rate = mutation_rate self.pop_size = pop_size self.pop = np.random.randint(*DNA_bound, size=(pop_size, DNA_size)).astype(np.int8) # int8 for convert to ASCII def translateDNA(self, DNA): # convert to readable string return DNA.tostring().decode('ascii') def get_fitness(self): # count how many character matches match_count = (self.pop == TARGET_ASCII).sum(axis=1) return match_count def select(self): fitness = self.get_fitness() + 1e-4 # add a small amount to avoid all zero fitness idx = np.random.choice(np.arange(self.pop_size), size=self.pop_size, replace=True, p=fitness/fitness.sum()) return self.pop[idx] def crossover(self, parent, pop): if np.random.rand() < self.cross_rate: i_ = np.random.randint(0, self.pop_size, size=1) # select another individual from pop cross_points = np.random.randint(0, 2, self.DNA_size).astype(np.bool) # choose crossover points parent[cross_points] = pop[i_, cross_points] # mating and produce one child return parent def mutate(self, child): for point in range(self.DNA_size): if np.random.rand() < self.mutate_rate: child[point] = np.random.randint(*self.DNA_bound) # choose a random ASCII index return child def evolve(self): pop = self.select() pop_copy = pop.copy() for parent in pop: # for every parent child = self.crossover(parent, pop_copy) child = self.mutate(child) parent[:] = child self.pop = pop if __name__ == '__main__': ga = GA(DNA_size=DNA_SIZE, DNA_bound=ASCII_BOUND, cross_rate=CROSS_RATE, mutation_rate=MUTATION_RATE, pop_size=POP_SIZE) for generation in range(N_GENERATIONS): fitness = ga.get_fitness() best_DNA = ga.pop[np.argmax(fitness)] best_phrase = ga.translateDNA(best_DNA) print('Gen', generation, ': ', best_phrase) if best_phrase == TARGET_PHRASE: break ga.evolve()