Template:Groningen/dna analysis py

import numpy as np import random import copy

class DNA_Analysist():

message = ['G','A','C','C','A','A','G','C','C','T','G','C','A','A','A','A','A','C','A','A','A','G','T','G','C','A','A','A','G','A','T', 'A','T','C','A','G','T','A','A','G','G','T','C','T','T','A','A','A','G','G','C','C','G','A','A','G','C','G','G','T','G','G','C','C','T', 'A','A','G','A','T','A','A','A','A','C','T','G','G','G','C','G','C','C','C','T','G','G','C','G','T','T','A','G','T','T','C','C','G','C', 'A','G','A','A','G','A','T','A','A','A','A','C','T','G','G','G','C','T','A','A','G','G','C','A','T','C','A','A','A','G','A','A','G','G', 'T','A','T','C','T','A','A','A','T','C','C','G','A','A','G','T','A','G','T','T','A','G','C','T','A','A','G','T','G','A','T','T','G','G', 'C','G','G','G','C','G','A','A','G','G','A','A','T','T','T','G','C','T','A','A','G','C','T','A','T','G','A','A','G','G','A','T','T','G', 'G','A','C','T','A','A','G','C','C','A','T','T','C','C','C','G','A','A','G','C','T','A','T','C','A','C','A','T','A','A','G','C','G','A', 'T','G','A','C','A','T','G','C','A','C','C','G','C','A','G','C','A','G','A','C','A','G','G','T','A','C','A','A','A','T','A','A','G','A', 'A','A','A','C','T','T','T','G','A','A','G','T','C','T','T','T','A','A','G','A','A','T','T','A','G','T','A','T','T','A','A','G','A','A', 'G','T','G','A','T','G','A','A','G','G','G','C','T','A','T','G','C','C','A','C','C','C','T','C','C','T','A','G','A','G','A','C','A','G','C', 'T','A','T','A','A','G','C','T','C','T','T','A','A','G','G','A','G','T','G','A','A','G','C','T','G','T','T','G','A','A','T','A','A','G', 'G','C','G','T','A','G','T','A','G','A','A','G','C','A','A','T','T','A','A','G','T','C','C','T','G','A','A','G','T','T','C','T','G','A', 'A','G','C','T','G','T','T','G','A','C','G','A','A','G','A','T','A','A','A','A','C','T','C','T','G','A','A','T','A','A','T','A','A','G','A', 'C','C','T','A','C','T','A','T','A','A','G','T','G','C','T','G','A','A','G','C','G','T','T','G','A', 'A','G','A','G','A','T','A','A','G','A', 'C','A','G','A','G','A','A','G','T','C','T','T','G','A','A','G','T','G','C','T','T','A','A','A','G','A','A','G','C','C','T','T','C','A','A', 'C','T','A','A','G','T','G','A','T','T','A','C','A','C','T','G','C','T','A','A','G','A','G','A','T','G','A','A','G','C','A','T','T','G','C', 'A','C','C','C','A','C','C','G','A','C']

def plots(simulations, mutations):

import seaborn as sbn from matplotlib import pyplot as plt from scipy.optimize import leastsq

plt.bar(simulations, mutations) plt.ylabel('Occurance of Random Mutations') plt.xlabel('Amount of Simulations') plt.title('Random Mutations in Bacillus Subtilis') plt.show()

def random_mutation(g):

mutation_rate = 2 indexes_to_mutate = []

for i in xrange(0,mutation_rate): index = random.randint(0,len(g)-1) indexes_to_mutate.append(index)

#print "Indexes to mutate", indexes_to_mutate

for j in indexes_to_mutate:

if g[j] == 'A': mutation = random.randint(0,2) if mutation == 0: g[j] = 'T' elif mutation == 1: g[j] = 'C' elif mutation == 2: g[j] = 'G'

elif g[j] == 'T': mutation = random.randint(0,2) if mutation == 0: g[j] = 'A' elif mutation == 1: g[j] = 'C' elif mutation == 2: g[j] = 'G'

elif g[j] == 'C': mutation = random.randint(0,2) if mutation == 0: g[j] = 'A' elif mutation == 1: g[j] = 'T' elif mutation == 2: g[j] = 'G'

elif g[j] == 'G': mutation = random.randint(0,2) if mutation == 0: g[j] = 'A' elif mutation == 1: g[j] = 'C' elif mutation == 2: g[j] = 'T'

g = [x.upper() for x in g]

return g, indexes_to_mutate

def check_genomes(original_g, mutated_g):

random_mutation_indexes = [] random_mutation_values = []

for i, (or_g, mut_g) in enumerate(zip(original_g, mutated_g)): if or_g != mut_g: random_mutation_values.append(mut_g) random_mutation_indexes.append(i)

#print "Mutated components", random_mutation_values #print "Position", random_mutation_indexes

return random_mutation_values, random_mutation_indexes #important for checking if the indexes are the same ones of where the message is placed

def read_sequence(): #Original Bacteria DNA sequence

genome = []

with open('BacillusSubtilis.txt') as f: g = map(str.rstrip, f)

g = .join(g)

for i in g: genome.append(i)

print "... Importing Bacillus Subtilis DNA ..."

return genome

def insert_message(genome, message): #Message is there!

starting_index = random.randint(0, len(genome)-1)

while True: ending_index = random.randint(0, len(genome)-1) if ending_index > starting_index and ending_index - starting_index == len(message): break

genome[starting_index:ending_index] = message

print "... Inserting the Message into the DNA ..."

genome = [x.upper() for x in genome]

return genome, starting_index, ending_index

def check_message_indexes(message_indexes, mutation_indexes):

n_random_mutations = 0

for i in xrange(message_indexes[0], message_indexes[1]): for j in mutation_indexes: if i==j: print "A Random Mutation in the Message Occured!" n_random_mutations += 1

print "Number of Random Mutations", n_random_mutations print "------------------------------------------------"

return n_random_mutations

if __name__ == '__main__':

simulations_counter = [] mutations_counter = [] message_length = []

i = 1

#for i in xrange(1, 1000): while True: print "Running Simulation:", i message = message * i simulations_counter.append(i)

message_indexes = []

genome = read_sequence()

inserted_genome, start_message_index, stop_message_index = insert_message(genome, message) intact_inserted_genome = copy.copy(inserted_genome) print "Length of the message inserted in the genome", len(message)

message_length.append(len(message))

mutated_genome, mutation_indexes = random_mutation(inserted_genome) check_genomes(intact_inserted_genome, mutated_genome)

message_indexes.append(start_message_index) message_indexes.append(stop_message_index)

amount_mutations = check_message_indexes(message_indexes,mutation_indexes) mutations_counter.append(amount_mutations)

if amount_mutations != 0: break i+=1

plots(simulations_counter, mutations_counter)