# create_pwm.py
# Author: Neha Bhagwat
# Last updated: December 31, 2017
# Purpose: Program to create a position weight matrix with Laplace pseudocount.
#
# Reads aligned sequences (one per line) from Donor_MOG.txt, counts the bases
# seen at each of the first 9 positions, converts the counts into base-2
# log-odds scores (observed / expected) and writes the resulting 4 x 9 matrix
# (rows in A, C, G, T order) to Donor_MOG_matrix.txt.
from __future__ import division

import math

INPUT_PATH = "Donor_MOG.txt"
OUTPUT_PATH = "Donor_MOG_matrix.txt"

# Number of leading positions of every sequence that are scored.
MOTIF_LENGTH = 9

# Pseudocount added to every base count (1 == Laplace pseudocount).
PSEUDOCOUNT = 1

# Row order of the matrix and the expected (background) frequency of each base.
BASES = "ACGT"
BACKGROUND = {"A": 0.28, "C": 0.22, "G": 0.22, "T": 0.28}


def parse_sequences(text):
    """Split *text* into sequences, one per non-blank line.

    ``str.splitlines`` is used so that both ``\\n`` and ``\\r\\n`` line endings
    work, regardless of whether the file layer already translated newlines.
    Surrounding whitespace is stripped and blank lines (such as the one that
    follows a trailing newline) are dropped.
    """
    stripped = (line.strip() for line in text.splitlines())
    return [line for line in stripped if line]


def count_bases(sequences, length=MOTIF_LENGTH):
    """Count how often each base occurs at each of the first *length* positions.

    Sequences are upper-cased before counting; characters beyond *length* are
    ignored.

    Returns a dict mapping each base in ``BASES`` to a list of *length* counts.

    Raises ValueError if a sequence is shorter than *length* or contains a
    character other than A, C, G or T within the counted positions.
    """
    counts = {base: [0] * length for base in BASES}
    for number, sequence in enumerate(sequences, 1):
        if len(sequence) < length:
            raise ValueError(
                "sequence %d has fewer than %d characters: %r"
                % (number, length, sequence)
            )
        for position, base in enumerate(sequence[:length].upper()):
            if base not in counts:
                # Fail loudly rather than silently booking the character as T.
                raise ValueError(
                    "sequence %d has unexpected character %r at position %d"
                    % (number, base, position + 1)
                )
            counts[base][position] += 1
    return counts


def log_odds_matrix(counts, num_sequences, pseudocount=PSEUDOCOUNT):
    """Convert base counts into base-2 log-odds scores rounded to 3 decimals.

    The observed frequency of a base at a position is
    ``(count + pseudocount) / (num_sequences + 4 * pseudocount)`` and the
    expected frequency comes from ``BACKGROUND``.  For 7 sequences and a
    pseudocount of 1 the denominator is 11, the value this script previously
    hard-coded.

    Returns a list of four rows (A, C, G, T order), one score per position.
    """
    total = num_sequences + len(BASES) * pseudocount
    return [
        [
            round(math.log(((count + pseudocount) / total) / BACKGROUND[base], 2), 3)
            for count in counts[base]
        ]
        for base in BASES
    ]


def format_matrix(matrix):
    """Render *matrix* as text: one row per line, every value followed by a tab.

    The tab after the last value of each row is kept on purpose so the output
    file layout stays exactly what this script has always produced.
    """
    return "".join(
        "".join(str(value) + "\t" for value in row) + "\n" for row in matrix
    )


def main():
    """Read Donor_MOG.txt, build the PWM and write Donor_MOG_matrix.txt."""
    with open(INPUT_PATH, "r") as input_file:
        sequences = parse_sequences(input_file.read())
    if not sequences:
        raise ValueError("no sequences found in %s" % INPUT_PATH)

    counts = count_bases(sequences)
    matrix = log_odds_matrix(counts, len(sequences))

    with open(OUTPUT_PATH, "w") as output_file:
        output_file.write(format_matrix(matrix))


if __name__ == "__main__":
    main()