# draft_gc_content_NCBI.py
# Author: Sami Khuri
# Last updated: February 15, 2016
# Purpose: A function that computes the percentage of G's and C's in a DNA
#          sequence read from a file
# Program uses: input file, for loop, and
#               python built-in functions: open(), lower(), count() and float()


def gc_count(seq):
    """Compute the percentage of G and C in a DNA sequence.

    Parameters:
        seq: an iterable of strings, typically an open text file that
            yields one line of sequence at a time.

    Returns:
        The GC content as a float between 0.0 and 100.0, i.e.
        (G + C) / (A + C + G + T) * 100. Only the letters a, c, g and t
        (in either case) are counted; every other character (newlines,
        N's, digits, ...) is ignored. Returns 0.0 when no a/c/g/t base is
        found, instead of dividing by zero.

    Lines starting with '>' are skipped: in FASTA files (the format NCBI
    uses for sequence downloads) such a line is a description, not
    sequence data, and its letters must not be counted as bases.
    """
    g_count = 0
    a_count = 0
    c_count = 0
    t_count = 0

    for line in seq:
        # FASTA description line: not part of the sequence
        if line.startswith('>'):
            continue

        # convert bases to lower case so 'G' and 'g' are counted alike
        line = line.lower()

        # tally each of the four bases found on this line
        g_count = g_count + line.count('g')
        c_count = c_count + line.count('c')
        a_count = a_count + line.count('a')
        t_count = t_count + line.count('t')

    total_count = a_count + c_count + g_count + t_count

    # no bases at all (empty input): avoid a ZeroDivisionError
    if total_count == 0:
        return 0.0

    # divide the gc_count by the total_count; float() keeps the division
    # from truncating to an integer under Python 2
    gc_content = (float(g_count + c_count) / float(total_count)) * 100
    return gc_content


if __name__ == "__main__":
    # If open() fails because BRCA1.txt cannot be found, put the full path
    # of the file before "BRCA1.txt" in open()
    # The with-statement closes the file once the count is done.
    with open("BRCA1.txt", "r") as cds:
        # One %-formatted string prints the same text under Python 2 and 3
        print("GC content: %s %%" % gc_count(cds))