# draft_gc_content_NCBI.py
# Author: Sami Khuri 
# Last updated: February 15, 2016
# Purpose: A function that computes the percentage of G's and C's in a DNA sequence
#          read from a file 
# Program uses: input file, for loop, 
#               Python built-in functions open() and float(), and the string method lower()

def gc_count(seq):
    """Compute the percentage of G and C in a DNA sequence.

    Args:
        seq: An iterable of strings, typically an open text file (which
            yields one line at a time) or a list of lines. A plain string
            is also accepted; iterating it yields single characters.

    Returns:
        The G+C percentage (0.0 to 100.0) as a float, relative to the number
        of A, C, G and T bases found. Upper and lower case are treated
        alike; every other character (newlines, N, gaps, ...) is ignored.
        Returns 0.0 when no A, C, G or T is found, instead of dividing by
        zero.

    Note:
        Items that start with ">" are treated as FASTA description lines
        and are skipped, so letters in the description are not counted as
        bases. This only helps when `seq` yields whole lines.
    """
    g_count = 0
    a_count = 0
    c_count = 0
    t_count = 0

    for line in seq:
        # a FASTA description line is free text, not sequence data
        if line.startswith('>'):
            continue
        # convert bases to lower case so 'G' and 'g' are counted together
        line = line.lower()
        g_count += line.count('g')
        c_count += line.count('c')
        a_count += line.count('a')
        t_count += line.count('t')

    total_count = a_count + c_count + g_count + t_count
    # nothing to divide by: empty input or no recognizable bases
    if total_count == 0:
        return 0.0

    # float() keeps this a true division under Python 2 as well
    gc_content = (float(g_count + c_count) / float(total_count)) * 100
    return gc_content

# open() looks for BRCA1.txt relative to the current working directory.
# If it fails because the file cannot be found, put the file's path
# before "BRCA1.txt" in the open() call below.
# The with-block closes the file as soon as the counting is done.
with open("BRCA1.txt", "r") as cds:
    gc_percent = gc_count(cds)

# A single pre-formatted argument prints the same text under Python 2 and 3
print('GC content: %s %%' % gc_percent)