#! /usr/bin/python
# calculate pI of set of proteins
# takes list of pkas file name and  sequence file name as command line args

import sys

# attemptint to open the files that are to be used, gives useful error messages if they can't be opened
try:
	pkas = open(sys.argv[1])
except:
	try:
		print "Can't open", sys.argv[1]
		sys.exit()
	except: 
		print "Please enter a pKa list filename"
		sys.exit()
try:
	sequence = open(sys.argv[2])
except:
	try:
		print "Can't open", sys.argv[2]
		sys.exit()
	except:
		print "Please enter a sequence filename"
		sys.exit()


# this function finds the fraction of any particular residue that is protonated, when given the pH and pKa

def function(ph1,pka):
	return (1/((10**(ph1-pka))+1))

pkadict = {}
goesto = {}

pi = []

# reads pKas into dictionary pkadict from pka file
# as well as whether the residue goes from -ve to neutral when protonated, or from neutral to +ve ("goesto" dictionary)

for line in pkas.readlines():
	fields = line.split()
	pkadict[fields[0]] = float(fields[1])
	goesto[fields[0]] = fields[2]


# reads sequence file

for line in sequence.readlines():
	fields = line.split()

	# takes protein sequence (second field in line)
	protein = fields[1]

	# creates dictionary of residues
	# add C and N terminals

	chargable = {}
	chargable["c"] = 1
	chargable["n"] = 1

	# scans through protein sequence and tallies the number of residues that contribute to the pI, based on whether they are in the pka dictionary
	for character in protein:
		if character in pkadict.keys():
			if character not in chargable.keys():
				chargable[character] = 0
			chargable[character]+= 1

	# initialising variables for charge calculation
	charge = 0
	previous = 0
	ph = 0
	phmax = 12.0
	phstep = 1.0
	finished = False
	while ph <= phmax:

	# for each residue that has been tallied up
	
		for item in chargable.keys():

			# for residues which add +ve charge, charge = % protonated * +1
			# for residues which add -ve charge, charge = % not protonated * -1
			if goesto[item] == "positive":
				charge += ((function(ph,pkadict[item]))*chargable[item])
			elif goesto[item] == "neutral":
				charge += ((1-function(ph,pkadict[item]))*(-1)*chargable[item])
	
		# checks if this is a crossing point, refines pH step down to accuracy of 0.01 
		# once enough accuracy is reached, adds pI (pH at which charge was closest to 0) to list of pIs and breaks
		if previous * charge < 0:
			if phstep > 0.01:
				ph-= phstep
				phstep = phstep/10
			else: 
			 	if abs(charge) > abs(previous):
					pi.append(ph-phstep)
				else:
					pi.append(ph)
				finished = True
		else:
			previous = charge
		charge = 0
		ph += phstep
		if finished == True:
			ph = 10000
for number in pi:
	print number

