#! /usr/bin/python
# module of useful functions, classes and variables for fetching and displaying protein data

import sys, re, urllib

# returns xml page of gids for supplied search term
def esearch(searchterm):
	"""Open an NCBI esearch request against the protein database.

	searchterm -- string appended verbatim to the query URL; no URL-encoding
	              is applied here, so the caller must supply a URL-safe term.

	Returns the file-like response object from urllib.urlopen, unread.
	Per the module's own comment the response is an XML page of gids.
	"""
	query_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=protein&term=' + searchterm
	return urllib.urlopen(query_url)

# returns structure id for given pdb reference for image display
def structuresearch(term):
	"""Look up a term in the NCBI structure database and return the first id.

	term -- string appended verbatim to the esearch query URL (not URL-encoded).

	Returns the text found between <Id> and </Id> on the first response line
	containing such a pair, or None when no line matches.
	"""
	query_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=structure&term=' + term
	id_pattern = re.compile(r'<Id>(.*)</Id>')
	for line in urllib.urlopen(query_url).readlines():
		hit = id_pattern.search(line)
		if hit is not None:
			# first matching line wins; later ids are ignored
			return hit.group(1)
	return None

# fetches protein information xml file when given gid
def efetch(gid):
	"""Download the efetch record for one protein id.

	gid -- id as a string; it is concatenated into the URL, so a non-string
	       value raises TypeError.

	Returns the complete response body as returned by read(). The request
	asks for rettype=xml, retmode=text.
	"""
	query_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=" + gid + "&rettype=xml&retmode=text"
	response = urllib.urlopen(query_url)
	return response.read()

# searches for information between given xml tags
def xmlsearch(term,file):
	"""Return the text between the first <term>...</term> pair in file.

	term -- tag name without angle brackets; it is matched literally.
	file -- the text to search (a string, despite the parameter name).

	Returns the enclosed text of the first matching element, or the string
	"no information available" when there is no match. Because '.' does
	not match a newline, an element whose content spans several lines is
	reported as not found.
	"""
	# escape the tag name so regex metacharacters in it are taken literally
	tag = re.escape(term)
	# non-greedy group: stop at the first closing tag rather than the last
	# one on the line, so adjacent elements are not merged into one result
	found = re.search('<' + tag + '>(.*?)</' + tag + '>', file)
	if found is None:
		return "no information available"
	return found.group(1)

# takes command line arguments and gives error message if none are supplied
def takeargument(number=1,alttext="",default=None):
	"""Return the command line argument at position number, or a fallback.

	number  -- index into sys.argv (index 0 is the script name).
	alttext -- message printed to standard output when the argument cannot
	           be read; it is printed even when it is the empty string.
	default -- value returned when the argument cannot be read.

	Returns sys.argv[number] when it exists, otherwise default.
	"""
	try:
		return sys.argv[number]
	except (IndexError, TypeError):
		# Only lookup failures (missing argument, unusable index) fall back
		# to the default; anything else, such as KeyboardInterrupt, is left
		# to propagate.
		# print(...) with a single argument behaves identically as a
		# Python 2 print statement.
		print(alttext)
		return default

# a datatype for protein information
class protein:
	"""Plain record holding the fields collected for one protein.

	The constructor stores its arguments unchanged:
	a -> gid, b -> name, c -> sequence, q -> organism,
	d -> pdbid (optional, defaults to None),
	e -> image (optional, defaults to None).
	"""
	def __init__(self,a,b,c,q,d=None,e=None):
		# required fields
		self.gid, self.name, self.sequence, self.organism = a, b, c, q
		# optional fields; None means "not supplied"
		self.pdbid, self.image = d, e

# some useful strings for w3c valid html pages
# HTML 4.01 Transitional doctype line (ends with a newline and one space)
doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n '
# opens <html> and <head> and declares the utf-8 charset
charset = '<html>\n <head>\n <meta http-equiv="content-type" content="text/html;charset=utf-8">\n'
# link to the shared stylesheet file index.css
css = '<link rel="stylesheet" href="index.css" type="text/css">'
# everything that precedes the <title> element of a generated page
header = "".join((doctype, charset, css))
# closes <head>, opens <body> and the table
middle = '</head>\n <body>\n <table>'
# closes the table, <body> and <html>
bottom = '</table></body>\n </html>'

# creates webpage for individual protein when given information
def createpage(prefix,ptn):
	"""Write the HTML page for one protein and describe the written file.

	prefix -- string prepended to the output file name.
	ptn    -- object with string attributes gid, name, organism and
	          sequence, plus pdbid and image which may be None (the
	          defaults used by the protein class).

	The page is written to prefix + ptn.gid + ".html". The PDB id row is
	omitted when ptn.pdbid is None, and the image row is omitted when
	ptn.image is empty or None.

	Returns the list [filename, ptn.gid, ptn.name].
	"""
	filename = prefix+ ptn.gid  +".html"
	# Build the whole page before touching the disk, so a bad attribute
	# cannot leave an empty file behind.
	title = "<title>Information for protein "+ ptn.gid +"</title>\n"
	name = "<tr><td>Name:</td><td>"+ ptn.name +"</td></tr>\n"
	gid = "<tr><td>GID:</td><td>"+ ptn.gid +"</td></tr>\n"
	organism = "<tr><td>Organism:</td><td>" + ptn.organism + "</td></tr>\n"
	sequence = "<tr><td>Sequence:</td><td class=\"seq\">"+ ptn.sequence +"</td></tr>\n"
	if ptn.pdbid is None:
		# pdbid is optional; concatenating None would raise TypeError
		pdbid = ""
	else:
		pdbid = "<tr><td>PDB id:</td><td>"+ ptn.pdbid +"</td></tr>\n"
	if ptn.image:
		image = "<tr><td>Image:</td><td><img src=\""+ptn.image+"\"></td></tr>\n"
	else:
		image = ""
	page = header+title+middle+name+gid+organism+sequence+pdbid+image+bottom
	# the with-block closes (and therefore flushes) the file even on error
	with open(filename,"w") as filehandler:
		filehandler.write(page)
	return [filename, ptn.gid, ptn.name]

# creates index page of all the files
# and css file for style information
def createblankpage(name):
	"""Start the index page name + ".html" and write the shared stylesheet.

	name -- keyword used for the index file name and in the page title.

	Writes the file index.css in the current directory, overwriting any
	existing file, then writes the page header, title and the table heading
	row to the index page.

	Returns the index page's file object, still open for writing, so more
	rows can be appended. The closing markup is not written here, and the
	caller is responsible for closing the handle.
	"""
	title = "<title>Collected protein information for keyword "+ name +"</title>\n"
	labels = "<tr><td><b>Name</b></td><td><b>GID</b></td></tr>"
	# The stylesheet is complete after these two writes, so close it here
	# instead of leaving the handle open.
	with open("index.css","w") as stylesheet:
		stylesheet.write(".seq{display: block; width: 30em; overflow-x:scroll;} html {background: black; color: #e7ebc3;} ")
		stylesheet.write("table {border-collapse: collapse}td {border: 1px solid #ebc4d3} a, a:visited {color: #c3ebd3}")
	# the index page handle is deliberately left open and handed back
	filehandler = open(name+".html","w")
	filehandler.write(header+title+middle+labels)
	return filehandler

def addtoindex(thing,handle):
	"""Write one index table row to handle.

	thing  -- indexable with at least three items: [0] is used as the link
	          target, [1] as the link text and [2] as the first cell. This
	          matches the list returned by createpage.
	handle -- object with a write() method.
	"""
	row_template = "<tr><td>%(label)s</td><td><a href=\"%(target)s\">%(text)s</a></td></tr>"
	cells = {"label": thing[2], "target": thing[0], "text": thing[1]}
	handle.write(row_template % cells)

