PAX2Graphml Properties management example

  • how to add custom node and edge properties from an external file,
  • how to create tabular property files for nodes and edges,
  • how to use node and edge properties as mapping keys,
  • how to use the Ensembl BioMart API to generate new annotations and related node properties.
In [1]:
### Create a simple graph with a "uniprot" property mapped to each node,
## as in graphs extracted from BioPAX (SPAIM, INFLUENCE).

import sys
import pax2graphml  as p2g
import graph_tool as gt
def graph_example():
    """Build the small demonstration graph used by the notebook cells.

    The graph holds seven vertices (indices 0 to 6). Vertices 0 to 4 each
    get one edge whose target is vertex 5; vertex 6 has no edge. Two string
    properties are attached: a vertex property ``uniprot`` and an edge
    property ``group``. Vertex 2 receives no ``uniprot`` value.

    Returns:
        The populated ``gt.Graph`` instance.
    """
    graph = gt.Graph()
    graph.vp.uniprot = graph.new_vertex_property("string")
    graph.ep.group = graph.new_edge_property("string")

    # Seven vertices, created in index order.
    nodes = [graph.add_vertex() for _ in range(7)]
    hub = nodes[5]

    # One edge per source vertex 0..4, all targeting the hub, in that order.
    links = [graph.add_edge(nodes[source], hub) for source in range(5)]

    # Vertex index -> identifier; index 2 is absent and keeps no value.
    identifiers = {
        0: "ACE2",
        1: "ZP4",
        3: "PPARA",
        4: "P19367",
        5: "JUND",
        6: "Q96G03",
    }
    for position, identifier in identifiers.items():
        graph.vp["uniprot"][nodes[position]] = identifier

    # Group labels follow the edge creation order.
    for link, label in zip(links, ("A", "A", "B", "B", "C")):
        graph.ep["group"][link] = label

    return graph

# Build the demonstration graph; the next cell annotates this same object.
g=graph_example()

# describe_graph prints a per-vertex listing and also returns it as a string,
# which is why the recorded output shows the listing on stdout and in Out[1].
p2g.graph_explore.describe_graph(g)
  vertex 0 (uniprot=ACE2,)
      0 out_edge:(0, 5) (group=A,)


  vertex 1 (uniprot=ZP4,)
      1 out_edge:(1, 5) (group=A,)


  vertex 2 (uniprot=,)
      2 out_edge:(2, 5) (group=B,)


  vertex 3 (uniprot=PPARA,)
      3 out_edge:(3, 5) (group=B,)


  vertex 4 (uniprot=P19367,)
      4 out_edge:(4, 5) (group=C,)


  vertex 5 (uniprot=JUND,)
     5 in_edge:(0, 5) (group=A,)
     5 in_edge:(1, 5) (group=A,)
     5 in_edge:(2, 5) (group=B,)
     5 in_edge:(3, 5) (group=B,)
     5 in_edge:(4, 5) (group=C,)


  vertex 6 (uniprot=Q96G03,)



Out[1]:
'  vertex 0 (uniprot=ACE2,)\n      0 out_edge:(0, 5) (group=A,)\n\n\n  vertex 1 (uniprot=ZP4,)\n      1 out_edge:(1, 5) (group=A,)\n\n\n  vertex 2 (uniprot=,)\n      2 out_edge:(2, 5) (group=B,)\n\n\n  vertex 3 (uniprot=PPARA,)\n      3 out_edge:(3, 5) (group=B,)\n\n\n  vertex 4 (uniprot=P19367,)\n      4 out_edge:(4, 5) (group=C,)\n\n\n  vertex 5 (uniprot=JUND,)\n     5 in_edge:(0, 5) (group=A,)\n     5 in_edge:(1, 5) (group=A,)\n     5 in_edge:(2, 5) (group=B,)\n     5 in_edge:(3, 5) (group=B,)\n     5 in_edge:(4, 5) (group=C,)\n\n\n  vertex 6 (uniprot=Q96G03,)\n\n\n'
In [2]:
### Add properties to the nodes and edges of the graph from tabular property
## files, matching file rows to graph elements first through the "uniprot" and
## "group" properties and then through "index".
import pax2graphml  as p2g



# Node annotation keyed on the "uniprot" vertex property; creates "description".
# In the recorded output, vertices without a match show an empty description.
propertiesNodefile="annot/annot_node.csv"
mapNodeKey="uniprot"
newNodeProperty="description"
p2g.properties.annot_node_from_file(g,propertiesNodefile,mapNodeKey,newNodeProperty)

# Edge annotation keyed on the "group" edge property; creates "confidence".
propertiesEdgefile="annot/annot_edge.csv"
mapEdgeKey="group"
newEdgeProperty="confidence"
p2g.properties.annot_edge_from_file(g,propertiesEdgefile,mapEdgeKey,newEdgeProperty)


# Same two calls, now keyed on "index"; creates "comment" on nodes.
# NOTE(review): graph_example() defines no "index" property, so "index"
# presumably designates the vertex/edge index itself - confirm in pax2graphml.
propertiesNodefile="annot/annot_node_index.csv"
mapNodeKey="index"
newNodeProperty="comment"
p2g.properties.annot_node_from_file(g,propertiesNodefile,mapNodeKey,newNodeProperty)

# Creates "score" on edges, keyed on "index".
propertiesEdgefile="annot/annot_edge_index.csv"
mapEdgeKey="index"
newEdgeProperty="score"
p2g.properties.annot_edge_from_file(g,propertiesEdgefile,mapEdgeKey,newEdgeProperty)

# describe_graph already prints the listing and returns it as a string, so
# wrapping it in print() shows the listing twice in the recorded output.
print(p2g.graph_explore.describe_graph(g))

# Destination files for the property export below.
output_prop_node_file="annot/annot_node_out.csv"
output_prop_edge_file="annot/annot_edge_out.csv"

# Report whether the "uniprot" values are unique (recorded result: True).
is_unique=p2g.properties.is_unique(g,"uniprot")
print("are each uniprot property values unique:%s" %(is_unique))

# Export the "uniprot" node property alongside "index".
# NOTE(review): 'NA' is presumably the placeholder written for missing
# values - confirm against annot_node_to_file.
print("generating:%s" %(output_prop_node_file))
p2g.properties.annot_node_to_file(g,output_prop_node_file,"index","uniprot",'NA')

# Export the "confidence" edge property alongside "index".
print("generating:%s" %(output_prop_edge_file))
p2g.properties.annot_edge_to_file(g,output_prop_edge_file,"index","confidence",'NA')
  vertex 0 (uniprot=ACE2,description=angiotensin I converting enzyme 2,comment=evaluated,)
      0 out_edge:(0, 5) (group=A,confidence=high,score=10,)


  vertex 1 (uniprot=ZP4,description=zona pellucida glycoprotein 4,comment=evaluated,)
      1 out_edge:(1, 5) (group=A,confidence=high,score=15,)


  vertex 2 (uniprot=,description=,comment=investigation,)
      2 out_edge:(2, 5) (group=B,confidence=medium,score=12,)


  vertex 3 (uniprot=PPARA,description=peroxisome proliferator activated receptor alpha,comment=,)
      3 out_edge:(3, 5) (group=B,confidence=medium,score=,)


  vertex 4 (uniprot=P19367,description=,comment=,)
      4 out_edge:(4, 5) (group=C,confidence=low,score=,)


  vertex 5 (uniprot=JUND,description=,comment=,)
     5 in_edge:(0, 5) (group=A,confidence=high,score=10,)
     5 in_edge:(1, 5) (group=A,confidence=high,score=15,)
     5 in_edge:(2, 5) (group=B,confidence=medium,score=12,)
     5 in_edge:(3, 5) (group=B,confidence=medium,score=,)
     5 in_edge:(4, 5) (group=C,confidence=low,score=,)


  vertex 6 (uniprot=Q96G03,description=,comment=,)



  vertex 0 (uniprot=ACE2,description=angiotensin I converting enzyme 2,comment=evaluated,)
      0 out_edge:(0, 5) (group=A,confidence=high,score=10,)


  vertex 1 (uniprot=ZP4,description=zona pellucida glycoprotein 4,comment=evaluated,)
      1 out_edge:(1, 5) (group=A,confidence=high,score=15,)


  vertex 2 (uniprot=,description=,comment=investigation,)
      2 out_edge:(2, 5) (group=B,confidence=medium,score=12,)


  vertex 3 (uniprot=PPARA,description=peroxisome proliferator activated receptor alpha,comment=,)
      3 out_edge:(3, 5) (group=B,confidence=medium,score=,)


  vertex 4 (uniprot=P19367,description=,comment=,)
      4 out_edge:(4, 5) (group=C,confidence=low,score=,)


  vertex 5 (uniprot=JUND,description=,comment=,)
     5 in_edge:(0, 5) (group=A,confidence=high,score=10,)
     5 in_edge:(1, 5) (group=A,confidence=high,score=15,)
     5 in_edge:(2, 5) (group=B,confidence=medium,score=12,)
     5 in_edge:(3, 5) (group=B,confidence=medium,score=,)
     5 in_edge:(4, 5) (group=C,confidence=low,score=,)


  vertex 6 (uniprot=Q96G03,description=,comment=,)



are each uniprot property values unique:True
generating:annot/annot_node_out.csv
generating:annot/annot_edge_out.csv
In [3]:
### Add Gene Ontology (GO) terms to each node of the graph using the included BioMart API
## and the uniprot mapping
 
    



 


      
# Start again from a fresh demonstration graph (no properties from earlier cells).
g=graph_example()


# Collect the values of the "uniprot" property; they are the query keys below.
protL=p2g.properties.property_values(g,"uniprot")


print("==find GO term using API======")
print("default API configuration (when conf=None)")
# NOTE(review): the two helper names below are spelled with a non-ASCII
# 'î' (U+00EE) rather than 'i'; verify they match the library's definitions.
print(p2g.properties.__default_apî_conf())

# Species prefix used to build the Ensembl dataset name.
species="rnorvegicus"
# Alternative species (matches the default dataset shown in the output):
#species="hsapiens"

# Only 'dataset' is overridden; the recorded output shows the remaining keys
# keeping their default values.
conf={'dataset':species+'_gene_ensembl'}
print("current API configuration")
print(p2g.properties.__current_apî_conf(conf))


# Look up annotations for the collected keys with the configuration above.
# The result is used below as a key -> GO term set mapping.
annot_map=p2g.properties.uniprot_to_go(protL,conf)



# Store the mapping as a new "go" node property, matched on "uniprot".
# In the recorded output, vertices without an entry show go=None.
new_property="go"
primary_key="uniprot"
# NOTE(review): presumably makes key matching ignore letter case - confirm.
case_sensitive=False

p2g.properties.create_property_from_map(g,annot_map, primary_key,new_property,case_sensitive)

# Print the annotated graph.
p2g.graph_explore.describe_graph(g)
        
 
"""
#test API
protL=['ZP4','ZP5','PPARA']

print("==find GO term using API======")

unimap=p2g.properties.uniprot_to_go(protL)


for k in unimap.keys():
   v=unimap[k]
   print("%s:%s" % (k,unimap[k]))

   
""" 
==find GO term using API======
default API configuration (when conf=None)
{'server': 'http://www.ensembl.org', 'mart': 'ENSEMBL_MART_ENSEMBL', 'dataset': 'hsapiens_gene_ensembl', 'attr': ['ensembl_gene_id', 'external_gene_name', 'uniprot_gn_symbol', 'go_id'], 'searchkey': 'uniprot_gn_symbol', 'uni_key': 'UniProtKB Gene Name symbol', 'annot_key': 'GO term accession'}
current API configuration
{'server': 'http://www.ensembl.org', 'mart': 'ENSEMBL_MART_ENSEMBL', 'dataset': 'rnorvegicus_gene_ensembl', 'attr': ['ensembl_gene_id', 'external_gene_name', 'uniprot_gn_symbol', 'go_id'], 'searchkey': 'uniprot_gn_symbol', 'uni_key': 'UniProtKB Gene Name symbol', 'annot_key': 'GO term accession'}
  vertex 0 (uniprot=ACE2,go={'GO:0004175', 'GO:0008233', 'GO:0046813', 'GO:0005886', 'GO:0005737', 'GO:0015827', 'GO:1903598', 'GO:0032800', 'GO:0006508', 'GO:1903779', 'GO:0008237', 'GO:0031526', 'GO:0046718', 'GO:0001618', 'GO:0046872', 'GO:0016787', 'GO:0060452', 'GO:0004180', 'GO:0016020', 'GO:1905737', 'GO:0016324', 'GO:0005615', 'GO:0022898', 'GO:0016021', 'GO:0005576', 'GO:0003051', 'GO:0003081', 'GO:0008241', 'GO:0009986', 'GO:0051957'},)
      0 out_edge:(0, 5) (group=A,)


  vertex 1 (uniprot=ZP4,go={'GO:0042102', 'GO:0042802', 'GO:0016021', 'GO:0060478', 'GO:2000344', 'GO:2000360', 'GO:0007338', 'GO:0002922', 'GO:0035805', 'GO:0032190', 'GO:0016020'},)
      1 out_edge:(1, 5) (group=A,)


  vertex 2 (uniprot=,go=None,)
      2 out_edge:(2, 5) (group=B,)


  vertex 3 (uniprot=PPARA,go={'GO:0030154', 'GO:0010883', 'GO:0010876', 'GO:0008544', 'GO:0038023', 'GO:0001103', 'GO:0003677', 'GO:0001223', 'GO:0043401', 'GO:0014070', 'GO:0045722', 'GO:0006631', 'GO:0070166', 'GO:0045776', 'GO:0010565', 'GO:0010891', 'GO:1901215', 'GO:0008134', 'GO:0097371', 'GO:0045471', 'GO:0019904', 'GO:0045820', 'GO:0010871', 'GO:0035095', 'GO:0045893', 'GO:0032868', 'GO:0009267', 'GO:0005654', 'GO:0008289', 'GO:0042060', 'GO:0010745', 'GO:0032099', 'GO:0046872', 'GO:0005515', 'GO:0000977', 'GO:0031624', 'GO:0042157', 'GO:0001666', 'GO:0008144', 'GO:0010887', 'GO:0042752', 'GO:0019902', 'GO:1903038', 'GO:0001228', 'GO:1902894', 'GO:0019216', 'GO:0006355', 'GO:0003700', 'GO:0045944', 'GO:0008270', 'GO:0007507', 'GO:0042025', 'GO:0043565', 'GO:0046321', 'GO:0010468', 'GO:0032922', 'GO:0003707', 'GO:0050728', 'GO:0009755', 'GO:0061052', 'GO:0051525', 'GO:0090575', 'GO:0001227', 'GO:0033993', 'GO:0048511', 'GO:0045923', 'GO:0005634', 'GO:0000978', 'GO:0030522', 'GO:0032091', 'GO:0044877', 'GO:0004879', 'GO:0019217', 'GO:0000122'},)
      3 out_edge:(3, 5) (group=B,)


  vertex 4 (uniprot=P19367,go=None,)
      4 out_edge:(4, 5) (group=C,)


  vertex 5 (uniprot=JUND,go={'GO:0000976', 'GO:0009612', 'GO:0032870', 'GO:0071277', 'GO:0002076', 'GO:0019899', 'GO:0014070', 'GO:0007568', 'GO:0003677', 'GO:0000790', 'GO:0032993', 'GO:0008134', 'GO:0042493', 'GO:0006357', 'GO:0043434', 'GO:0006366', 'GO:0010033', 'GO:0042127', 'GO:0005654', 'GO:0045597', 'GO:0032991', 'GO:0032496', 'GO:0016922', 'GO:0003713', 'GO:0007623', 'GO:0003690', 'GO:0001228', 'GO:0000981', 'GO:0006355', 'GO:0003700', 'GO:0045944', 'GO:0043565', 'GO:0035976', 'GO:0005667', 'GO:0034097', 'GO:0051726', 'GO:0000978', 'GO:0005634', 'GO:0045669', 'GO:0009416', 'GO:0009314', 'GO:0000122', 'GO:0051591', 'GO:1990837'},)
     5 in_edge:(0, 5) (group=A,)
     5 in_edge:(1, 5) (group=A,)
     5 in_edge:(2, 5) (group=B,)
     5 in_edge:(3, 5) (group=B,)
     5 in_edge:(4, 5) (group=C,)


  vertex 6 (uniprot=Q96G03,go=None,)



Out[3]:
'\n#test API\nprotL=[\'ZP4\',\'ZP5\',\'PPARA\']\n\nprint("==find GO term using API======")\n\nunimap=p2g.properties.uniprot_to_go(protL)\n\n\nfor k in unimap.keys():\n   v=unimap[k]\n   print("%s:%s" % (k,unimap[k]))\n\n   \n'
In [4]:
# Annotate nodes with HGNC symbols through a fully specified API configuration.
print("==find HGNC symbol using API======")
# Query keys. 'P35557' is not a "uniprot" value of the demonstration graph,
# so it appears in the returned mapping but on no vertex.
protL=['P19367', 'P35557','Q96G03']

# Every key of the default configuration (printed by the GO cell) is set
# explicitly here.
conf={
       'server':'http://www.ensembl.org',
       'mart':'ENSEMBL_MART_ENSEMBL',
       'dataset':'hsapiens_gene_ensembl',
       # Attributes requested from the dataset.
       'attr':[
           'ensembl_gene_id',
           'external_gene_name',
           'uniprot_gn_id',
           'hgnc_symbol'
          ],
       # NOTE(review): presumably the attribute matched against protL - confirm.
       'searchkey' :'uniprot_gn_id',
       # NOTE(review): 'uni_key' and 'annot_key' look like result column
       # labels for the key and the annotation - confirm in ensembl_api.
       'uni_key' : 'UniProtKB Gene Name ID',
       'annot_key' :'HGNC symbol'
}

# Fresh graph, so only the property created in this cell is shown.
g=graph_example()

# Recorded result: {'P19367': {'HK1'}, 'P35557': {'GCK'}, 'Q96G03': {'PGM2'}}
annot_map=p2g.properties.ensembl_api(protL,conf)
print(annot_map)

# Store the mapping as a new "hgnc_symbol" node property, matched on "uniprot".
# In the recorded output, vertices without an entry show hgnc_symbol=None.
new_property="hgnc_symbol"
primary_key="uniprot"
# NOTE(review): presumably makes key matching ignore letter case - confirm.
case_sensitive=False

p2g.properties.create_property_from_map(g,annot_map, primary_key,new_property,case_sensitive)

# Last expression of the cell: prints the listing; its return value is Out[4].
p2g.graph_explore.describe_graph(g)
        
    
==find HGNC symbol using API======
{'P19367': {'HK1'}, 'P35557': {'GCK'}, 'Q96G03': {'PGM2'}}
  vertex 0 (uniprot=ACE2,hgnc_symbol=None,)
      0 out_edge:(0, 5) (group=A,)


  vertex 1 (uniprot=ZP4,hgnc_symbol=None,)
      1 out_edge:(1, 5) (group=A,)


  vertex 2 (uniprot=,hgnc_symbol=None,)
      2 out_edge:(2, 5) (group=B,)


  vertex 3 (uniprot=PPARA,hgnc_symbol=None,)
      3 out_edge:(3, 5) (group=B,)


  vertex 4 (uniprot=P19367,hgnc_symbol={'HK1'},)
      4 out_edge:(4, 5) (group=C,)


  vertex 5 (uniprot=JUND,hgnc_symbol=None,)
     5 in_edge:(0, 5) (group=A,)
     5 in_edge:(1, 5) (group=A,)
     5 in_edge:(2, 5) (group=B,)
     5 in_edge:(3, 5) (group=B,)
     5 in_edge:(4, 5) (group=C,)


  vertex 6 (uniprot=Q96G03,hgnc_symbol={'PGM2'},)



Out[4]:
"  vertex 0 (uniprot=ACE2,hgnc_symbol=None,)\n      0 out_edge:(0, 5) (group=A,)\n\n\n  vertex 1 (uniprot=ZP4,hgnc_symbol=None,)\n      1 out_edge:(1, 5) (group=A,)\n\n\n  vertex 2 (uniprot=,hgnc_symbol=None,)\n      2 out_edge:(2, 5) (group=B,)\n\n\n  vertex 3 (uniprot=PPARA,hgnc_symbol=None,)\n      3 out_edge:(3, 5) (group=B,)\n\n\n  vertex 4 (uniprot=P19367,hgnc_symbol={'HK1'},)\n      4 out_edge:(4, 5) (group=C,)\n\n\n  vertex 5 (uniprot=JUND,hgnc_symbol=None,)\n     5 in_edge:(0, 5) (group=A,)\n     5 in_edge:(1, 5) (group=A,)\n     5 in_edge:(2, 5) (group=B,)\n     5 in_edge:(3, 5) (group=B,)\n     5 in_edge:(4, 5) (group=C,)\n\n\n  vertex 6 (uniprot=Q96G03,hgnc_symbol={'PGM2'},)\n\n\n"
In [ ]: