PAX2Graphml BIOPAX filter

  • Remove a subset of entities and interactions, selected by datasource, from a BIOPAX file using PAXTOOLS called from Python
  • Generate the reaction graph in GRAPHML
In [1]:
import pax2graphml  as p2g
import timeit
import os.path
# --- Locations of the example data -----------------------------------------
rootPath = "/home/user/example/"
srcDir = rootPath + "biopax/"
resultPath = rootPath + "result/"

# BioPAX files: the merged input and the filtered result written by this cell.
biopaxInputfile = resultPath + "merged_small.owl"
biopaxOutputFile = resultPath + "small_filtered.owl"

# GRAPHML files produced by the conversion cells for each BioPAX file.
graphmlfileIn = resultPath + "merged_small.graphml"
graphmlfileOut = resultPath + "merged_small_filtered.graphml"

# Datasource names handed to biopax_filter.
# Alternative selection: ["mirtarbase", "ctd"]
# Example datasources:
#   bind biogrid corum ctd dip drugbank hprd humancyc innatedb
#   inoh intact intact_complex kegg mirtarbase msigdb netpath
#   panther pid reconx smpdb wp psp reactome
# NOTE(review): this name shadows the Python builtin `filter`; it is kept
# unchanged because other cells may refer to it.
filter = ["localbase"]

# Set to False to skip the filtering step (it can be slow on large files).
dowait = True
if dowait:
    start = timeit.default_timer()
    response = p2g.pax_import.biopax_filter(biopaxInputfile, filter, biopaxOutputFile)
    print(response)
    stop = timeit.default_timer()
    print("Time: %.2f s" % (stop - start))
{'message': 'filtered Biopax file generated', 'output': '/home/user/example/result/small_filtered.owl', 'status': 0}
Time: 3.15 s
In [2]:
 
# Convert the unfiltered BioPAX input file into a GRAPHML reaction graph.
# Warning: large files can be slow (many hours on a standard laptop).
# Set dowait to False to skip this step.
dowait = True
if dowait:
    start = timeit.default_timer()
    response1 = p2g.pax_import.biopax_to_reaction_graph(biopaxInputfile, graphmlfileIn)
    print(response1)
    stop = timeit.default_timer()
    print("Time: %.2f s" % (stop - start))
{'message': 'graph file generated', 'output': '/home/user/example/result/merged_small.graphml', 'status': 0}
Time: 147.36 s

Generating the reaction graph of the filtered BIOPAX file in GRAPHML format

In [3]:
 
# Convert the filtered BioPAX file into a GRAPHML reaction graph.
# Warning: large files can be slow (many hours for GB files on a standard laptop).
# Set dowait to False to skip this step.
dowait = True
if dowait:
    start = timeit.default_timer()
    # Show which file is converted into which before the work starts.
    print("%s->%s" % (biopaxOutputFile, graphmlfileOut))
    response2 = p2g.pax_import.biopax_to_reaction_graph(biopaxOutputFile, graphmlfileOut)
    print(response2)
    stop = timeit.default_timer()
    print("Time: %.2f s" % (stop - start))
/home/user/example/result/small_filtered.owl->/home/user/example/result/merged_small_filtered.graphml
{'message': 'graph file generated', 'output': '/home/user/example/result/merged_small_filtered.graphml', 'status': 0}
Time: 2.66 s
In [4]:
def details(graphmlfile):
    """Print a short summary of a GRAPHML reaction graph.

    The graph is loaded as a directed graph, then two lines are printed:
    the values of the "provider" property (the datasources present in the
    graph) and the number of nodes and edges.

    Args:
        graphmlfile: path of the GRAPHML file to load.

    Returns:
        None. The summary is written to standard output.
    """
    graph = p2g.graph_explore.load_graphml(graphmlfile, directed=True)

    providers = p2g.properties.property_values(graph, "provider")
    # Wrap the value in a one-element tuple: with a bare `% providers`, a
    # tuple result would be unpacked as several format arguments and either
    # raise TypeError or print only part of the values.
    print("all datasources:%s\n" % (providers,))

    nodes = p2g.node_list(graph)
    edges = p2g.edge_list(graph)
    print("nodes count: %s edges count: %s " % (len(nodes), len(edges)))
 

# Summarise the graph built from the original BioPAX file, then the one built
# from the filtered file. A missing GRAPHML file means the matching conversion
# cell has not been run, so a hint is printed instead of a summary.
for banner, graphml_path in (
    ("---------input--------------", graphmlfileIn),
    ("---------output--------------", graphmlfileOut),
):
    print(banner)
    if os.path.isfile(graphml_path):
        details(graphml_path)
    else:
        print("file %s  not found.\nDid you try put dowait=True?" % (graphml_path))
---------input--------------
all datasources:['', 'Reactome', 'localbase']

nodes count: 2877 edges count: 6926 
---------output--------------
all datasources:['', 'Reactome']

nodes count: 48 edges count: 65 
In [ ]: