Download BioPAX source files

  • Download and uncompress from Python
In [1]:
import os
from urllib.request import urlopen
from urllib.parse import urlparse
import gzip
import shutil
import glob


def download(fileUrl,desDir=""):
    """Download ``fileUrl`` into ``desDir`` and return the saved file's path.

    The local file name is the last component of the URL path.

    Args:
        fileUrl: URL to fetch; anything ``urllib.request.urlopen`` accepts.
        desDir: Destination directory, with or without a trailing separator.
            The default ``""`` means the current working directory. The
            directory is created if it does not exist yet.

    Returns:
        The path of the file that was written.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the destination cannot be created or written.
    """
    fileName = os.path.basename(urlparse(fileUrl).path)
    if desDir:
        os.makedirs(desDir, exist_ok=True)
    outFile = os.path.join(desDir, fileName)
    # Stream in chunks rather than read() the whole body: the archives can be
    # very large, and the context manager guarantees the connection is closed.
    with urlopen(fileUrl) as response, open(outFile, 'wb') as output:
        shutil.copyfileobj(response, output)
    return outFile

def uncompress(src,dest):
    """Decompress the gzip file ``src`` into the file ``dest``.

    Args:
        src: Path of the gzip-compressed input file.
        dest: Path of the output file; it is created or overwritten.
    """
    # Both handles are opened in binary mode and closed when the block exits.
    with gzip.open(src, 'rb') as packed, open(dest, 'wb') as unpacked:
        shutil.copyfileobj(packed, unpacked)
        
 
def downloadBIOPAXFiles(destDir,rootUrl):
    """Download the six per-source BIOPAX archives from ``rootUrl``.

    The archives (dip, kegg, pid, reactome, pathbank, panther) are fetched
    one after the other, in that order, via ``download``.

    Args:
        destDir: Destination passed to ``download`` for every archive.
        rootUrl: URL prefix; each file name is appended to it directly, so it
            must already end with "/".
    """
    # The combined "All" archive is left out of this list on purpose; it is
    # handled by downloadBIOPAXAll.
    archives = (
        "PathwayCommons12.dip.BIOPAX.owl.gz",
        "PathwayCommons12.kegg.BIOPAX.owl.gz",
        "PathwayCommons12.pid.BIOPAX.owl.gz",
        "PathwayCommons12.reactome.BIOPAX.owl.gz",
        "PathwayCommons12.pathbank.BIOPAX.owl.gz",
        "PathwayCommons12.panther.BIOPAX.owl.gz",
    )
    for archive in archives:
        download(rootUrl+archive,destDir)
            
def downloadBIOPAXAll(destDir,rootUrl):
    """Download the combined "All" BIOPAX archive from ``rootUrl``.

    Warning: this file is very large; the download may not complete and may
    crash the system.

    Args:
        destDir: Destination passed to ``download``.
        rootUrl: URL prefix; the file name is appended to it directly.
    """
    archive = "PathwayCommons12.All.BIOPAX.owl.gz"
    download(rootUrl+archive,destDir)


# Print a "---" separator line to standard output when this code runs.
print("---")
def uncompressBIOPAXFiles(sourceDir,destDir):
    """Decompress every ``*.gz`` file found in ``sourceDir`` into ``destDir``.

    Each output file keeps the archive's name without its trailing ".gz".
    A progress line is printed for every file.

    Args:
        sourceDir: Directory scanned (non-recursively) for ``*.gz`` files.
            ``""`` means the current working directory.
        destDir: Directory receiving the decompressed files; created if it
            does not exist. ``""`` means the current working directory.
    """
    suffix = ".gz"
    if destDir:
        os.makedirs(destDir, exist_ok=True)

    # os.path.join copes with a trailing separator and with an empty directory
    # name; sorting makes the processing order deterministic.
    for gz in sorted(glob.glob(os.path.join(sourceDir, "*" + suffix))):
        nmgz = os.path.basename(gz)
        # Strip only the final ".gz"; str.replace would also remove any ".gz"
        # occurring earlier in the file name.
        nm = nmgz[:-len(suffix)]

        destfile = os.path.join(destDir, nm)
        print(" uncompress %s to %s"%(gz,destfile))
        uncompress(gz,destfile)

# Remote location of the archives and the local working directories
# (download target and decompression target are the same folder).
rootUrl="https://www.pathwaycommons.org/archives/PC2/v12/"
srcdir="biopax_download/"
desdir="biopax_download/"

# Guard flag: the files are large and the download can be slow, so nothing is
# fetched unless this is switched to True.
dowait=False
if dowait:
    downloadBIOPAXFiles(srcdir,rootUrl)
    uncompressBIOPAXFiles(srcdir,desdir)
    # The combined "All" archive is left disabled:
    # downloadBIOPAXAll(srcdir,rootUrl)
 
---
In [ ]: