# Download BioPAX source files
import os
from urllib.request import urlopen
from urllib.parse import urlparse
import gzip
import shutil
import glob
def download(fileUrl,desDir=""):
    """Download ``fileUrl`` into ``desDir``, keeping the remote file name.

    The local file name is the last path component of the URL. The payload
    is streamed to disk in chunks, so large archives never have to fit in
    memory, and the network response is always closed.

    Args:
        fileUrl: URL of the file to fetch.
        desDir: Destination directory, with or without a trailing separator.
            The empty string (default) means the current working directory.
            The directory is created if it does not exist yet.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the destination cannot be created or written.
    """
    file_name = os.path.basename(urlparse(fileUrl).path)
    if desDir:
        os.makedirs(desDir, exist_ok=True)
    # os.path.join inserts the separator only when it is missing, so both
    # "dir" and "dir/" end up as "dir/<file_name>".
    out_path = os.path.join(desDir, file_name)
    with urlopen(fileUrl) as response, open(out_path, "wb") as output:
        shutil.copyfileobj(response, output)
def uncompress(src,dest):
    """Decompress the gzip file ``src`` into the file ``dest``.

    Args:
        src: Path of the gzip-compressed input file.
        dest: Path of the output file; it is created or overwritten.
    """
    # The gzip stream is opened first, then the output file; the data is
    # copied across in chunks and both handles are closed on exit.
    with gzip.open(src, mode="rb") as packed, open(dest, mode="wb") as unpacked:
        shutil.copyfileobj(packed, unpacked)
def downloadBIOPAXFiles(destDir,rootUrl):
    """Download the per-source PathwayCommons12 BIOPAX archives.

    Fetches, in order, the dip, kegg, pid, reactome, pathbank and panther
    archives. The combined "All" archive is deliberately not fetched here.

    Args:
        destDir: Directory passed to ``download`` as the destination.
        rootUrl: URL prefix to which each archive file name is appended.
    """
    archive_names = (
        "PathwayCommons12.dip.BIOPAX.owl.gz",
        "PathwayCommons12.kegg.BIOPAX.owl.gz",
        "PathwayCommons12.pid.BIOPAX.owl.gz",
        "PathwayCommons12.reactome.BIOPAX.owl.gz",
        "PathwayCommons12.pathbank.BIOPAX.owl.gz",
        "PathwayCommons12.panther.BIOPAX.owl.gz",
    )
    for archive_name in archive_names:
        download(rootUrl + archive_name, destDir)
def downloadBIOPAXAll(destDir,rootUrl):
    """Download the combined PathwayCommons12 "All" BIOPAX archive.

    Warning: this file is very large; the download may not finish and may
    crash the system.

    Args:
        destDir: Directory passed to ``download`` as the destination.
        rootUrl: URL prefix to which the archive file name is appended.
    """
    download(rootUrl + "PathwayCommons12.All.BIOPAX.owl.gz", destDir)
    # Printed once the download call has returned.
    print("---")
def uncompressBIOPAXFiles(sourceDir,destDir):
    """Decompress every ``*.gz`` file found in ``sourceDir`` into ``destDir``.

    Each output file keeps the archive's name minus the trailing ``.gz``.
    Archives are processed in sorted order so runs are reproducible, and one
    progress line is printed per archive.

    Args:
        sourceDir: Directory searched (non-recursively) for ``*.gz`` files.
        destDir: Directory that receives the decompressed files.
    """
    suffix = ".gz"
    # os.path.join copes with directories given with or without a trailing
    # separator, and keeps "" meaning the current directory rather than "/".
    pattern = os.path.join(sourceDir, "*" + suffix)
    for gz_path in sorted(glob.glob(pattern)):
        gz_name = os.path.basename(gz_path)
        # Drop only the trailing extension: str.replace would also remove
        # any ".gz" that happens to occur earlier in the file name.
        plain_name = gz_name[:-len(suffix)]
        destfile = os.path.join(destDir, plain_name)
        print(" uncompress %s to %s"%(gz_path,destfile))
        uncompress(gz_path, destfile)
# Set to True to actually run the download and decompression below.
# Warning: the files are large, so this can be slow.
dowait=False
# Base URL of the archives and the local working directory; archives are
# downloaded to and decompressed in the same directory.
rootUrl="https://www.pathwaycommons.org/archives/PC2/v12/"
desdir="biopax_download/"
srcdir="biopax_download/"
# NOTE(review): indentation was lost in this file; both calls are assumed to
# be guarded by dowait -- confirm.
if dowait:
    downloadBIOPAXFiles(srcdir,rootUrl)
    uncompressBIOPAXFiles(srcdir,desdir)
    # To fetch the single combined archive instead, call
    # downloadBIOPAXAll(srcdir, rootUrl).