The following co-expression coefficient features were obtained from COXPRESdb.
http://coxpresdb.jp/download.shtml
打開這個頁面，咱們點擊 bulk download。
而後咱們下載 budding yeast 文件。
在最下面咱們也能夠看到文件格式的說明：
Under the directory named Hsa.coex.v6, 19777 files will appear.
Hsa.coex.v6 --+-- 1
              |-- 10
              |-- 100
              |-- ...
              |-- 9997
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 10 10:49:21 2016
@author: sun
"""
import pandas as pd
import os
# Directory holding one co-expression file per gene, named by its GeneID.
COEX_DIR = 'Sce.v14-08.G4461-S3819.rma.mrgeo.d/'

# Placeholder stored when a value cannot be looked up.  Kept as the text
# "nan" (not a float NaN) so the output CSV is unchanged.
MISSING = "nan"


def parse_gene_id(raw):
    """Return the first GeneID contained in *raw* as an int, or None.

    Accepts a mapping-table cell such as ``"851234;"`` (only the part before
    the first ``;`` is used) as well as plain numeric values.  Missing or
    unparsable cells yield ``None``.  IDs of any length are supported; no
    fixed-width slicing is applied.
    """
    if isinstance(raw, str):
        token = raw.strip().split(';')[0].strip()
        if not token:
            return None
    else:
        if raw is None or pd.isna(raw):
            return None
        token = raw
    try:
        # Going through float() also accepts values such as "851234.0".
        return int(float(token))
    except (TypeError, ValueError, OverflowError):
        return None


def coexpression_features(gene_pairs, coex_dir=COEX_DIR):
    """Look up the co-expression values for every ``(gene_a, gene_b)`` pair.

    For each pair, the file ``coex_dir/<gene_a>`` is read (no header, first
    column used as the index) and the row labelled ``gene_b`` is selected;
    its column 1 is reported as MR and its column 2 as COR.

    Parameters
    ----------
    gene_pairs : sequence of (int or None, int or None)
        GeneIDs of both partners; ``None`` marks an unmapped protein.
    coex_dir : str
        Directory containing the per-gene co-expression files.

    Returns
    -------
    (list, list)
        ``mr`` and ``cor``, aligned with *gene_pairs*.  Entries are
        ``MISSING`` when either ID is unknown, the file for ``gene_a`` does
        not exist, or ``gene_b`` is not listed in that file.
    """
    mr = [MISSING] * len(gene_pairs)
    cor = [MISSING] * len(gene_pairs)

    # Group the rows by gene A so that each file is read only once.
    rows_by_gene = {}
    for row, (gene_a, gene_b) in enumerate(gene_pairs):
        if gene_a is not None and gene_b is not None:
            rows_by_gene.setdefault(gene_a, []).append((row, gene_b))

    for gene_a, targets in rows_by_gene.items():
        path = os.path.join(coex_dir, str(gene_a))
        if not os.path.exists(path):
            continue
        # NOTE(review): the separator is kept exactly as in the original
        # script; confirm it matches the delimiter of the downloaded files.
        coex = pd.read_csv(path, header=None, sep=' ', index_col=0)
        for row, gene_b in targets:
            if gene_b in coex.index:
                mr[row] = coex.loc[gene_b, 1]
                cor[row] = coex.loc[gene_b, 2]
    return mr, cor


def main(pairs_csv='yeast_gold_protein_pair.csv',
         mapping_csv='uniprot_to_geneid.csv',
         coex_dir=COEX_DIR,
         out_csv='coexpression.csv'):
    """Annotate every protein pair with MR / COR and write the result.

    Reads the pair table (columns ``idA``, ``idB``) and the accession ->
    GeneID table (columns ``Entry``, ``Cross-reference (GeneID)``), adds the
    columns ``MR`` and ``COR`` and writes the table to *out_csv* without the
    index.  The annotated DataFrame is also returned.
    """
    pairs = pd.read_csv(pairs_csv, usecols=['idA', 'idB'])
    gene_table = pd.read_csv(
        mapping_csv,
        usecols=['Entry', 'Cross-reference (GeneID)'],
        index_col=0,
    )

    # Note: loc selects data by label, iloc selects data by position.
    # reindex is used instead of loc so that accessions missing from the
    # mapping table become NaN rather than raising KeyError; duplicated
    # accessions are dropped first so the result stays aligned with `pairs`.
    mapping = gene_table['Cross-reference (GeneID)']
    mapping = mapping[~mapping.index.duplicated(keep='first')]
    genes_a = [parse_gene_id(v) for v in mapping.reindex(pairs['idA']).tolist()]
    genes_b = [parse_gene_id(v) for v in mapping.reindex(pairs['idB']).tolist()]

    mr, cor = coexpression_features(list(zip(genes_a, genes_b)), coex_dir)
    pairs['MR'] = mr
    pairs['COR'] = cor
    pairs.to_csv(out_csv, index=False)
    return pairs


if __name__ == "__main__":
    main()