R-biomaRt使用-代碼備份

目標:使用R腳本從ensembl上下載transcript數據this

簡單粗暴,直接上代碼。biomaRt的介紹晚一點更新。orm

# this file helps extract information from ensembl with gene name as input 
# 11/02/2018, pxy7896

library(biomaRt)

# 使用參數
# args=commandArgs(T)
# 從文件讀取
geneNamesFile <- "geneNames.txt"
otherInfoFile <- "otherInfo.txt"

raw <- read.table(geneNamesFile, col.names = c("geneNames"), stringsAsFactors = FALSE)
# geneNames is character
geneNames <- raw[["geneNames"]]
#geneNames <- args[1]
otherInfo <- read.table(otherInfoFile, stringsAsFactors = FALSE)
dataSet <- otherInfo[[1]][1]
# choose database
#dataSet <- args[2]
mart = useMart("ensembl", dataset=dataSet)
attr <- c("hgnc_symbol", "ensembl_transcript_id", "chromosome_name", "transcript_start", "transcript_end")
# get transcript ids
ids <- getBM(attributes = attr, filters = "hgnc_symbol", values = geneNames, mart = mart)
write.table(ids, "ids.txt", sep="\t", quote=FALSE, row.names=FALSE)
targetIds <- ids["ensembl_transcript_id"]
# get exons info
exonAttr <- c("5_utr_start","5_utr_end","3_utr_start","3_utr_end","strand", "ensembl_transcript_id", "ensembl_exon_id", "exon_chrom_start", "exon_chrom_end")
#attr2 <- c(attr, exonAttr)
result <- getBM(attributes = exonAttr, filters = "ensembl_transcript_id", values = targetIds, mart = mart)
write.table(result, args[4], sep = "\t", quote = FALSE, row.names=FALSE)
相關文章
相關標籤/搜索