目標:使用R腳本從ensembl上下載transcript數據。
簡單粗暴,直接上代碼。biomaRt的介紹晚一點更新。
# This file helps extract information from Ensembl with gene names as input.
# 11/02/2018, pxy7896
#
# Inputs (read from the working directory):
#   geneNames.txt - one gene symbol per row (single column, no header).
#   otherInfo.txt - the first value of the first column is used as the
#                   Ensembl dataset name passed to useMart()
#                   (presumably something like "hsapiens_gene_ensembl";
#                   the "hgnc_symbol" filter below suggests a human dataset).
# Outputs:
#   ids.txt       - tab-separated transcript table for the requested genes.
#   exon table    - tab-separated exon/UTR table; written to the 4th
#                   command-line argument when supplied, otherwise to
#                   "exons.txt".

library(biomaRt)

# Optional command-line arguments. The original script indexed `args[4]`
# while the `args <- commandArgs(...)` line was commented out, so `args`
# resolved to the base function and the final write failed. The position
# (4th argument) is kept, with a default so the script also runs without
# any arguments.
cliArgs <- commandArgs(trailingOnly = TRUE)
resultFile <- if (length(cliArgs) >= 4L) cliArgs[4L] else "exons.txt"

# Read inputs from files.
geneNamesFile <- "geneNames.txt"
otherInfoFile <- "otherInfo.txt"

raw <- read.table(
  geneNamesFile,
  col.names = c("geneNames"),
  stringsAsFactors = FALSE
)
# geneNames is a character vector.
geneNames <- raw[["geneNames"]]
if (length(geneNames) == 0L) {
  stop("No gene names found in ", geneNamesFile, call. = FALSE)
}

otherInfo <- read.table(otherInfoFile, stringsAsFactors = FALSE)
# Choose the database: first entry of the first column.
dataSet <- otherInfo[[1]][1]
if (is.na(dataSet) || !nzchar(dataSet)) {
  stop("No dataset name found in ", otherInfoFile, call. = FALSE)
}

mart <- useMart("ensembl", dataset = dataSet)

# Get transcript ids (and their coordinates) for the requested genes.
attr <- c(
  "hgnc_symbol", "ensembl_transcript_id", "chromosome_name",
  "transcript_start", "transcript_end"
)
ids <- getBM(
  attributes = attr,
  filters = "hgnc_symbol",
  values = geneNames,
  mart = mart
)
write.table(ids, "ids.txt", sep = "\t", quote = FALSE, row.names = FALSE)

# Use `[[` so the filter values are a plain vector rather than a
# one-column data.frame; duplicates add nothing to the query.
targetIds <- unique(ids[["ensembl_transcript_id"]])
if (length(targetIds) == 0L) {
  stop("No transcripts were returned for the given gene names", call. = FALSE)
}

# Get exon and UTR information for every transcript found above.
exonAttr <- c(
  "5_utr_start", "5_utr_end", "3_utr_start", "3_utr_end", "strand",
  "ensembl_transcript_id", "ensembl_exon_id",
  "exon_chrom_start", "exon_chrom_end"
)
result <- getBM(
  attributes = exonAttr,
  filters = "ensembl_transcript_id",
  values = targetIds,
  mart = mart
)
write.table(result, resultFile, sep = "\t", quote = FALSE, row.names = FALSE)