版權聲明:本文爲博主原創文章,轉載請註明出處
app
咱們日常多見的基因突變熱圖是一個基因一個格子,一種突變類型,但實際上在同一個病人中,同一個基因每每具備多種突變類型,所以傳統的熱圖繪製工具並不能知足咱們繪圖的須要。應研究須要,本人本身寫了一個熱圖繪製函數,內部調用image 進行熱圖的繪製, barplot進行直方圖繪製, 用data.table進行數據處理。對於一個基因內多種突變類型如何表現出來的問題, 這個函數先採用image將初步的熱圖繪製出來,再使用points,以方塊形式將第二種突變,第三種突變依次添加, 在添加的同時方塊位置稍爲移動而且伴隨着大小的略微縮小,以實現更好的顯示效果,最多能在一個熱圖格子上表示四種突變。
函數以下, 須要安裝並加載data.table 1.10.4, 加載RColorBrewer函數
my_heatmap <- function(vr, pal = c("#F2F2F2",colorRampPalette(c("blue", "white", "red"))(5)[c(1,2)],"#F2F2F2",colorRampPalette(c("blue", "white", "red"))(5)[c(4,5)],brewer.pal(n = 8, name ="Accent")[c(1,4,6,8,2,3,5,7)],"#E31A1C","#6A3D9A"),type = c("DEL","LOSS","NEUTRAL","GAIN","AMPL","nonsynonymous SNV","synonymous SNV","intronic","stopgain","nonframeshift deletion","splicing", "frameshift deletion","UTR3","frameshift insertion","UTR5"),order_gene = T, order_patient = T, hist_plot = T, legend_dist = 0.4, col_text_cex = 1, row_text_cex = 1, sub_gene= NULL,heatmap_mar = c(5,17,1,2), heatmap_oma=c(0.2,0.2,0.2,0.2),heatmap_mex=0.5, legend_mar = c(1,0,4,1),xlab_adj=1, order_omit=c("NEUTRAL"), annotation_col=NULL, annotation_colors = NULL, heatmap_height = 3, heatmap_width = 3, anno_height=NULL) { if((length(pal) - length(type)) !=1 ){stop("Pal must be one longer than type, because first one pal is col for no mutation")} if(!is.null(sub_gene)){ pal_dt <- data.table(pal, type=c("NoMut",type)) vr <- vr[Gene %in% sub_gene,] type <- pal_dt[type %in% unique(vr$Type),type] pal <- c(pal[1],pal_dt[type, on="type"][,pal]) }else{ pal_dt <- data.table(pal, type=c("NoMut",type)) type <- pal_dt[type %in% unique(vr$Type),type] pal <- c(pal[1],pal_dt[type, on="type"][,pal]) } dt <- unique(vr[,.(Gene,Type,Patient)]) dt$Type <- factor(dt$Type, levels = type) if(order_gene){gene <- dt[!Type %in% order_omit,.(N=length(unique(Patient))),by=Gene][order(N),Gene]}else{gene <- unique(dt[!Type %in% order_omit, Gene])} dt$Gene <- factor(dt$Gene, levels = gene) if(order_patient){patient <- data.table(table(vr[!Type %in% order_omit,]$Patient))[order(-N),V1]}else{patient <- unique(dt[!Type %in% order_omit, Patient])} dt$Patient <- factor(dt$Patient, levels = c(patient, setdiff(unique(dt$Patient),patient))) setkey(dt, "Type") n <- length(unique(dt$Type)) dt$Gene_Patients <- paste(dt$Gene, dt$Patient) dt_inf <- dt[,.N,by=.(Gene, Patient)] max_mut_num <- max(dt_inf$N) dt[,Mut_num:=seq_len(.N),by=.(Patient,Gene)] #main plot dt1 <- copy(dt) dt1[Mut_num !=1, Type:=NA] dc <- data.frame(dcast(dt1, Patient ~ Gene, value.var = "Type", fun.aggregate = function(x)(x[!is.na(x)][1]))) rownames(dc)<- dc[,1] data_matrix<-data.matrix(dc[,-1]) data_matrix[is.na(data_matrix)] <- 0 pal=pal breaks<-seq(-1,10,1) if(!hist_plot & is.null(annotation_col)){ layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(8,2), heights=c(1,1)) par(mar=heatmap_mar, oma=heatmap_oma, mex=heatmap_mex) }else if(hist_plot & is.null(annotation_col)){ layout(matrix(c(2,4,1,3),2,2,byrow=TRUE), widths=c(heatmap_width,1), heights=c(1,heatmap_height), TRUE) par(mar=heatmap_mar) }else if(hist_plot & !is.null(annotation_col)){ if(is.null(anno_height)){anno_height <- 0.02 * ncol(annotation_col)} layout(matrix(data=c(3,5,2,5,1,4), nrow=3, ncol=2, byrow=TRUE), widths=c(heatmap_width,1), heights=c(1, anno_height, heatmap_height)) par(mar=heatmap_mar, oma=heatmap_oma, mex=heatmap_mex) }else if(!hist_plot & !is.null(annotation_col)){ if(is.null(anno_height)){anno_height <- 0.02 * ncol(annotation_col)} layout(matrix(data=c(2,4,1,3), nrow=2, ncol=2, byrow=TRUE), widths=c(8,2), heights=c(anno_height,1)) par(mar=heatmap_mar, oma=heatmap_oma, mex=heatmap_mex) } image(x=1:nrow(data_matrix),y=1:ncol(data_matrix), z=data_matrix,xlab="",ylab="",breaks=breaks, col=pal[1:11],axes=FALSE) #sub plot add_plot <- function(dt, i){ dt1 <- copy(dt) dt1[Mut_num != i, Type:=NA] dc <- data.frame(dcast(dt1, Patient ~ Gene, value.var = "Type", fun.aggregate = function(x){ifelse(length(x) >1,x[!is.na(x)][1],factor(NA))})) rownames(dc)<- dc[,1] data_matrix <- data.matrix(dc[,-1]) xy <- which(data_matrix !=0, arr.ind = T) #apply(xy, 1, function(x)points(x[1], x[2],pch=15, cex=2.5 -0.5*i, col=pal[data_matrix[x[1],x[2]]+1])) apply(xy, 1, function(x)points(x[1]-0.6+i*0.25, x[2],pch=15, cex=1.2 - i*0.08, col=pal[data_matrix[x[1],x[2]]+1])) } ploti <- data.frame(i=2:max_mut_num) apply(ploti, 1, function(i){print(add_plot(dt, i))}) text(x=1:nrow(data_matrix)+0.1, y=par("usr")[1] - xlab_adj, srt = 90, adj = 0.5, labels = rownames(data_matrix), xpd = TRUE, cex=col_text_cex) axis(2,at=1:ncol(data_matrix),labels=colnames(data_matrix), col="white",las=1, cex.lab=0.1, cex.axis=row_text_cex) abline(h=c(1:ncol(data_matrix))+0.5,v=c(1:nrow(data_matrix))+0.5, col="white",lwd=2,xpd=F) #add annotation plot if(!is.null(annotation_col)){ change_factor <- function(x){as.numeric(factor(x, labels = 1:length(levels(x))))} #change infomation to numeric colname_tmp <- colnames(annotation_col) annotation_col_mt <- as.matrix(apply(annotation_col, 2, change_factor)) rownames(annotation_col_mt) <- rownames(annotation_col) colnames(annotation_col_mt) <- colname_tmp annotation_col_mt <- annotation_col_mt[rownames(data_matrix),] ## change infomation numric to unique number cumsum <- 0 if(!is.null(dim(annotation_col_mt))){#if more than one column, cummulate info numeric for(i in 1:ncol(as.data.frame(annotation_col_mt))){ annotation_col_mt[,i] <- cumsum + annotation_col_mt[,i] cumsum <- max(annotation_col_mt[,i]) } } ## get color according to infomation get_color <- function(anno){return(annotation_colors[[anno]][levels(annotation_col[,anno])])} palAnn <- NULL if(is.null(dim(annotation_col_mt))){ rowname_tmp <- rownames(annotation_col_mt) annotation_col_mt <- as.matrix(annotation_col_mt, nrow=1) rownames(annotation_col_mt) <- rowname_tmp colnames(annotation_col_mt) <- colname_tmp } for(anno in colnames(annotation_col_mt)){ palAnn <- c(palAnn, get_color(anno)) } par(mar=c(0,heatmap_mar[2], 0, heatmap_mar[4])) image(x=1:nrow(annotation_col_mt), y=1:ncol(annotation_col_mt), z= annotation_col_mt, col=palAnn, xlab="",ylab="", axes=FALSE) axis(2,at=1:ncol(annotation_col_mt),labels=colnames(annotation_col_mt), col="white",las=1, cex.lab=0.1, cex.axis=row_text_cex) abline(h=c(1:ncol(annotation_col_mt))+0.5,v=c(1:nrow(annotation_col_mt))+0.5, col="white",lwd=2,xpd=F) } if(hist_plot){ #hist par(mar=c(0,2+0.5,3,heatmap_mar[4]-0.9)) patient_dt <- dt[,.N,by=.(Patient,Type)] mt <- data.frame(dcast(patient_dt, Type ~ Patient, value.var = "N")) data_matrix <- data.matrix(mt[,-1]) rownames(data_matrix) <- mt[,1] tryCatch(data_matrix <- data_matrix[setdiff(type, order_omit), patient], error = function(e){print("type argument or your patient name format(include "-" and so on )")}) data_matrix[is.na(data_matrix)] <- 0 omit_idx <- NULL for(i in order_omit){omit_idx <- c(omit_idx,1+which(type == i))} barplot(data_matrix, col=pal[-c(1,omit_idx)],space=0,border = "white",axes=T,xlab="",ann=F, xaxt="n") par(mar=c( heatmap_mar[1]-2 , 0.8, heatmap_mar[3]+2.2, 3),las=1) gene_dt <- dt[,.N,by=.(Gene,Type)] mt <- data.frame(dcast(gene_dt, Type ~ Gene, value.var = "N")) data_matrix <- data.matrix(mt[,-1]) rownames(data_matrix) <- mt[,1] gene <- gsub("ATM,", "ATM.", gene) tryCatch(data_matrix <- data_matrix[setdiff(type, order_omit), gene], error = function(e){print("type argument or check your gene name format(please not include "-" and so on)")}) data_matrix[is.na(data_matrix)] <- 0 barplot(data_matrix, col=pal[-c(1,omit_idx)],space=0,border = "white",axes=T,xlab="", ann=F, horiz = T, yaxt="n") } #add legend par(mar=legend_mar) plot(3, 8, axes=F, ann=F, type="n") if(is.null(annotation_col)){ ploti <- data.frame(i=1:length(type)) }else{ #add annotation legend ploti <- data.frame(i=1:(length(type) + max(annotation_col_mt))) pal <- c("NULL", palAnn, pal[-1]) anno_label <- NULL for (anno in colnames(annotation_col)){ anno_label <- c(anno_label, levels(annotation_col[[anno]])) } type <- c(anno_label,type) } if(!hist_plot){ tmp <- apply(ploti, 1, function(i){print(points(2, 10+(length(type)-i)*legend_dist, pch=15, cex=2, col=pal[i+1]))}) tmp <- apply(ploti, 1, function(i){print(text(3, 10+(length(type)-i)*legend_dist, labels = type[i],pch=15, cex=1, col="black"))}) } if(hist_plot){ tmp <- apply(ploti, 1, function(i){print(points(2, 5+(length(type)-i)*legend_dist, pch=15, cex=0.9, col=pal[i+1]))}) tmp <- apply(ploti, 1, function(i){print(text(2.8, 5+(length(type)-i)*legend_dist, labels = type[i],pch=15, cex=0.9, col="black"))}) } }
描述:
繪製一個基因能夠同時顯示多種突變類型的熱圖,輸入三列的data table數據框, 列名分別是Gene,Type和 Patient,輸出熱圖, 還能夠在熱圖上方和右方添加突變的直方圖。
用法:工具
my_heatmap(vr, pal = c("#F2F2F2",colorRampPalette(c("blue", "white", "red"))(5)[c(1,2)],"#F2F2F2",colorRampPalette(c("blue", "white", "red"))(5)[c(4,5)],brewer.pal(n = 8, name ="Accent")[c(1,4,6,8,2,3,5,7)],"#E31A1C","#6A3D9A"),type = c("DEL","LOSS","NEUTRAL","GAIN","AMPL","nonsynonymous SNV","synonymous SNV","intronic","stopgain","nonframeshift deletion","splicing", "frameshift deletion","UTR3","frameshift insertion","UTR5"),order_gene = T, order_patient = T, hist_plot = T, legend_dist = 0.4, col_text_cex = 1, row_text_cex = 1, sub_gene= NULL,heatmap_mar = c(5,17,1,2), heatmap_oma=c(0.2,0.2,0.2,0.2),heatmap_mex=0.5, legend_mar = c(1,0,4,1),xlab_adj=1, order_omit=c("NEUTRAL"), annotation_col=NULL, annotation_colors = NULL, heatmap_height = 3, heatmap_width = 3, anno_height=NULL)
參數:
vr: 含有變異數據的數據框,共三列,列名分別是Gene, Type, Patient;
pal: 色板,向量,須要根據數據框中突變類型的數量進行自定義,須要比突變類型多一種顏色做爲背景色,背景色放在第一位;
type:色板相對應的突變類型,向量,type必須等於或者多於數據中所出現的全部類型;默認使用拷貝數的四種突變類型加上拷貝數中性再加上annovar中全部突變類型;自定義設置時長度要比色板少1;
order_gene: 默認T, 對基因按照突變的病人數目進行排序;
order_patient:默認T,對病人按照突變的基因數目進行排序;
hist_plot:默認T,在上方和右方加上對應的直方圖;
legend_dist:默認0.4,調整圖例之間相互的距離,通常須要自行調整;
col_text_cex:調整病人名稱的大小,默認1;
row_text_cex:調節基因名稱的大小,默認1;
xlab_adj:調整病人名稱與熱圖之間的距離;
sub_gene:只選擇部分基因進行畫圖,須要給基因名的向量,而且基因須要在數據中存在,默認NULL;
heatmap_mar:mar參數,調整熱圖先後左右的邊緣長度,默認c(5,17,1,2);
heatmap_oma:oma參數,調整熱圖先後左右的外邊緣長度,默認c(0.2,0.2,0.2,0.2);
mex:調整熱圖的mex參數,用於描繪繪圖邊緣的座標,默認0.5;
legend_mar:legend的mar參數,調整圖例的位置,默認c(1,0,4,1);
order_omit:排序時忽略的變異類型,這些突變類型在直方圖中也會被過濾,默認c("NEUTRAL"),若是不存在"NEUTRAL"這種突變類型,也能夠保持默認參數;
heatmap_height:熱圖的高度,對於基因數目很是多的狀況,在設置pdf長度的基礎上,能夠設置heatmap覆蓋在畫布的高度來讓格子;
heatmap_width:熱圖的寬度,對於病人數目很是多的狀況,能夠設置加長熱圖的長度。
annotation_col:對病人進行註釋的信息,一個data frame,行名爲病人,列名爲不一樣的病理信息,數據必須是因子,例子以下(在代碼例子中我簡略了SmokingInfo爲Smoking,OldInfo 爲Old)spa
SmokingInfo OldInfo P1 Smoking Old P2 NonSmoking Unold P3 NonSmoking Unold P4 NonSmoking Old P5 Smoking Unold P6 NonSmoking Unold
annotation_colors:對病理信息匹配不一樣的顏色, 例子以下, 須要跟annotation_col中的列名和因子匹配3d
$SmokingInfo Smoking NonSmoking "#FDB462" "#80B1D3" $OldInfo Old Unold "#FB8072" "#8DD3C7"
anno_height:設置註釋的高度,默認會自動調節。code
細節:orm
使用例子:blog
#without hist plot pdf("~/project/PE/fromws02/PE/cnv_plot/heatmap_cnv_mut.pdf", height=12, width = 12) my_heatmap(vr, heatmap_mar = c(17,17,1,2),hist_plot = F, legend_dist=0.1, xlab_adj = 1.2, order_patient = T, order_gene = T) dev.off() #with hist plot pdf("~/project/PE/fromws02/PE/cnv_plot/heatmap_hist_cnv_mut.pdf", height=12, width = 12) my_heatmap(vr, heatmap_mar = c(17,7,1,2),hist_plot = T, legend_dist=0.3, xlab_adj = 1.2, order_patient = T, order_gene = T) dev.off() #only a few gene pdf("~/project/PE/fromws02/PE/cnv_plot/Assoc_CN1.pdf", height=2,width = 14) my_heatmap(vr, heatmap_mar = c(7,17,1,2), sub_gene = c("CDKN2A", "GNAQ", "NOTCH1", "RB1", "SMAD4", "ABL1"),hist_plot = F,legend_dist=0.2, xlab_adj = 0.9, order_omit = "NEUTRAL") dev.off() #with annotation and hist annotation_col <- data.frame(Smoking = factor(sample(c("Smoking", "NonSmoking"), length(unique(vr$Patient)), replace = T)), Old=factor(sample(c("Old", "Unold"), 53, replace = T))) rownames(annotation_col) <- unique(vr$Patient) annotation_colors <- list(Smoking =c(Smoking = "#FDB462", NonSmoking = "#80B1D3"), Old=c(Old = "#FB8072", Unold = "#8DD3C7")) pdf("~/project/PE/fromws02/PE/cnv_plot/heatmap_hist_cnv_mut.pdf", height=15, width = 12) my_heatmap(vr, heatmap_mar = c(15,10,1,2), legend_mar = c(1,0,1,1), hist_plot = T, legend_dist=0.2, xlab_adj = 1.2, order_patient = T, order_gene = T, annotation_col=annotation_col, annotation_colors=annotation_colors) dev.off() #with annotation and without hist pdf("~/project/PE/fromws02/PE/cnv_plot/heatmap_hist_cnv_mut.pdf", height=12, width = 12) my_heatmap(vr, heatmap_mar = c(17,10,1,2),hist_plot = F, legend_dist=0.1, xlab_adj = 1.2, order_patient = T, order_gene = T, annotation_col=annotation_col, annotation_colors=annotation_colors) dev.off()