單細胞分析實錄(9): 展現marker基因的4種圖形(二)

在上一篇中,我已經講解了展現marker基因的前兩種圖形,分別是tsne/umap圖、熱圖,感興趣的讀者可以回顧一下。這一節我們繼續學習堆疊小提琴圖和氣泡圖。

3. 堆疊小提琴圖展現marker基因

相比於其他可視化形式,小提琴圖可以更直觀地展現某一類亞羣的某一個基因的表達分佈情況。我的marker基因一共選了12個,下面來畫圖:
Seurat內置的VlnPlot函數可以直接畫,

library(xlsx)

# Marker table: the `gene` column lists the marker genes in plotting order.
# NOTE(review): Seurat and ggplot2 are assumed to be attached earlier in the
# session (VlnPlot, Idents, theme, ggsave are used below) — confirm.
markerdf2 <- read.xlsx("ref_marker2.xlsx", sheetIndex = 1)
markerdf2$gene <- as.character(markerdf2$gene)

# Load the Seurat object and use the alphabetically ordered `celltype`
# annotation as the active identity, so violins are grouped by subpopulation.
mye.seu <- readRDS("mye.seu.rds")
mye.seu$celltype <- factor(
  mye.seu$celltype,
  levels = sort(unique(mye.seu$celltype))
)
Idents(mye.seu) <- "celltype"

# With several features VlnPlot() returns a combined (patchwork) figure.
# `+` would only modify the last sub-plot; `&` applies the scale and theme
# to every sub-plot.
# pt.size = 0 hides the per-cell points so only the violins are drawn.
VlnPlot(mye.seu, features = markerdf2$gene, pt.size = 0, ncol = 1) &
  scale_x_discrete("") &
  theme(
    axis.text.x.bottom = element_blank()
  )
ggsave("vln1.pdf", width = 20, height = 80, units = "cm")

其中pt.size參數表示點的大小,一個點就是一個細胞,一般可以直接設置爲0,即不顯示點,只畫小提琴,看上去更加清楚。儘管此處我對標度和主題進行了調整,但我發現用`+`疊加時這只對單個feature有用,多個feature時只會作用在組合圖的最後一個子圖上(要作用於所有子圖,需改用patchwork的`&`運算符),否則後續就用AI(Adobe Illustrator)來簡單編輯一下吧。
需要注意的是,圖的顏色是根據亞羣的類別來劃分的,並不是根據基因來區分。

file

第二種方法,ggplot2代碼如下

library(reshape2)

# Build a long-format table of marker-gene expression: one row per
# (gene, cell barcode) pair.
# NOTE(review): ggplot2 and dplyr (%>%, inner_join) are assumed to be attached
# earlier in the session — confirm.
vln.df <- as.data.frame(mye.seu[["RNA"]]@data[markerdf2$gene, ])
vln.df$gene <- rownames(vln.df)
vln.df <- melt(vln.df, id = "gene")
colnames(vln.df)[c(2, 3)] <- c("CB", "exp")
# melt() stores the former column names (cell barcodes) as a factor; convert
# to character so the join key below is a plain string on both sides.
vln.df$CB <- as.character(vln.df$CB)

# The data looks like this:
# > head(vln.df)
# gene                     CB   exp
# 1 CLEC9A N01_AAACGGGCATTTCAGG_1 0.000
# 2   RGCC N01_AAACGGGCATTTCAGG_1 0.000
# 3 FCER1A N01_AAACGGGCATTTCAGG_1 0.000
# 4   CD1A N01_AAACGGGCATTTCAGG_1 0.000
# 5  FSCN1 N01_AAACGGGCATTTCAGG_1 1.104
# 6   CCR7 N01_AAACGGGCATTTCAGG_1 0.000

# Attach the subpopulation label of every cell.
# NOTE(review): relies on a `CB` (cell barcode) column existing in meta.data.
anno <- mye.seu@meta.data[, c("CB", "celltype")]
anno$CB <- as.character(anno$CB)
vln.df <- inner_join(vln.df, anno, by = "CB")
# Fix the factor levels to control the order in which genes are drawn.
vln.df$gene <- factor(vln.df$gene, levels = markerdf2$gene)

# One facet row per gene; refer to the `gene` column of the plotted data
# rather than to `vln.df$gene`, so the facets always follow the data that is
# actually piped in.
vln.df %>%
  ggplot(aes(celltype, exp)) +
  geom_violin(aes(fill = gene), scale = "width") +
  facet_grid(gene ~ ., scales = "free_y") +
  scale_fill_brewer(palette = "Set3", direction = 1) +
  scale_x_discrete("") +
  scale_y_continuous("") +
  theme_bw() +
  theme(
    axis.text.x.bottom = element_text(angle = 45, hjust = 1, vjust = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
ggsave("vln2.pdf", width = 11, height = 22, units = "cm")

geom_violin()函數的scale參數爲"width"時,所有小提琴有相同的寬度,默認是"area",有相同的面積;facet_grid()用來分面,文中用的是多行一列,scales = "free_y"表示不同行之間可以有不同範圍的y值;scale_fill_brewer()使用ColorBrewer調色板。

file

這個圖的顏色根據基因來區分,有時可能還會看到小提琴圖的顏色是用亞羣某個基因的表達均值來映射的,比如

# Colour every violin by the mean expression of its (celltype, gene) group.
# Upper cap for the colour scale; choose it after looking at the group means
# of all genes together.
mean_cap <- 3

# Mean expression per subpopulation and gene.
stat.df <- vln.df %>%
  group_by(celltype, gene) %>%
  summarize(mean = mean(exp)) %>%
  as.data.frame()

# Join on both grouping columns directly: a pasted "celltype_gene" key is
# ambiguous when either name contains "_". A `mean` column left over from a
# previous run is removed first so the join cannot produce mean.x / mean.y.
vln.df$mean <- NULL
vln.df <- inner_join(vln.df, stat.df, by = c("celltype", "gene"))
# Clamp the means so a few highly expressed genes do not wash out the scale.
vln.df$mean <- pmin(vln.df$mean, mean_cap)

vln.df %>%
  ggplot(aes(celltype, exp)) +
  geom_violin(aes(fill = mean), scale = "width") +
  facet_grid(gene ~ ., scales = "free_y") +
  scale_fill_gradient(
    limits = c(0, mean_cap),
    low = "lightgrey",
    high = "yellow"
  ) +
  scale_x_discrete("") +
  scale_y_continuous("", expand = c(0.02, 0)) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x.bottom = element_text(angle = 45, hjust = 1, vjust = 1)
  )
ggsave("vln3.pdf", width = 11, height = 22, units = "cm")

file

4. 氣泡圖展現marker基因

Seurat的畫法是這樣的,

# Seurat's built-in dot plot of the marker genes, with rotated x-axis labels
# and both axis titles blanked.
# Any further fine-tuning is done the same way as for a ggplot2 object.
DotPlot(object = mye.seu, features = markerdf2$gene) +
  RotatedAxis() +
  scale_x_discrete(name = "") +
  scale_y_discrete(name = "")

file

第二種方法,ggplot2代碼如下

# Hand-built dot plot: dot colour = mean z-scored expression of a gene within
# a subpopulation, dot size = fraction of cells in that subpopulation that
# express the gene.
# NOTE(review): dplyr, ggplot2 and RColorBrewer (brewer.pal) are assumed to be
# attached earlier in the session — confirm.
marker.genes <- as.character(markerdf2$gene)

# Cells x genes matrix of expression values; drop = FALSE keeps a matrix even
# when only one marker gene is supplied.
expr.mat <- t(as.matrix(mye.seu[["RNA"]]@data[marker.genes, , drop = FALSE]))
# Per-gene z-score across all cells.
scaled.mat <- scale(expr.mat)

# Look up the subpopulation of every cell through the barcode column and keep
# only cells that have an annotation (same effect as an inner join).
# NOTE(review): relies on a `CB` (cell barcode) column existing in meta.data.
cell.anno <- mye.seu@meta.data[, c("CB", "celltype")]
anno.idx <- match(rownames(expr.mat), as.character(cell.anno$CB))
has.anno <- !is.na(anno.idx)
expr.mat <- expr.mat[has.anno, , drop = FALSE]
scaled.mat <- scaled.mat[has.anno, , drop = FALSE]
cell.group <- as.character(cell.anno$celltype[anno.idx[has.anno]])

# Long table, one row per (cell, gene). as.vector() unrolls the matrices
# column by column, i.e. gene by gene, which the two rep() calls mirror.
# `detected` is taken from the unscaled values: after z-scoring, "not
# expressed" is no longer 0, and comparing against the minimum inside a
# subpopulation undercounts whenever every cell of that group expresses the
# gene.
bubble.df <- data.frame(
  celltype = rep(cell.group, times = ncol(expr.mat)),
  gene = rep(colnames(expr.mat), each = nrow(expr.mat)),
  scaled = as.vector(scaled.mat),
  detected = as.vector(expr.mat > 0)
)

# One row per (subpopulation, gene): mean z-score and expressing fraction.
plotdf <- bubble.df %>%
  group_by(celltype, gene) %>%
  summarize(exp = mean(scaled), ratio = mean(detected)) %>%
  as.data.frame()

plotdf$celltype <- factor(
  plotdf$celltype,
  levels = sort(unique(plotdf$celltype))
)
# Reverse the gene order so the first marker ends up at the top of the y axis.
plotdf$gene <- factor(plotdf$gene, levels = rev(marker.genes))
# Clamp the colour values at 3 so outliers do not dominate the gradient.
plotdf$exp <- pmin(plotdf$exp, 3)

plotdf %>%
  ggplot(aes(x = celltype, y = gene, size = ratio, color = exp)) +
  geom_point() +
  scale_x_discrete("") +
  scale_y_discrete("") +
  scale_color_gradientn(
    colours = rev(c("#FFD92F", "#FEE391", brewer.pal(11, "Spectral")[7:11]))
  ) +
  scale_size_continuous(limits = c(0, 1)) +
  theme_bw() +
  theme(
    axis.text.x.bottom = element_text(hjust = 1, vjust = 1, angle = 45)
  )
ggsave(filename = "bubble2.pdf", width = 9, height = 12, units = "cm")

file

這兩種方法具體函數定義略有差異(例如本文的ggplot2版本是先對每個基因在所有細胞間做標準化,再按亞羣求均值),所以氣泡圖看上去不太一樣


到這裏,marker基因的可視化就結束了,基本就是這些。如果你覺得上述內容對你有用,歡迎轉發,點贊!有任何疑問可以在公衆號後臺提出,我都會回覆的。

因水平有限,有錯誤的地方,歡迎批評指正!

相關文章
相關標籤/搜索