[toc]python
最近因爲老闆有分析項目,我實在是進展緩慢,一直苦惱並艱難的探索和進展,因此很長時間沒有和你們見面了,今天我爲你們帶來的source tracker分析,使用前一段時間剛出來的工具FEAST。git
劉老師對這片文章進行了詳細的解讀: Nature Methods:快速準確的微生物來源追溯工具FEAST。跟着劉老師的步伐,今天我對這個工具進行一個嘗試。爲何做者不將這個工具封裝到R包呢這樣不就更容易了嗎?可能好多小夥伴都沒有從github上克隆過項目。github
不單單是這一次,我在以後所有的分析都會將整個羣落封裝到phylsoeq,只是爲了更好的更加靈活的對微生物羣落數據進行分析,固然你們若是初次見面,可能須要安裝依賴極多的phyloseq包。須要熟悉phylsoeq封裝的結構和調用方法。微信
爲了讓你們更容易操做,我把數據保存爲csv,方便還沒有解除phylsoeq的小夥伴進行無壓力測試。網絡
FEAST提供兩種方式來作微生物來源分析。函數
首先咱們來演示基於單個目標樣品和來源樣品的來源分析工具
# rm(list = ls()) # gc() path = "./phyloseq_7_source_FEAST" dir.create(path) ##導入主函數 source("./FEAST-master/FEAST_src//src.R") ps = readRDS("./a3_DADA2_table/ps_OTU_.ps") # 導入分組文件和OTU表格 metadata <- as.data.frame(sample_data(ps)) head(metadata) write.csv(metadata,"metadata.csv",quote = F) # Load OTU table vegan_otu <- function(physeq){ OTU <- otu_table(physeq) if(taxa_are_rows(OTU)){ OTU <- t(OTU) } return(as(OTU,"matrix")) } otus <- as.data.frame(t(vegan_otu(ps))) write.csv(otus,"otus.csv",quote = F) otus <- t(as.matrix(otus)) ###下面區分目標樣品和來源樣品。 envs <- metadata$SampleType metadata<- arrange(metadata, SampleType) metadata$id = rep(1:6,4) Ids <- na.omit(unique(metadata$id)) it = 1 train.ix <- which(metadata$SampleType%in%c("B","C","D")& metadata$id == Ids[it]) test.ix <- which(metadata$SampleType=='A' & metadata$id == Ids[it]) # Extract the source environments and source/sink indices num_sources <- length(train.ix) #number of sources COVERAGE = min(rowSums(otus[c(train.ix, test.ix),])) #Can be adjusted by the user #對兩組樣品進行抽平 sources <- as.matrix(rarefy(otus[train.ix,], COVERAGE)) sinks <- as.matrix(rarefy(t(as.matrix(otus[test.ix,])), COVERAGE)) dim(sinks) print(paste("Number of OTUs in the sink sample = ",length(which(sinks > 0)))) print(paste("Seq depth in the sources and sink samples = ",COVERAGE)) print(paste("The sink is:", envs[test.ix])) # Estimate source proportions for each sink EM_iterations = 1000 # number of EM iterations. default value FEAST_output<-FEAST(source=sources, sinks = t(sinks), env = envs[train.ix], em_itr = EM_iterations, COVERAGE = COVERAGE) Proportions_est <- FEAST_output$data_prop[,1] names(Proportions_est) <- c(as.character(envs[train.ix]), "unknown") print("Source mixing proportions") Proportions_est round(Proportions_est,3)
基於多個目標和來源的微生物來源分析: different_sources_flags設置目標樣品和來源樣品的對應關係。是否不一樣目標對應不一樣來源樣品,仍是不一樣目標對應相同來源樣品學習
##導入主函數 source("./FEAST-master/FEAST_src//src.R") ps = readRDS("./a3_DADA2_table/ps_OTU_.ps") # 導入分組文件和OTU表格 metadata <- as.data.frame(sample_data(ps)) head(metadata) # Load OTU table vegan_otu <- function(physeq){ OTU <- otu_table(physeq) if(taxa_are_rows(OTU)){ OTU <- t(OTU) } return(as(OTU,"matrix")) } otus <- as.data.frame(t(vegan_otu(ps))) otus <- t(as.matrix(otus)) head(metadata) metadata<- arrange(metadata, SampleType) metadata$id = rep(1:6,4) EM_iterations = 1000 #default value different_sources_flag = 1 envs <- metadata$SampleType Ids <- na.omit(unique(metadata$id)) Proportions_est <- list() it = 1 for(it in 1:length(Ids)){ # Extract the source environments and source/sink indices if(different_sources_flag == 1){ train.ix <- which(metadata$SampleType%in%c("B","C","D")& metadata$id == Ids[it]) test.ix <- which(metadata$SampleType=='A' & metadata$id == Ids[it]) } else{ train.ix <- which(metadata$SampleType%in%c("B","C","D")) test.ix <- which(metadata$SampleType=='A' & metadata$id == Ids[it]) } num_sources <- length(train.ix) COVERAGE = min(rowSums(otus[c(train.ix, test.ix),])) #Can be adjusted by the user # Define sources and sinks sources <- as.matrix(rarefy(otus[train.ix,], COVERAGE)) sinks <- as.matrix(rarefy(t(as.matrix(otus[test.ix,])), COVERAGE)) print(paste("Number of OTUs in the sink sample = ",length(which(sinks > 0)))) print(paste("Seq depth in the sources and sink samples = ",COVERAGE)) print(paste("The sink is:", envs[test.ix])) # Estimate source proportions for each sink FEAST_output<-FEAST(source=sources, sinks = t(sinks), env = envs[train.ix], em_itr = EM_iterations, COVERAGE = COVERAGE) Proportions_est[[it]] <- FEAST_output$data_prop[,1] names(Proportions_est[[it]]) <- c(as.character(envs[train.ix]), "unknown") if(length(Proportions_est[[it]]) < num_sources +1){ tmp = Proportions_est[[it]] Proportions_est[[it]][num_sources] = NA Proportions_est[[it]][num_sources+1] = tmp[num_sources] } print("Source mixing proportions") print(Proportions_est[[it]]) } print(Proportions_est) went = as.data.frame(Proportions_est) colnames(went) = paste("repeat_",unique(metadata$id),sep = "") head(went) filename = paste(path,"/FEAST.csv",sep = "") write.csv(went,filename,quote = F)
library(RColorBrewer) library(dplyr) library(graphics) head(went) plotname = paste(path,"/FEAST.pdf",sep = "") pdf(file = plotname,width = 12,height = 12) par(mfrow=c((length(unique(metadata$SampleType))%/%2 +2 ),2), mar=c(1,1,1,1)) # layouts = as.character(unique(metadata$SampleType)) for (i in 1:length(colnames(went))) { labs <- paste0(row.names(went)," \n(", round(went[,i]/sum(went[,i])*100,2), "%)") pie(went[,i],labels=labs, init.angle=90,col = brewer.pal(nrow(went), "Reds"), border="black",main =colnames(went)[i] ) } dev.off()
咱們做爲生物可能測定9個以上重複了,若是展現九個餅圖,那就顯得太誇張了,求均值,展現均值餅圖測試
head(went) asx = as.data.frame(rowMeans(went)) asx = as.matrix(asx) asx_norm = t(t(asx)/colSums(asx)) #* 100 # normalization to total 100 head(asx_norm) plotname = paste(path,"/FEAST_mean.pdf",sep = "") pdf(file = plotname,width = 6,height = 6) labs <- paste0(row.names(asx_norm)," \n(", round(asx_norm[,1]/sum(asx_norm[,1])*100,2), "%)") pie(asx_norm[,1],labels=labs, init.angle=90,col = brewer.pal(nrow(went), "Reds"), border="black",main = "mean of source tracker") dev.off()