cat test.sh
#!/usr/bin/bash HG38=/path/hg38/hg38.fa GATK=/path/biosoft/gatk3.7/GenomeAnalysisTK.jar java -jar -Xmx10g $GATK -T DepthOfCoverage \ -R $HG38 \ -o ./test \ -I /path2bam/03_bam_processing/03_base_recal/test.sorted_MarkDuplicates_BQSR.bam \ -L /path2bed/target.bed \ --omitDepthOutputAtEachBase --omitIntervalStatistics \ -ct 1 -ct 5 -ct 10 -ct 20 -ct 30 -ct 50 -ct 100
結果生成四個文件java
4096 Jan 13 21:39 ./ 4096 Jan 13 21:05 ../ 6417 Jan 13 21:39 test.sample_cumulative_coverage_counts GenekVIP 6412 Jan 13 21:39 test.sample_cumulative_coverage_proportions 9362 Jan 13 21:39 test.sample_statistics 291 Jan 13 21:39 test.sample_summary
less test.sample_sumary 結果不是很好ios
sample_id total mean granular_third_quartile granular_median granular_first_quartile %_bases_above_1 %_bases_above_5 %_bases_above_10 %_bases_above_20 %_bases_above_3 test 2025198 17.50 1 1 1 5.1 2.7 2.6 2.5 2.5 2.4 2.3 Total 2025198 17.50 N/A N/A N/A
#git
#github
git clone https://github.com/shiquan/bamdst.git cd bamdst/ make
測試:bash
cat test.shapp
#!/usr/bin/bash
/path2bamdst/biosoft/bamdst/bamdst -p /path2bed/target.bed -o ./ ../03_base_recal/test.sorted.markdup.realign.BQSR.bam echo "run status $?"
結果文件:less
1630 Jan 13 16:58 chromosomes.report 4331 Jan 13 16:58 coverage.report 64188 Jan 13 16:58 depth_distribution.plot 838584 Jan 13 16:58 depth.tsv.gz 18119 Jan 13 16:58 insertsize.plot 9519 Jan 13 16:58 region.tsv.gz 0 Jan 13 16:58 uncover.bed
看一下內容:ide
$ less -S chromosomes.report#該文件中存儲的是從bam文件中獲取的染色體深度、覆蓋度信息 Chromosome DATA(%) Avg depth Median Coverage% Cov 4x % Cov 10x % Cov 30x % chrM 100.00 0.26 0.0 5.66 2.83 0.00 0.00 $ less coverage.report#信息太多了,我目前以爲比較重要的有 [Total] Raw Reads (All reads) 15 [Total] Mapped Reads 15 [Total] Properly paired 0#Paired reads with properly insert size [Total] Fraction of Properly paired 0.00% [Total] Read and mate paired 0#read1 and read2 paired. [Total] Fraction of Read and mate paired 0.00% [Total] Map quality cutoff value 20 [Total] MapQuality above cutoff reads 10 [Total] Fraction of MapQ reads in all reads 66.67% [Total] Fraction of MapQ reads in mapped reads 66.67% [Target] Average depth 0.26 [Target] Average depth(rmdup) 0.06 [Target] Coverage (>0x) 5.66% [Target] Coverage (>=4x) 2.83% [Target] Coverage (>=10x) 0.00% [Target] Coverage (>=30x) 0.00% $ less depth_distribution.plot#結合R能夠作出目標區域的深度分佈圖 0 900 0.943396 54 0.056604 1 0 0.000000 54 0.056604 2 0 0.000000 54 0.056604 3 27 0.028302 27 0.028302 4 4 0.004193 23 0.024109 #左邊三列分別表明:覆蓋深度,對應該深度的鹼基數,鹼基佔比(以目標區域的鹼基數爲分母); #右邊兩列是高深度向低深度累加的值,分別是鹼基數及其佔比,累加到X=1爲止。 $ less depth.tsv #Chr Pos Raw Depth Rmdup depth Cover depth chrM 650 8 6 8 chrM 651 8 6 8 chrM 652 8 6 8 chrM 653 9 6 9 chrM 654 9 6 9 #Raw Depth從輸入bam文件中獲得,沒有任何限制; #Rmdup depth去除了PCR重複的reads, 次優比對reads, 低比對質量的reads(mapQ < 20), 該值與samtools depth的輸出深度相似; #默認使用raw depth來統計coverage.report文件中的覆蓋率信息。 若是要使用rmdup depth計算覆蓋率,可以使用參數"--use_rmdup"; #The coverage depth is the raw depth with consider of deletion region, so this value should be equal to or greated than the raw depth. $ less insertsize.plot#作出兩個接頭之間的插入片斷大小的圖,理論上是根據read1和read2在染色體上的座標信息求得,這時read1和read2要求比對到一條染色體上。 $ less region.tsv #Chr Start Stop Avg depth Median Coverage Coverage(FIX) chrM 649 1603 0.26 0.0 5.66 5.66 #目標區域文件每個區域的統計,其中Coverage以X>0來統計 $ less uncover.bed chrM 672 1603 #--uncover的值默認是5,從前面depth_distribution.plot文件中也能看出來,深度小於5的鹼基數就是931; #包含低覆蓋或者是未覆蓋的,經過"--uncover"規定「未覆蓋」的閾值。
REFERENCE 簡書做者:hsy_hzauer 連接:https://www.jianshu.com/p/ac7b8e4d76e4測試