#!/usr/bin/perl # by lhtk : lhtk80t7@gmail.com use strict; use warnings; use Bio::Tools::GFF; use feature 'switch'; use experimental 'smartmatch'; #use Getopt::Long; my $transcript_id_file = $ARGV[0]; my $gff_in = $ARGV[1]; my $gff_out = $ARGV[2]; die "根據 transcript_id 來過濾 gff3 文件。\n\nUsage:\n\t$0 transcript_id_file gff3_in gff3_out\n" unless (defined $transcript_id_file and defined $gff_in and defined $gff_out); die "文件已存在!" if ((-e $gff_out) and ($gff_out ne 'tmp.gff3')); open IDS, '<', $transcript_id_file or die $!; my %transcript_ids; my %gene_ids; while (<IDS>) { next if /^\s*$/; chomp; $transcript_ids{$_} = 1; my $gid = (split /\./, $_)[0]; $gene_ids{$gid} = 1; } close IDS; print "Get transcript_ids and gene_ids.\n\n"; # debug # while (my($k,$v) = each %transcript_ids) { # print "$k => $v\n"; # } # while (my($k,$v) = each %gene_ids) { # print "$k => $v\n"; # } my $gffio_i = Bio::Tools::GFF->new(-file => $gff_in, -gff_version => 3); my $gffio_o = Bio::Tools::GFF->new(-file => '>' . $gff_out, -gff_version => 3); while (my $f = $gffio_i->next_feature()) { # debug # print $f->primary_tag(), "\n"; given ($f->primary_tag()) { when (/predicted_gene/) { # my $id = $f->primary_id(); # 也能夠 my ($id) = $f->get_tag_values('ID'); # print "$id\n"; # debug next unless exists $gene_ids{$id}; } when (/mRNA/) { my ($id) = $f->get_tag_values('ID'); next unless exists $transcript_ids{$id}; } when (/exon|CDS/) { my ($parent) = $f->get_tag_values('Parent'); next unless exists $transcript_ids{$parent}; } default { die "Do you know this? -- ", $f->primary_tag(), "\n"; } } $gffio_o->write_feature($f); } $gffio_i->close(); $gffio_o->close(); print "Done!\n";
獲取 gff 文件第一列數據:$feat->seq_id()
,更多請參考Bio::SeqFeature::Generic
。less