#!/usr/bin/env perl
# Copyright 2018 Jarvan Wang
#
# keyword_summarize.pl -- report how much audio exists for each distinct
# transcript in a data directory.
#
# Usage: keyword_summarize.pl <data>
#
# Input (both files must exist inside <data>):
#   <data>/text     one "<utt-id> <transcript>" entry per line
#   <data>/utt2dur  one "<utt-id> <duration>"   entry per line
# Each line is split at the FIRST single space only; everything after it is
# the value.  Durations are divided by 3600 to obtain the "hours" column, so
# they are treated as seconds.
#
# Output (STDOUT, fields separated by '@'):
#   header row : text @ utterance count @ hours
#   one row per distinct transcript whose total duration exceeds
#   $MIN_LISTED_SECONDS, ordered by ascending total duration, ties broken by
#   transcript (string comparison)
#   total row  : utterance count and hours summed over ALL transcripts,
#                including those too short to be listed
#
# Exit status: 1 on wrong argument count or a missing input file; dies with
# a message if an input file cannot be opened.
#
# NOTE: `use utf8` is deliberately NOT enabled.  The non-ASCII literals below
# are emitted as the raw bytes stored in this file, and transcripts are
# compared byte-wise, exactly as the input files provide them.

use strict;
use warnings;

# A transcript gets its own output row only if its summed duration (seconds)
# is strictly greater than this value.
my $MIN_LISTED_SECONDS = 1000;

# Read a "<key> <value>" file into a hash reference.
#   $path - file to read
# Returns a hashref mapping each key to the remainder of its line (the empty
# string when the line has no space-separated value).  Empty lines are
# skipped so they do not show up as an utterance with an empty id.
# Dies if the file cannot be opened.
sub read_two_column_file {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";

    my %value_of;
    while (my $line = <$fh>) {
        chomp $line;
        next if $line eq '';

        # Limit of 2: the value may itself contain spaces (e.g. a transcript).
        my ($key, $value) = split(/ /, $line, 2);
        $value_of{$key} = defined $value ? $value : '';
    }
    close($fh);

    return \%value_of;
}

if (@ARGV != 1) {
    print STDERR "Usage: keyword_summarize.pl <data>\n";
    exit(1);
}

my $data_dir     = $ARGV[0];
my $text_file    = "$data_dir/text";
my $utt2dur_file = "$data_dir/utt2dur";

unless (-e $text_file && -e $utt2dur_file) {
    print STDERR "$text_file or $utt2dur_file does not exist!\n";
    exit(1);
}

my $text_of     = read_two_column_file($text_file);
my $duration_of = read_two_column_file($utt2dur_file);

# Accumulate total duration and utterance count per distinct transcript.
my %total_seconds_for;
my %utt_count_for;
for my $utt_id (keys %{$text_of}) {
    my $transcript = $text_of->{$utt_id};

    # An utterance without a utt2dur entry still counts as an utterance but
    # contributes no duration.
    my $seconds = exists $duration_of->{$utt_id} ? $duration_of->{$utt_id} : 0;

    $total_seconds_for{$transcript} += $seconds;
    $utt_count_for{$transcript}     += 1;
}

# Header row: text @ utterance count @ hours.
# '@' is escaped in double-quoted strings so it is never taken as the start
# of an array interpolation.
print "文本\@語句數\@小時\n";

my $count_sum = 0;
my $hour_sum  = 0;

my @transcripts_by_duration = sort {
    $total_seconds_for{$a} <=> $total_seconds_for{$b} or $a cmp $b
} keys %total_seconds_for;

for my $transcript (@transcripts_by_duration) {
    # Round to two decimals before any further arithmetic so that the total
    # row is built from the same rounded figures as the individual rows.
    my $seconds = sprintf("%.2f", $total_seconds_for{$transcript});

    $count_sum += $utt_count_for{$transcript};
    $hour_sum  += $seconds / 3600;

    # Short transcripts are included in the totals but not listed.
    next unless $seconds > $MIN_LISTED_SECONDS;

    printf("%s\@%d\@%.2f\n",
        $transcript, $utt_count_for{$transcript}, $seconds / 3600);
}

# Total row.
printf("總和\@%d\@%.2f\n", $count_sum, $hour_sum);