data_summarize.pl：data 目錄文本時長彙總腳本

#!/usr/bin/env perl

# Copyright 2018 Jarvan Wang

# Summarize a data directory by transcript text.
#
# Reads <data>/text ("<utt-id> <transcript>" per line) and <data>/utt2dur
# ("<utt-id> <duration>" per line), groups utterances that share the same
# transcript, and prints one "@"-separated row per transcript:
#
#     transcript@utterance-count@hours
#
# Rows are ordered by ascending total duration (ties broken by transcript
# text).  Only transcripts whose total exceeds MIN_LISTED_SECONDS are listed,
# but the final totals row accounts for every transcript.
#
# NOTE(review): durations are presumably seconds (they are divided by 3600
# for the "hours" column) -- confirm against the producer of utt2dur.

use strict;
use warnings;

# Deliberately no `use utf8` and no encoding layers: the non-ASCII literals
# below and the transcript text are passed through as raw bytes, exactly as
# they appear in the source file and the input files.

# A transcript must total more than this many seconds to get its own row.
use constant MIN_LISTED_SECONDS => 1000;
use constant SECONDS_PER_HOUR   => 3600;

# Read a "<key> <value>" file into a hash reference.
#
# Each line is split at the FIRST space only, so the value may itself contain
# spaces.  Blank lines are skipped; a line without a value maps to ''.  When a
# key is repeated, the last occurrence wins.  Dies if the file cannot be
# opened.
sub read_key_value_file {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";

    my %value_of;
    while (my $line = <$fh>) {
        chomp $line;
        next if $line eq '';

        my ($key, $value) = split(/ /, $line, 2);
        $value_of{$key} = defined $value ? $value : '';
    }
    close($fh);

    return \%value_of;
}

# Group utterances by transcript.
#
# Takes hash refs utt-id => transcript and utt-id => duration.  Returns two
# hash refs keyed by transcript: total duration and utterance count.  An
# utterance with no (or an empty) duration entry is still counted but
# contributes 0 to the total.
sub summarize_durations {
    my ($text_of, $dur_of) = @_;

    my (%seconds_for, %count_for);
    for my $utt (keys %{$text_of}) {
        my $text     = $text_of->{$utt};
        my $duration = $dur_of->{$utt};
        $duration = 0 if !defined $duration || $duration eq '';

        $seconds_for{$text} += $duration;
        $count_for{$text}   += 1;
    }

    return (\%seconds_for, \%count_for);
}

# Build the report lines (header, one row per listed transcript, totals row).
#
# Each transcript's total is rounded to two decimals before it is compared
# with the threshold and converted to hours.
sub format_summary {
    my ($seconds_for, $count_for) = @_;

    # "@" is escaped so it can never be taken for array interpolation.
    my @lines = ("文本\@語句數\@小時\n");

    my @texts = sort {
        $seconds_for->{$a} <=> $seconds_for->{$b} or $a cmp $b
    } keys %{$seconds_for};

    my ($count_sum, $hour_sum) = (0, 0);
    for my $text (@texts) {
        my $seconds = sprintf('%.2f', $seconds_for->{$text});
        my $hours   = $seconds / SECONDS_PER_HOUR;

        $count_sum += $count_for->{$text};
        $hour_sum  += $hours;

        # Short transcripts are counted in the totals but not listed.
        next if $seconds <= MIN_LISTED_SECONDS;

        push @lines,
            sprintf("%s\@%d\@%.2f\n", $text, $count_for->{$text}, $hours);
    }

    push @lines, sprintf("總和\@%d\@%.2f\n", $count_sum, $hour_sum);

    return @lines;
}

# Entry point: takes the command-line arguments and returns the process exit
# status (0 on success, 1 on a usage error or missing input file).
sub main {
    my @args = @_;

    if (@args != 1) {
        print STDERR "Usage: keyword_summarize.pl <data>\n";
        return 1;
    }

    my $text_file    = "$args[0]/text";
    my $utt2dur_file = "$args[0]/utt2dur";

    unless (-e $text_file && -e $utt2dur_file) {
        print STDERR "$text_file or $utt2dur_file does not exist!\n";
        return 1;
    }

    my $text_of = read_key_value_file($text_file);
    my $dur_of  = read_key_value_file($utt2dur_file);

    my ($seconds_for, $count_for) = summarize_durations($text_of, $dur_of);
    print format_summary($seconds_for, $count_for);

    return 0;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. via `require`) without side effects.
exit(main(@ARGV)) unless caller;

1;

相關文章
相關標籤/搜索