-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathreadits_start_end_content.pl
executable file
·141 lines (118 loc) · 5.84 KB
/
readits_start_end_content.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/local/bin/perl
# Anne S. Warlaumont
# This tool allows you to create new files with:
# 1.) Child vocalization start and end times, as well as the duration of speech related (utt) and not speech related (cry);
# 2.) Adult vocalization start and end times; and
# 3.) Child vocalization start and end times, as well as the duration of speech related (utt) and not speech related (cry) AND Adult vocalization start and end times
# Takes 6 command line arguments:
# 1: The (path and) file name of the input file. (e.g. "e20070225_191245_003110.its")
# 2: The (path and) file name of the child output file. (e.g. "e20070225_191245_003110_CHNStartEndUttCryTimes.txt")
# 3: The (path and) file name of the adult output file. (e.g. "e20070225_191245_003110_ANStartAndEndTimes.txt")
# 4: The (path and) file name of the combined child + adult output file. (e.g. "e20070225_191245_003110_CHN_AN_Segments.txt")
# 5: The mode for dealing with overlap: "IgnoreOverlap", "TreatOverlapAsAdult", "TreatOverlapAsChild", "DeleteOverlap", "IncludeOverlapExcludeIntervening", or "IncludeOverlapIgnoreIntervening" (recommended value is "IgnoreOverlap")
# 6: The maximum length the recording should be truncated at (in seconds). For a 16 hour recording, use 57600.
# Instructions:
# 1.) Open up a unix shell (e.g., the Terminal application under Utilities on Mac or Cygwin on Windows)
# 2.) Navigate to the directory where "readits_start_end_content.pl" is located (e.g. ~/Desktop/lena-its-tools/)
# 3.) Run readits_start_end_content.pl with the (path and) file name as the first argument, the (path and) file name of the child output file as the second argument;
# the (path and) file name of the adult output file as the third argument; the (path and) file name of the combined child and adult output file as the fourth argument;
# the mode for dealing with the overlap as the fifth argument, and the maximum length the recording should be truncated at as the sixth argument;
# (e.g. perl readits_start_end_content.pl e20070225_191245_003110.its e20070225_191245_003110_CHNStartEndUttCryTimes.txt e20070225_191245_003110_ANStartAndEndTimes.txt e20070225_191245_003110_CHN_AN_Segments.txt "IgnoreOverlap" 57600)
use strict;
use warnings;
# ---------------------------------------------------------------------------
# Main script: scan the .its file line by line, pick out CHN (child),
# FAN/MAN (adult) and OLN (overlap) segments, and write their start/end times
# to the child, adult and combined output files.  Processing stops at the
# first segment that starts at or after the cutoff, or right after a segment
# that had to be truncated at the cutoff.
# ---------------------------------------------------------------------------

# All six command-line arguments are required (see the header comment).
if (@ARGV < 6) {
    die "Usage: perl readits_start_end_content.pl INPUT.its CHILD_OUT ADULT_OUT COMBINED_OUT OVERLAP_MODE MAX_SECONDS\n";
}
my ($inputPath, $childPath, $adultPath, $combinedPath, $overlapMode, $maxsecs) = @ARGV;

# A non-numeric cutoff would numify to 0 and silently produce empty outputs,
# so reject it up front.
if ($maxsecs !~ m/\A[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\z/) {
    die "Maximum recording length must be a number of seconds, got \"$maxsecs\"\n";
}

# A mistyped mode would otherwise be accepted without any hint; warn but keep
# going so existing callers are not broken.
my %isKnownOverlapMode = map { $_ => 1 } qw(
    IgnoreOverlap
    TreatOverlapAsAdult
    TreatOverlapAsChild
    DeleteOverlap
    IncludeOverlapExcludeIntervening
    IncludeOverlapIgnoreIntervening
);
if (!$isKnownOverlapMode{$overlapMode}) {
    warn "Unrecognized overlap mode \"$overlapMode\"; expected one of: "
        . join(", ", sort keys %isKnownOverlapMode) . "\n";
}

# Echo the child output file name to standard output.
print "$childPath\n";

# Three-argument open with lexical handles: the file names come straight from
# the command line and must never be interpreted as an open mode or a pipe.
open my $inputFh, '<', $inputPath
    or die "Could not open input file $inputPath: $!\n";
open my $childFh, '>', $childPath
    or die "Could not open child output file $childPath: $!\n";
open my $adultFh, '>', $adultPath
    or die "Could not open adult output file $adultPath: $!\n";
open my $combinedFh, '>', $combinedPath
    or die "Could not open combined output file $combinedPath: $!\n";

# Running sum of the durations of all OLN segments seen so far; in
# "DeleteOverlap" mode later segments are shifted earlier by this amount.
my $totalOverlapTime = 0;

SEGMENT_LINE:
while (my $line = <$inputFh>) {
    chomp $line;

    # Classify the line by the speaker label of the segment it describes.
    my $isOverlap = $line =~ m/Segment spkr="OLN"/;
    my $isChild   = $line =~ m/Segment spkr="CHN"/;
    my $isAdult   = $line =~ m/Segment spkr="FAN"/ || $line =~ m/Segment spkr="MAN"/;
    next SEGMENT_LINE unless $isOverlap || $isChild || $isAdult;

    # Pull the numeric part out of startTime="PT...S" and endTime="PT...S".
    # The leading .* makes the last occurrence on the line win.
    my ($startTime) = $line =~ m/.*startTime="PT([^"]*)S"/;
    my ($endTime)   = $line =~ m/.*endTime="PT([^"]*)S"/;
    if (!defined $startTime || !defined $endTime) {
        warn "Skipping segment without startTime/endTime at input line $.\n";
        next SEGMENT_LINE;
    }

    # Nothing at or beyond the cutoff is reported.
    last SEGMENT_LINE if $startTime >= $maxsecs;

    # A segment running past the cutoff is truncated to it and is the last
    # one processed.
    my $reachedCutoff = 0;
    if ($endTime >= $maxsecs) {
        $endTime       = $maxsecs;
        $reachedCutoff = 1;
    }

    if ($isOverlap) {
        $totalOverlapTime = $totalOverlapTime + $endTime - $startTime;
        # Both "IncludeOverlap..." modes list the overlap segment itself.
        if ($overlapMode =~ m/IncludeOverlap/) {
            print {$combinedFh} "OLN\t$startTime\t$endTime\n";
        }
    }

    # Times written for child/adult segments; only "DeleteOverlap" shifts
    # them (the subtraction is skipped otherwise so the text from the input
    # file is written unchanged).
    my ($reportedStart, $reportedEnd) = ($startTime, $endTime);
    if ($overlapMode eq "DeleteOverlap") {
        $reportedStart = $startTime - $totalOverlapTime;
        $reportedEnd   = $endTime - $totalOverlapTime;
    }

    # Child segment, or an overlap segment counted as child.
    if ($isChild || ($isOverlap && $overlapMode eq "TreatOverlapAsChild")) {
        # Overlap segments have no utterance/cry durations, hence "NA"; the
        # same placeholder is used if a CHN line lacks the attribute.
        my ($childUttLen, $childCryVfxLen) = ("NA", "NA");
        if ($isChild) {
            my ($uttLen) = $line =~ m/.*childUttLen="PT?([^"]*)S"/;
            my ($cryLen) = $line =~ m/.*childCryVfxLen="PT?([^"]*)S"/;
            $childUttLen    = $uttLen if defined $uttLen;
            $childCryVfxLen = $cryLen if defined $cryLen;
        }
        print {$childFh} "$reportedStart\t$reportedEnd\t$childUttLen\t$childCryVfxLen\n";
        print {$combinedFh} "CHN\t$reportedStart\t$reportedEnd\t$childUttLen\t$childCryVfxLen\n";
    }

    # Adult (female or male) segment, or an overlap segment counted as adult.
    if ($isAdult || ($isOverlap && $overlapMode eq "TreatOverlapAsAdult")) {
        print {$adultFh} "$reportedStart\t$reportedEnd\n";
        print {$combinedFh} "AN\t$reportedStart\t$reportedEnd\n";
    }

    # Stop only after every output for this line has been written, so that a
    # truncated overlap segment is still reported in the "TreatOverlapAs..."
    # modes, exactly like a truncated CHN/FAN/MAN segment.
    last SEGMENT_LINE if $reachedCutoff;
}

close($inputFh);
# Buffered write errors only surface at close, so check the output handles.
close($childFh)    or die "Could not close child output file $childPath: $!\n";
close($adultFh)    or die "Could not close adult output file $adultPath: $!\n";
close($combinedFh) or die "Could not close combined output file $combinedPath: $!\n";