@@ -34,21 +34,21 @@ startTime=$(date +'%F-%H-%M')
 echo "started at" $startTime
 
 # download the data.
-for part in train_data_01 train_data_02 test_data_01; do
+for part in train_data_01 train_data_02 train_data_03 test_data_01; do
   local/download_and_untar.sh $data $part
 done
 
 # download the LM resources
 local/download_lm.sh data/local/lm
 
 # format the data as Kaldi data directories
-for part in train_data_01 train_data_02 test_data_01; do
+for part in train_data_01 train_data_02 train_data_03 test_data_01; do
   # use underscore-separated names in data directories.
   local/data_prep.sh $data/$part data/$(echo $part | sed s/-/_/g)
 done
 
 # update segmentation of transcripts
-for part in train_data_01 train_data_02 test_data_01; do
+for part in train_data_01 train_data_02 train_data_03 test_data_01; do
   local/updateSegmentation.sh data/$part data/local/lm
 done
 
@@ -74,10 +74,10 @@ mfccdir=mfcc
 hostInAtlas="ares hephaestus jupiter neptune"
 if [[ ! -z $(echo $hostInAtlas | grep -o $(hostname -f)) ]]; then
   mfcc=$(basename mfccdir) # in case was absolute pathname (unlikely), get basename.
-  utils/create_split_dir.pl /mnt/{ares,hephaestus,jupiter,neptune}/$USER/kaldi-data/zeroth-kaldi/s5/$mfcc/storage \
+  utils/create_split_dir.pl /mnt/{ares,hephaestus,jupiter,neptune}/$USER/kaldi-data/zeroth/s5/$mfcc/storage \
     $mfccdir/storage
 fi
-for part in train_data_01 train_data_02 test_data_01; do
+for part in train_data_01 train_data_02 train_data_03 test_data_01; do
   steps/make_mfcc.sh --cmd "$train_cmd" --nj $nCPU data/$part exp/make_mfcc/$part $mfccdir
   steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir
 done
@@ -86,10 +86,11 @@
 utils/combine_data.sh data/merged data/train_data_01 data/test_data_01
 local/split_dataset.sh --ratio 20 data/merged data/trainset_01 data/testset_01
 local/split_dataset.sh --ratio 20 data/train_data_02 data/trainset_02 data/testset_02
+local/split_dataset.sh --ratio 20 data/train_data_03 data/trainset_03 data/testset_03
 
 # Merge trainsets and testsets
-utils/combine_data.sh data/train_clean data/trainset_01 data/trainset_02
-utils/combine_data.sh data/test_clean data/testset_01 data/testset_02
+utils/combine_data.sh data/train_clean data/trainset_01 data/trainset_02 data/trainset_03
+utils/combine_data.sh data/test_clean data/testset_01 data/testset_02 data/testset_03
 
 # Make some small data subsets for early system-build stages.
 utils/subset_data_dir.sh --shortest data/train_clean 2000 data/train_2kshort