Commit d29cb548 authored by Dan Povey
Browse files

sandbox/online: implement online feature extraction code; merge trunk.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/online@3335 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parents d8a9d424 13c27149
# DNN hybrid system training parameters
dnn_num_hidden_layers=5
dnn_input_dim=5000
dnn_output_dim=500
dnn_num_hidden_layers=4
dnn_input_dim=4000
dnn_output_dim=400
dnn_minibatch_size=128
dnn_init_learning_rate=0.008
dnn_final_learning_rate=0.0008
dnn_max_change=10
dnn_num_jobs=8
dnn_num_threads=1
dnn_mixup=5000
dnn_mixup=12000
dnn_parallel_opts="-l gpu=1,hostname=g*"
bnf_every_nth_frame=2 # take every 2nd frame.
......@@ -18,7 +18,10 @@ use_pitch=false
lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 )
lmwt_bnf_extra_opts=( --min-lmwt 13 --max-lmwt 18 )
lmwt_dnn_extra_opts=( --min-lmwt 8 --max-lmwt 12 )
lmwt_dnn_extra_opts=( --min-lmwt 9 --max-lmwt 13 )
dnn_beam=16.0
dnn_lat_beam=8.5
icu_opt=(--use-icu true --icu-transform Any-Lower)
......
......@@ -18,7 +18,10 @@ use_pitch=false
lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 )
lmwt_bnf_extra_opts=( --min-lmwt 13 --max-lmwt 18 )
lmwt_dnn_extra_opts=( --min-lmwt 8 --max-lmwt 12 )
lmwt_dnn_extra_opts=( --min-lmwt 9 --max-lmwt 13 )
dnn_beam=16.0
dnn_lat_beam=8.5
icu_opt=(--use-icu true --icu-transform Any-Lower)
......
......@@ -332,6 +332,7 @@ if [ -f exp/tri6_nnet/.done ]; then
if [ ! -f $decode/.done ]; then
mkdir -p $decode
steps/nnet2/decode.sh --cmd "$decode_cmd" --nj $my_nj \
--beam $dnn_beam --lat-beam $dnn_lat_beam \
--skip-scoring true "${decode_extra_opts[@]}" \
--transform-dir exp/tri5/decode_${dirid} \
exp/tri5/graph ${datadir} $decode |tee $decode/decode.log
......
#!/bin/bash
# Copyright 2013 Daniel Povey
# Apache 2.0.

# Prints a one-line table of equal error rates (EERs), one per condition
# 1..8, for a scores file measured against a trials file.
#
# Usage: score_sre08.sh trials-file [scores-file]
#   trials-file  must exist; line i must correspond to line i of the scores.
#   scores-file  optional; if omitted, the scores are read from standard input.
#
# For condition c, a trial is selected when field (3 + c) of the trials file
# equals "Y"; the third field of the scores file and the third field of the
# trials file are then passed to compute-eer (which must be on the PATH).

if [ $# -lt 1 ] || [ $# -gt 2 ]; then
  echo "usage: $0 trials-file [scores-file]"
  echo "e.g.: $0 data/sre08_trials/short2-short3-female.trials foo"
  exit 1;
fi

trials=$1
# Without the trials file every EER below would be meaningless, so stop here
# instead of printing a table of zeros.
if [ ! -f "$trials" ]; then
  echo "Expecting trials file $trials to exist"
  exit 1;
fi

if [ $# -eq 2 ]; then
  scores=$2
  tempfile=
else
  tempfile=$(mktemp)
  scores=$tempfile
  cat > "$tempfile" # put the standard input into tempfile.
fi

echo "Scoring against $trials"

printf '% 12s' 'Condition:'
for condition in $(seq 8); do
  printf '% 7d' $condition;
done
echo

printf '% 12s' 'EER:'
for condition in $(seq 8); do
  # After the paste, field 1 is the score and fields 2.. are the trials-file
  # fields, so $4 is the trials file's 3rd field and $(4+c) its (3+c)'th.
  eer=$(awk '{print $3}' "$scores" | paste - "$trials" | awk -v c=$condition '{n=4+c; if ($n == "Y") print $1, $4}' | compute-eer - 2>/dev/null)
  # $eer is deliberately left unquoted: if compute-eer produced nothing,
  # printf then prints 0.00 rather than complaining about an empty number.
  printf '% 7.2f' $eer
done
echo

# Only clean up when we created a temporary file ourselves.
if [ -n "$tempfile" ]; then
  rm "$tempfile" 2>/dev/null
fi

exit 0;
#!/bin/bash
# Copyright 2013 Daniel Povey
# Apache 2.0.
#
# See README.txt for more info on data required.
# Results (EERs) are inline in comments below.
#
# This example script is still a bit of a mess, and needs to be
# cleaned up, but it shows you all the basic ingredients.
......@@ -58,16 +61,16 @@ utils/subset_data_dir.sh data/fisher_male 4000 data/fisher_male_4k
utils/subset_data_dir.sh data/fisher_female 4000 data/fisher_female_4k
sid/train_diag_ubm.sh --cmd "$train_cmd" data/fisher_2k 2048 exp/diag_ubm_2048
sid/train_diag_ubm.sh --nj 30 --cmd "$train_cmd" data/fisher_2k 2048 exp/diag_ubm_2048
sid/train_full_ubm.sh --cmd "$train_cmd" data/fisher_4k exp/diag_ubm_2048 exp/full_ubm_2048
sid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/fisher_4k exp/diag_ubm_2048 exp/full_ubm_2048
# Get male and female versions of the UBM in one pass; make sure not to remove
# any Gaussians due to low counts (so they stay matched). This will be more convenient
# for gender-id.
sid/train_full_ubm.sh --remove-low-count-gaussians false --num-iters 1 --cmd "$train_cmd" \
sid/train_full_ubm.sh --nj 30 --remove-low-count-gaussians false --num-iters 1 --cmd "$train_cmd" \
data/fisher_male_4k exp/full_ubm_2048 exp/full_ubm_2048_male &
sid/train_full_ubm.sh --remove-low-count-gaussians false --num-iters 1 --cmd "$train_cmd" \
sid/train_full_ubm.sh --nj 30 --remove-low-count-gaussians false --num-iters 1 --cmd "$train_cmd" \
data/fisher_female_4k exp/full_ubm_2048 exp/full_ubm_2048_female &
wait
......@@ -91,8 +94,11 @@ sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \
# The script below demonstrates the gender-id script. We don't really use
# it for anything here, because the SRE 2008 data is already split up by
# gender and gender identification is not required for the eval.
# It prints out the error rate based on the info in the spk2gender file;
# see exp/gender_id_fisher/error_rate where it is also printed.
sid/gender_id.sh --cmd "$train_cmd" --nj 150 exp/full_ubm_2048{,_male,_female} \
data/fisher exp/gender_id_fisher
# Gender-id error rate is 2.58%
# Extract the iVectors for the Fisher data.
sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \
......@@ -124,14 +130,17 @@ cat $trials | awk '{print $1, $2}' | \
scp:exp/ivectors_sre08_test_short3_female/spk_ivector.scp \
foo
# This condition, the telephone-only condition, is the only one for which we get results
# in the ballpark of the SRE08 official results-- this makes sense because we used Fisher
# for system development data, and this is the only condition that matches the train
# condition.
condition=6
awk '{print $3}' foo | paste - $trials | awk -v c=$condition '{n=4+c; if ($n == "Y") print $1, $4}' | \
compute-eer -
# LOG (compute-eer:main():compute-eer.cc:136) Equal error rate is 11.5854%, at threshold 55.3259
local/score_sre08.sh $trials foo
#Scoring against data/sre08_trials/short2-short3-female.trials
# Condition: 1 2 3 4 5 6 7 8
# EER: 29.07 4.48 28.89 20.57 19.83 11.14 7.35 7.89
# The following shows a more direct way to get the scores.
#condition=6
#awk '{print $3}' foo | paste - $trials | awk -v c=$condition '{n=4+c; if ($n == "Y") print $1, $4}' | \
# compute-eer -
# LOG (compute-eer:main():compute-eer.cc:136) Equal error rate is 11.1419%, at threshold 55.9827
# Note: to see how you can plot the DET curve, look at
# local/det_curve_example.sh
......@@ -140,24 +149,23 @@ awk '{print $3}' foo | paste - $trials | awk -v c=$condition '{n=4+c; if ($n ==
### Demonstrate what happens if we reduce the dimension with LDA
ivector-compute-lda --dim=150 --total-covariance-factor=0.1 \
'ark:ivector-normalize-length scp:exp/ivectors_male/ivector.scp ark:- |' ark:data/fisher_male/utt2spk \
exp/ivectors_male/transform.mat
'ark:ivector-normalize-length scp:exp/ivectors_fisher_male/ivector.scp ark:- |' ark:data/fisher_male/utt2spk \
exp/ivectors_fisher_male/transform.mat
ivector-compute-lda --dim=150 --total-covariance-factor=0.1 \
'ark:ivector-normalize-length scp:exp/ivectors_female/ivector.scp ark:- |' ark:data/fisher_female/utt2spk \
exp/ivectors_female/transform.mat
'ark:ivector-normalize-length scp:exp/ivectors_fisher_female/ivector.scp ark:- |' ark:data/fisher_female/utt2spk \
exp/ivectors_fisher_female/transform.mat
trials=data/sre08_trials/short2-short3-female.trials
cat $trials | awk '{print $1, $2}' | \
ivector-compute-dot-products - \
'ark:ivector-transform exp/ivectors_female/transform.mat scp:exp/ivectors_sre08_train_short2_female/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |' \
'ark:ivector-transform exp/ivectors_female/transform.mat scp:exp/ivectors_sre08_test_short3_female/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |' \
'ark:ivector-transform exp/ivectors_fisher_female/transform.mat scp:exp/ivectors_sre08_train_short2_female/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |' \
'ark:ivector-transform exp/ivectors_fisher_female/transform.mat scp:exp/ivectors_sre08_test_short3_female/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |' \
foo
condition=6
awk '{print $3}' foo | paste - $trials | awk -v c=$condition '{n=4+c; if ($n == "Y") print $1, $4}' | \
compute-eer -
# LOG (compute-eer:main():compute-eer.cc:136) Equal error rate is 10.7539%, at threshold 36.7174
local/score_sre08.sh $trials foo
#Scoring against data/sre08_trials/short2-short3-female.trials
# Condition: 1 2 3 4 5 6 7 8
# EER: 24.16 2.69 24.06 13.96 14.66 10.48 6.59 6.84
......@@ -174,8 +182,9 @@ ivector-plda-scoring --num-utts=ark:exp/ivectors_sre08_train_short2_female/num_u
"ark:ivector-subtract-global-mean scp:exp/ivectors_sre08_train_short2_female/spk_ivector.scp ark:- |" \
"ark:ivector-subtract-global-mean scp:exp/ivectors_sre08_test_short3_female/ivector.scp ark:- |" \
"cat $trials | awk '{print \$1, \$2}' |" foo
condition=6
awk '{print $3}' foo | paste - $trials | awk -v c=$condition '{n=4+c; if ($n == "Y") print $1, $4}' | \
compute-eer -
#LOG (compute-eer:main():compute-eer.cc:136) Equal error rate is 8.81375%, at threshold -116.896
local/score_sre08.sh $trials foo
#Scoring against data/sre08_trials/short2-short3-female.trials
# Condition: 1 2 3 4 5 6 7 8
# EER: 20.55 2.09 20.76 17.27 12.14 8.59 4.69 4.74
......@@ -134,7 +134,7 @@ if [ $stage -le 2 ]; then
fi
if [ $stage -le 3 ] && [ -f $data/spk2gender ]; then
utils/apply_map.pl -f 2 data/train/spk2gender <data/train/utt2spk | \
utils/apply_map.pl -f 2 $data/spk2gender <$data/utt2spk | \
utils/filter_scp.pl $dir/utt2gender > $dir/utt2gender.ref
n1=$(cat $dir/utt2gender | wc -l)
n2=$(cat $dir/utt2gender.ref | wc -l)
......@@ -145,7 +145,7 @@ if [ $stage -le 3 ] && [ -f $data/spk2gender ]; then
n3=$(cat $dir/utt2gender.incorrect | wc -l)
err=$(perl -e "printf('%.2f', (100.0 * $n3 / $n1));")
echo "Gender-id error rate is $err%"
echo "Gender-id error rate is $err%" | tee $dir/error_rate
fi
......
......@@ -5,10 +5,11 @@ all:
include ../kaldi.mk
TESTFILES = feature-mfcc-test feature-plp-test feature-fbank-test \
feature-functions-test
feature-functions-test pitch-functions-test
OBJFILES = feature-functions.o feature-mfcc.o feature-plp.o feature-fbank.o \
feature-spectrogram.o mel-computations.o wave-reader.o pitch-functions.o
feature-spectrogram.o mel-computations.o wave-reader.o \
pitch-functions.o online-feature.o
LIBNAME = kaldi-feat
......
......@@ -29,7 +29,7 @@ int32 NumFrames(int32 nsamp,
const FrameExtractionOptions &opts) {
int32 frame_shift = opts.WindowShift();
int32 frame_length = opts.WindowSize();
assert(frame_shift != 0 && frame_length != 0);
KALDI_ASSERT(frame_shift != 0 && frame_length != 0);
if (static_cast<int32>(nsamp) < frame_length)
return 0;
else
......@@ -49,7 +49,7 @@ void Dither(VectorBase<BaseFloat> *waveform, BaseFloat dither_value) {
void Preemphasize(VectorBase<BaseFloat> *waveform, BaseFloat preemph_coeff) {
if (preemph_coeff == 0.0) return;
assert(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);
KALDI_ASSERT(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);
for (int32 i = waveform->Dim()-1; i > 0; i--)
(*waveform)(i) -= preemph_coeff * (*waveform)(i-1);
(*waveform)(0) -= preemph_coeff * (*waveform)(0);
......@@ -59,7 +59,7 @@ void Preemphasize(VectorBase<BaseFloat> *waveform, BaseFloat preemph_coeff) {
FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts) {
int32 frame_length = opts.WindowSize();
assert(frame_length > 0);
KALDI_ASSERT(frame_length > 0);
window.Resize(frame_length);
for (int32 i = 0; i < frame_length; i++) {
BaseFloat i_fl = static_cast<BaseFloat>(i);
......@@ -89,11 +89,11 @@ void ExtractWindow(const VectorBase<BaseFloat> &wave,
BaseFloat *log_energy_pre_window) {
int32 frame_shift = opts.WindowShift();
int32 frame_length = opts.WindowSize();
assert(window_function.window.Dim() == frame_length);
assert(frame_shift != 0 && frame_length != 0);
KALDI_ASSERT(window_function.window.Dim() == frame_length);
KALDI_ASSERT(frame_shift != 0 && frame_length != 0);
int32 start = frame_shift*f, end = start + frame_length;
assert(start >= 0 && end <= wave.Dim());
assert(window != NULL);
KALDI_ASSERT(start >= 0 && end <= wave.Dim());
KALDI_ASSERT(window != NULL);
int32 frame_length_padded = opts.PaddedWindowSize();
if (window->Dim() != frame_length_padded)
......@@ -130,10 +130,10 @@ void ExtractWaveformRemainder(const VectorBase<BaseFloat> &wave,
int32 num_frames = NumFrames(wave.Dim(), opts);
// offset is the amount at the start that has been extracted.
int32 offset = num_frames * frame_shift;
assert(wave_remainder != NULL);
KALDI_ASSERT(wave_remainder != NULL);
int32 remaining_len = wave.Dim() - offset;
wave_remainder->Resize(remaining_len);
assert(remaining_len >= 0);
KALDI_ASSERT(remaining_len >= 0);
if (remaining_len > 0)
wave_remainder->CopyFromVec(SubVector<BaseFloat>(wave, offset, remaining_len));
}
......@@ -143,7 +143,7 @@ void ComputePowerSpectrum(VectorBase<BaseFloat> *waveform) {
int32 dim = waveform->Dim();
// no, letting it be non-power-of-two for now.
// assert(dim > 0 && (dim & (dim-1) == 0)); // make sure a power of two.. actually my FFT code
// KALDI_ASSERT(dim > 0 && (dim & (dim-1) == 0)); // make sure a power of two.. actually my FFT code
// does not require this (dan) but this is better in case we use different code [dan].
// RealFft(waveform, true); // true == forward (not inverse) FFT; makes no difference here,
......@@ -165,9 +165,9 @@ void ComputePowerSpectrum(VectorBase<BaseFloat> *waveform) {
DeltaFeatures::DeltaFeatures(const DeltaFeaturesOptions &opts): opts_(opts) {
assert(opts.order >= 0 && opts.order < 1000); // just make sure we don't get binary junk.
KALDI_ASSERT(opts.order >= 0 && opts.order < 1000); // just make sure we don't get binary junk.
// opts will normally be 2 or 3.
assert(opts.window > 0 && opts.window < 1000); // again, basic sanity check.
KALDI_ASSERT(opts.window > 0 && opts.window < 1000); // again, basic sanity check.
// normally the window size will be two.
scales_.resize(opts.order+1);
......@@ -181,7 +181,7 @@ DeltaFeatures::DeltaFeatures(const DeltaFeaturesOptions &opts): opts_(opts) {
// work if instead we later make it an array and do opts.window[i-1],
// or something like that. "window" is a parameter specifying delta-window
// width which is actually 2*window + 1.
assert(window != 0);
KALDI_ASSERT(window != 0);
int32 prev_offset = (static_cast<int32>(prev_scales.Dim()-1))/2,
cur_offset = prev_offset + window;
cur_scales.Resize(prev_scales.Dim() + 2*window); // also zeros it.
......@@ -200,11 +200,11 @@ DeltaFeatures::DeltaFeatures(const DeltaFeaturesOptions &opts): opts_(opts) {
void DeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
SubVector<BaseFloat> *output_frame) const {
assert(frame < input_feats.NumRows());
VectorBase<BaseFloat> *output_frame) const {
KALDI_ASSERT(frame < input_feats.NumRows());
int32 num_frames = input_feats.NumRows(),
feat_dim = input_feats.NumCols();
assert(static_cast<int32>(output_frame->Dim()) == feat_dim * (opts_.order+1));
KALDI_ASSERT(static_cast<int32>(output_frame->Dim()) == feat_dim * (opts_.order+1));
output_frame->SetZero();
for (int32 i = 0; i <= opts_.order; i++) {
const Vector<BaseFloat> &scales = scales_[i];
......@@ -276,7 +276,7 @@ void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out) {
BaseFloat ComputeLpc(const VectorBase<BaseFloat> &autocorr_in,
Vector<BaseFloat> *lpc_out) {
int32 n = autocorr_in.Dim() - 1;
assert(lpc_out->Dim() == n);
KALDI_ASSERT(lpc_out->Dim() == n);
Vector<BaseFloat> tmp(n);
BaseFloat ans = Durbin(n, autocorr_in.Data(),
lpc_out->Data(),
......
......@@ -206,7 +206,7 @@ class DeltaFeatures {
void Process(const MatrixBase<BaseFloat> &input_feats,
int32 frame,
SubVector<BaseFloat> *output_frame) const;
VectorBase<BaseFloat> *output_frame) const;
private:
DeltaFeaturesOptions opts_;
std::vector<Vector<BaseFloat> > scales_; // a scaling window for each
......
......@@ -86,7 +86,7 @@ class Mfcc {
explicit Mfcc(const MfccOptions &opts);
~Mfcc();
int32 Dim() { return opts_.num_ceps; }
int32 Dim() const { return opts_.num_ceps; }
/// Will throw exception on failure (e.g. if file too short for even one
/// frame). The output "wave_remainder" is the last frame or two of the
......@@ -100,6 +100,7 @@ class Mfcc {
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder = NULL);
typedef MfccOptions Options;
private:
const MelBanks *GetMelBanks(BaseFloat vtln_warp);
MfccOptions opts_;
......
......@@ -100,13 +100,14 @@ class Plp {
explicit Plp(const PlpOptions &opts);
~Plp();
int32 Dim() { return opts_.num_ceps; }
int32 Dim() const { return opts_.num_ceps; }
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *wave_remainder = NULL);
typedef PlpOptions Options;
private:
const MelBanks *GetMelBanks(BaseFloat vtln_warp);
const Vector<BaseFloat> *GetEqualLoudness(BaseFloat vtln_warp);
......
// feat/online-feature.cc
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "feat/online-feature.h"
namespace kaldi {
// Copies the stored feature vector for frame "frame" into *feat.
// "frame" must satisfy 0 <= frame < num_frames_, and *feat must already have
// dimension Dim(); both conditions are checked with KALDI_ASSERT.
template<class C>
void OnlineMfccOrPlp<C>::GetFeature(int32 frame, VectorBase<BaseFloat> *feat) {
  KALDI_ASSERT(frame >= 0 && frame < num_frames_);
  KALDI_ASSERT(feat->Dim() == Dim());
  feat->CopyFromVec(features_.Row(frame));
}
// Returns true only if the input has been marked as finished and "frame" is
// the index of the final frame computed so far (num_frames_ - 1).
template<class C>
bool OnlineMfccOrPlp<C>::IsLastFrame(int32 frame) const {
  if (!input_finished_)
    return false;  // More waveform may still arrive, so no frame is final yet.
  return frame == num_frames_ - 1;
}
// Builds the wrapped feature computer (of type C) from "opts" and starts with
// no frames computed and the input not yet finished.  The expected sampling
// frequency is taken from opts.frame_opts.samp_freq.
template<class C>
OnlineMfccOrPlp<C>::OnlineMfccOrPlp(const typename C::Options &opts)
    : mfcc_or_plp_(opts),
      input_finished_(false),
      num_frames_(0),
      sampling_frequency_(opts.frame_opts.samp_freq) { }
// Accepts a new chunk of waveform, computes as many whole feature frames as
// possible from it (together with any samples left over from earlier calls),
// and appends those frames to features_.
//
//   sampling_rate  must equal the sampling frequency given at construction,
//                  otherwise KALDI_ERR is raised.
//   waveform       the new samples; an empty vector is silently ignored.
//
// Raises KALDI_ERR if called after the input was marked as finished.
template<class C>
void OnlineMfccOrPlp<C>::AcceptWaveform(BaseFloat sampling_rate,
                                        const VectorBase<BaseFloat> &waveform) {
  if (waveform.Dim() == 0)
    return;  // Nothing to do.
  if (input_finished_)
    KALDI_ERR << "AcceptWaveform called after InputFinished() was called.";
  if (sampling_rate != sampling_frequency_)
    KALDI_ERR << "Sampling frequency mismatch, expected "
              << sampling_frequency_ << ", got " << sampling_rate;

  // If samples were left over from the previous call, process them followed
  // by the new chunk; otherwise use the caller's vector directly (no copy).
  const int32 leftover_dim = waveform_remainder_.Dim(),
      chunk_dim = waveform.Dim();
  Vector<BaseFloat> joined_wave;
  const VectorBase<BaseFloat> *input_wave = &waveform;
  if (leftover_dim != 0) {
    joined_wave.Resize(leftover_dim + chunk_dim);
    joined_wave.Range(0, leftover_dim).CopyFromVec(waveform_remainder_);
    joined_wave.Range(leftover_dim, chunk_dim).CopyFromVec(waveform);
    input_wave = &joined_wave;
  }
  // The old remainder has been consumed; Compute() is handed the (now empty)
  // vector to receive whatever samples it leaves unprocessed.
  waveform_remainder_.Resize(0);

  Matrix<BaseFloat> new_feats;
  const BaseFloat vtln_warp = 1.0;  // We don't support VTLN warping in this
                                    // wrapper.
  mfcc_or_plp_.Compute(*input_wave, vtln_warp, &new_feats,
                       &waveform_remainder_);

  const int32 num_new_frames = new_feats.NumRows();
  if (num_new_frames == 0) {
    // Presumably the waveform was too short to produce a whole frame; the
    // samples are expected to be in waveform_remainder_ for the next call.
    return;
  }

  const int32 total_frames = num_frames_ + num_new_frames;
  if (total_frames > features_.NumRows()) {
    // Grow geometrically: this factor trades memory against the amount of
    // copying done each time the matrix has to be enlarged.
    const BaseFloat growth_factor = 1.5;
    const int32 grown_num_rows =
        std::max<int32>(total_frames, features_.NumRows() * growth_factor);
    // kCopyData keeps the frames that are already stored.
    features_.Resize(grown_num_rows, Dim(), kCopyData);
  }
  features_.Range(num_frames_, num_new_frames, 0, Dim()).CopyFromMat(new_feats);
  num_frames_ = total_frames;
}
// Explicitly instantiate the templates defined here for the MFCC and PLP
// classes.  The member functions of OnlineMfccOrPlp are defined in this .cc
// file, so these two lines are what make this translation unit emit code for
// OnlineMfccOrPlp<Mfcc> and OnlineMfccOrPlp<Plp>.
template class OnlineMfccOrPlp<Mfcc>;
template class OnlineMfccOrPlp<Plp>;
// Returns the number of spliced frames that can currently be requested.
// Once the source has reported its last frame, every source frame is
// available; before that, the last right_context_ source frames are held
// back because their right context has not arrived yet.
int32 OnlineSpliceFrames::NumFramesReady() const {
  const int32 src_ready = src_->NumFramesReady();
  const bool src_finished =
      (src_ready > 0 && src_->IsLastFrame(src_ready - 1));
  if (src_finished)
    return src_ready;
  return std::max<int32>(0, src_ready - right_context_);
}
// Writes to *feat the concatenation of the source features for frames
// frame - left_context_ ... frame + right_context_ (left-most first).
// Frame indices that fall before frame 0 or past the last frame the source
// has ready are clamped, i.e. the edge frame is repeated.
//
//   frame  must satisfy 0 <= frame < NumFramesReady().
//   feat   must have dimension src_->Dim() * (1 + left_context_ +
//          right_context_).
void OnlineSpliceFrames::GetFeature(int32 frame, VectorBase<BaseFloat> *feat) {
  KALDI_ASSERT(left_context_ >= 0 && right_context_ >= 0);
  // Frame 0 is a valid request (the lower bound is inclusive).
  KALDI_ASSERT(frame >= 0 && frame < NumFramesReady());
  int32 dim_in = src_->Dim();
  KALDI_ASSERT(feat->Dim() == dim_in * (1 + left_context_ + right_context_));
  int32 T = src_->NumFramesReady();
  int32 first_frame = frame - left_context_;
  for (int32 t2 = first_frame; t2 <= frame + right_context_; t2++) {
    // Clamp to the range of frames the source can actually provide.
    int32 t2_limited = t2;
    if (t2_limited < 0) t2_limited = 0;
    if (t2_limited >= T) t2_limited = T - 1;
    int32 n = t2 - first_frame;  // 0 for left-most frame, increases to
                                 // the right.
    SubVector<BaseFloat> part(*feat, n * dim_in, dim_in);
    src_->GetFeature(t2_limited, &part);
  }
}
// Sets up a linear or affine transform of the features produced by "src".
//
//   transform  either has src->Dim() columns (purely linear transform; the
//              offset is zero), or src->Dim() + 1 columns (affine transform;
//              the last column is the offset).  Any other number of columns
//              raises KALDI_ERR.
//   src        the source of input features; the pointer is stored in src_.
OnlineLda::OnlineLda(const Matrix<BaseFloat> &transform,
                     OnlineFeatureInterface *src): src_(src) {
  int32 src_dim = src_->Dim();
  if (transform.NumCols() == src_dim) {  // Linear transform
    linear_term_ = transform;
    offset_.Resize(transform.NumRows());  // Resize() will zero it.
  } else if (transform.NumCols() == src_dim + 1) {  // Affine transform
    // linear_term_ has not been sized yet at this point, and CopyFromMat()
    // copies into an existing matrix of matching dimensions, so give it the
    // right shape first.
    linear_term_.Resize(transform.NumRows(), src_dim);
    linear_term_.CopyFromMat(transform.Range(0, transform.NumRows(),
                                             0, src_dim));
    offset_.Resize(transform.NumRows());
    offset_.CopyColFromMat(transform, src_dim);
  } else {
    KALDI_ERR << "Dimension mismatch: source features have dimension "
              << src_dim << " and LDA #cols is " << transform.NumCols();
  }
}
// Fetches frame "frame" from the source and writes the transformed feature
// to *feat: *feat is first set to offset_, then the product of linear_term_
// and the source feature is added to it.
void OnlineLda::GetFeature(int32 frame, VectorBase<BaseFloat> *feat) {
  // The untransformed feature has one element per column of linear_term_.
  Vector<BaseFloat> src_feat(linear_term_.NumCols());
  src_->GetFeature(frame, &src_feat);

  feat->CopyFromVec(offset_);
  feat->AddMatVec(1.0, linear_term_, kNoTrans, src_feat, 1.0);
}
// Output dimension: the source dimension times (1 + opts_.order), matching
// the size DeltaFeatures::Process() expects of its output vector.
int32 OnlineDeltaFeatures::Dim() const {
  return src_->Dim() * (1 + opts_.order);
}
// Returns the number of output frames that can currently be requested.
// Once the source has reported its last frame, all of its frames are
// available; until then the final opts_.order * opts_.window source frames
// are held back, since that much right context is needed to produce output.
int32 OnlineDeltaFeatures::NumFramesReady() const {
  const int32 src_ready = src_->NumFramesReady();
  // Number of frames of context needed on the left or (more relevant here)
  // on the right.
  const int32 needed_context = opts_.order * opts_.window;
  const bool src_finished =
      (src_ready > 0 && src_->IsLastFrame(src_ready - 1));
  if (src_finished)
    return src_ready;
  return std::max<int32>(0, src_ready - needed_context);
}
// Computes the delta features for output frame "frame" and writes them to
// *feat.  "frame" must satisfy 0 <= frame < NumFramesReady() and *feat must
// have dimension Dim().
//
// Only the source frames within opts_.order * opts_.window of "frame" are
// fetched (fewer near the start, or near the last frame the source has
// ready); they are copied into a temporary matrix on which
// DeltaFeatures::Process() is run.
void OnlineDeltaFeatures::GetFeature(int32 frame,
                                     VectorBase<BaseFloat> *feat) {
  KALDI_ASSERT(frame >= 0 && frame < NumFramesReady());
  KALDI_ASSERT(feat->Dim() == Dim());

  // Work out the window of source frames, truncated to what exists.
  const int32 context = opts_.order * opts_.window,
      last_src_frame = src_->NumFramesReady() - 1;
  const int32 first = (frame - context < 0 ? 0 : frame - context);
  const int32 last = (frame + context > last_src_frame ?
                      last_src_frame : frame + context);
  KALDI_ASSERT(last >= first);

  // Copy that window of source features into a temporary matrix, one row per
  // source frame.
  const int32 window_len = last + 1 - first,
      src_dim = src_->Dim();
  Matrix<BaseFloat> window_feats(window_len, src_dim);
  for (int32 row = 0; row < window_len; row++) {
    SubVector<BaseFloat> dest_row(window_feats, row);
    src_->GetFeature(first + row, &dest_row);
  }

  // "frame" sits at row (frame - first) of the temporary matrix.
  delta_features_.Process(window_feats, frame - first, feat);
}
// Stores the source pointer and the delta options, and constructs the
// DeltaFeatures object (from the same options) that does the computation.
OnlineDeltaFeatures::OnlineDeltaFeatures(const DeltaFeaturesOptions &opts,
                                         OnlineFeatureInterface *src)
    : src_(src),
      opts_(opts),
      delta_features_(opts) { }
} // namespace kaldi
// feat/online-feature-functions.h
// feat/online-feature.h
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
......@@ -18,8 +18,8 @@
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_FUNCTIONS_H_
#define KALDI_FEAT_FEATURE_FUNCTIONS_H_
#ifndef KALDI_FEAT_ONLINE_FEATURE_H_
#define KALDI_FEAT_ONLINE_FEATURE_H_
#include <string>
#include <vector>
......@@ -30,20 +30,21 @@