extractorData.sh 2.48 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
#!/bin/bash
#
# extractorData.sh - turn a lattice-to-nbest dump into a JSON hypothesis list.
#
# Usage: extractorData.sh DATA_FILE
#
# Every non-blank line of DATA_FILE is split on whitespace; the columns used are
#   0  hypothesis name; the digits after its last '-' are the hypothesis id (>= 1)
#   2  acoustic score of the word
#   3  language score of the word
#   4  the word itself
# Lines that share an id form one hypothesis: its words are joined with single
# spaces and each score is averaged over the lines of that hypothesis.
#
# Output (stdout, no trailing newline):
#   {"hypotheses":[{"utterance":"..","acousticScore":N,"languageScore":N},..]}
# One entry is written for every id from 1 up to the highest id seen; ids with
# no data (and hypothesis 1 on empty input) get an empty utterance and 0.0
# scores. Diagnostics go to stderr. Exit status is non-zero when DATA_FILE is
# missing or unreadable.

# Column positions inside a whitespace-split input line.
readonly POS_NAME=0
readonly POS_ACOUSTIC=2
readonly POS_LANGUAGE=3
readonly POS_UTTERANCE=4

# Scores are summed as integers counting 1e-9 units, so sums are exact and no
# external calculator has to be spawned per input line.
readonly FIXED_ONE=1000000000      # 1.0 expressed in fixed-point units
readonly FIXED_HUNDREDTH=10000000  # 0.01 expressed in fixed-point units

# Print a diagnostic on stderr.
warn() {
  printf 'extractorData: %s\n' "$*" >&2
}

# Convert a plain decimal string ("12", "-0.5", ".25") to fixed-point units.
# Arguments: $1 - decimal text
#            $2 - name of the variable that receives the integer result
# Returns:   0 on success, 1 when $1 is not a plain decimal number.
# Digits beyond the 9th fractional digit are dropped.
to_fixed() {
  local tf_text=$1 tf_target=$2
  local tf_re='^(-?)([0-9]*)(\.([0-9]*))?$'
  local tf_sign tf_whole tf_frac tf_units

  [[ $tf_text =~ $tf_re ]] || return 1
  tf_sign=${BASH_REMATCH[1]}
  tf_whole=${BASH_REMATCH[2]}
  tf_frac=${BASH_REMATCH[4]}
  # The regex also accepts "", "-" and "."; require at least one digit.
  [[ -n $tf_whole$tf_frac ]] || return 1

  # Right-pad the fraction to exactly 9 digits (the precision of FIXED_ONE).
  tf_frac=${tf_frac}000000000
  tf_frac=${tf_frac:0:9}

  # The 10# prefix forces base 10 so leading zeros are not read as octal.
  tf_units=$(( 10#${tf_whole:-0} * FIXED_ONE + 10#$tf_frac ))
  if [[ $tf_sign == - ]]; then
    tf_units=$(( -tf_units ))
  fi
  printf -v "$tf_target" '%s' "$tf_units"
}

# Print total/count with two decimals, truncated toward zero.
# Arguments: $1 - sum in fixed-point units
#            $2 - number of values summed
# A result that truncates to zero (or an empty group) is printed as "0.0".
# Values between -1 and 1 keep their leading zero so the output is valid JSON.
format_average() {
  local total=$1 count=$2
  local hundredths sign=""

  if (( count == 0 )); then
    printf '0.0'
    return 0
  fi
  hundredths=$(( total / (count * FIXED_HUNDREDTH) ))
  if (( hundredths == 0 )); then
    printf '0.0'
    return 0
  fi
  if (( hundredths < 0 )); then
    sign="-"
    hundredths=$(( -hundredths ))
  fi
  printf '%s%d.%02d' "$sign" "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}

# Print $1 with backslashes and double quotes escaped for a JSON string.
json_escape() {
  local text=$1
  local backslash='\' quote='"'

  # Backslashes first, otherwise the ones added for quotes would be doubled.
  text=${text//"$backslash"/"$backslash$backslash"}
  text=${text//"$quote"/"$backslash$quote"}
  printf '%s' "$text"
}

# Read the data file, aggregate per hypothesis id and print the JSON document.
# Arguments: $1 - path of the lattice-to-nbest data file
# Returns:   0 on success, 2 when no path is given, 1 when it is unreadable.
main() {
  local data_file=${1-}

  if [[ -z $data_file ]]; then
    warn "usage: extractorData.sh DATA_FILE"
    return 2
  fi
  if [[ ! -r $data_file || -d $data_file ]]; then
    warn "cannot read '$data_file'"
    return 1
  fi

  local -a utterances=() acoustic_sum=() language_sum=() line_count=()
  local -a fields=()
  local line id word acoustic language
  local line_no=0
  # Hypothesis 1 is always emitted, even when the input holds no data for it.
  local max_id=1

  # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    line_no=$(( line_no + 1 ))
    # read -a splits on whitespace without glob-expanding words such as "*".
    read -r -a fields <<< "$line"
    (( ${#fields[@]} > 0 )) || continue

    id=${fields[POS_NAME]##*-}
    if [[ ! $id =~ ^[0-9]+$ ]] || (( 10#$id < 1 )) \
      || ! to_fixed "${fields[POS_ACOUSTIC]-}" acoustic \
      || ! to_fixed "${fields[POS_LANGUAGE]-}" language; then
      warn "skipping malformed line $line_no"
      continue
    fi
    id=$(( 10#$id ))
    word=${fields[POS_UTTERANCE]-}

    if [[ -n $word ]]; then
      # Separate words with one space, with none before the first word.
      utterances[id]="${utterances[id]:+${utterances[id]} }$word"
    fi
    acoustic_sum[id]=$(( ${acoustic_sum[id]:-0} + acoustic ))
    language_sum[id]=$(( ${language_sum[id]:-0} + language ))
    line_count[id]=$(( ${line_count[id]:-0} + 1 ))
    if (( id > max_id )); then
      max_id=$id
    fi
  done < "$data_file"

  local i
  printf '{"hypotheses":['
  for (( i = 1; i <= max_id; i++ )); do
    if (( i > 1 )); then
      printf ','
    fi
    printf '{"utterance":"%s","acousticScore":%s,"languageScore":%s}' \
      "$(json_escape "${utterances[i]-}")" \
      "$(format_average "${acoustic_sum[i]:-0}" "${line_count[i]:-0}")" \
      "$(format_average "${language_sum[i]:-0}" "${line_count[i]:-0}")"
  done
  printf ']}'
}

# Last command on purpose: the script's exit status is main's return status.
main "$@"