Commit a74219c8 authored by Yoann HOUPERT's avatar Yoann HOUPERT

Generate multiple utterances with acoustic and language scores

parent f02e73c9
......@@ -141,7 +141,6 @@ class DecodeRequestHandler(tornado.web.RequestHandler):
@gen.coroutine
def receive_response(self, message):
logging.debug("Forwarding transcription to client")
self.write({'transcript': message})
os.remove(TEMP_FILE_PATH+self.uuid+'.wav')
self.set_status(200, "Transcription succeded")
......@@ -173,8 +172,7 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler):
logging.debug("Message received from worker:" + message)
else:
if 'transcription' in json_msg.keys(): #Receive the file path to process
response = json.dumps({'transcript':json_msg['transcription'].encode('utf-8')})
logging.debug("Response send by worker : %s" % response)
response = json.dumps({'hypotheses' :[json_msg['transcription'].encode('utf-8')]})
self.client_handler.receive_response(json.dumps({'transcript':json_msg['transcription']}))
self.client_handler = None
self.application.available_workers.add(self)
......
......@@ -43,6 +43,7 @@ WORKDIR $BASE_DIR
# Install tornado
COPY requirements.txt .
RUN pip2 install -r requirements.txt
# Install bc (used for the floating-point arithmetic of the score extraction
# script). The package index is refreshed in the same layer so the install does
# not hit a stale cache, and -y answers the confirmation prompt, which would
# otherwise abort a non-interactive image build.
RUN apt-get update && apt-get install -y --no-install-recommends bc
# Deploy our offline server
COPY . .
......
......@@ -70,6 +70,12 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
echo "run fmllr decoding"
$lvcsrRootDir/scripts/steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads $num_threads --skip-scoring "true" \
$gmmdir/Graph $datadir $transdir || exit 1
gunzip -c $transdir/lat.1.gz |\
lattice-to-nbest --acoustic-scale=0.0883 --n=10 --lm-scale=1.0 ark:- ark:- | \
nbest-to-ctm --precision=1 ark:- - | utils/int2sym.pl -f 5 $gmmdir/Graph/words.txt > $transdir/indice_confiance_brut.txt
$lvcsrRootDir/scripts/./extractorData.sh $transdir/indice_confiance_brut.txt > $transdir/indice_confiance.txt
fi
mv $transdir $lvcsrRootDir/trans
cat $lvcsrRootDir/trans/decode_$fileRootName/log/decode.1.log | grep -v "#" | grep -v "LOG" | grep -v "gmm-latgen-faster" | grep -v "splice-feats" | grep -v "transform-feats" | grep -v "apply-cmvn" | awk '{$1=""; print $0}' | sed 's/^[ ]//' > $lvcsrRootDir/trans/decode_$fileRootName.log
......
#!/bin/bash
#Need lattice-to-nbest file in param
dataFile=$1
#Init value
declare -a utteranceValue
declare -a acousticScore
declare -a languageScore
countUtterance=1
previousId=1
acousticScore[previousId]=0
languageScore[previousId]=0
#Position of important colonne
posName=0
posAcoustic=2
posLanguage=3
posUtterance=4
while read -r line
do
name="$line"
arrLine=($line)
currentId="${arrLine[$posName]##*-}"
if [ "$currentId" -eq "$previousId" ]; then
#Manage all data value before storage
countUtterance=$((countUtterance + 1))
utteranceValue[$currentId]="${utteranceValue[currentId]} ${arrLine[$posUtterance]}"
acousticScore[$currentId]=`echo ${acousticScore[currentId]} + ${arrLine[$posAcoustic]} | bc`
languageScore[$currentId]=`echo ${languageScore[currentId]} + ${arrLine[$posLanguage]} | bc`
else
#Do stuff on the previous segment before swap
acousticScore[$previousId]=$(echo "scale=2; ${acousticScore[$previousId]}/$countUtterance" | bc)
languageScore[$previousId]=$(echo "scale=2; ${languageScore[$previousId]}/$countUtterance" | bc)
#Init for the next seglment
previousId=$currentId
countUtterance=0
concatUterance=""
acousticScore[previousId]=0
languageScore[previousId]=0
#Init data for the uterance
countUtterance=$((countUtterance + 1))
utteranceValue[$currentId]="${utteranceValue[currentId]} ${arrLine[$posUtterance]}"
acousticScore[$currentId]=`echo ${acousticScore[currentId]} + ${arrLine[$posAcoustic]} | bc`
languageScore[$currentId]=`echo ${languageScore[currentId]} + ${arrLine[$posLanguage]} | bc`
fi
done < "$dataFile"
#Need to manage the last data
acousticScore[$previousId]=$(echo "scale=2; ${acousticScore[$previousId]}/$countUtterance" | bc)
languageScore[$previousId]=$(echo "scale=2; ${languageScore[$previousId]}/$countUtterance" | bc)
#start to 1, no id 0 stored
echo "["
for i in `seq 1 $previousId`; do
acousticScore[$i]=$(echo "scale=2; 1-${acousticScore[$i]}" | bc)
languageScore[$i]=$(echo "scale=2; 1-${languageScore[$i]}" | bc)
echo "{"
echo "\"utterance\":\"${utteranceValue[$i]}\","
echo "\"acousticScore\":0${acousticScore[$i]},"
echo "\"languageScore\":0${languageScore[$i]}"
if [ "$i" -eq "$previousId" ]; then
echo "}"
else
echo "},"
fi
done
echo "]"
\ No newline at end of file
......@@ -69,7 +69,7 @@ class WorkerWebSocket(WebSocketClient):
# Check result
if os.path.isfile('trans/decode_'+self.fileName+'.log'):
with open('trans/decode_'+self.fileName+'.log', 'r') as resultFile:
with open('trans/decode_'+self.fileName+'/indice_confiance.txt', 'r') as resultFile:
result = resultFile.read().strip()
logging.debug("Transcription is: %s" % result)
self.send_result(result)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment