Commit b9289bf5 authored by Sonia BADENE

Initial simple Speech application built using Sphinx-4

parent ca263c0b
<!-- Maven build for a simple Sphinx-4 speech recognition demo application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>sbadene</groupId>
<artifactId>reco</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>reco</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Sphinx-4 speech recognition engine (core decoding API). -->
<dependency>
<groupId>edu.cmu.sphinx</groupId>
<artifactId>sphinx4-core</artifactId>
<version>5prealpha-SNAPSHOT</version>
</dependency>
<!-- Default acoustic/language model data shipped with Sphinx-4. -->
<dependency>
<groupId>edu.cmu.sphinx</groupId>
<artifactId>sphinx4-data</artifactId>
<version>5prealpha-SNAPSHOT</version>
</dependency>
</dependencies>
<!-- The 5prealpha-SNAPSHOT artifacts are only published to the Sonatype
     OSS snapshot repository, so it must be declared explicitly. -->
<repositories>
<repository>
<id>snapshots-repo</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
</project>
\ No newline at end of file
package RecoPack;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import edu.cmu.sphinx.api.Configuration;
import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.api.StreamSpeechRecognizer;
import edu.cmu.sphinx.decoder.adaptation.Stats;
import edu.cmu.sphinx.decoder.adaptation.Transform;
/**
 * Transcribes a continuous .wav audio file that contains multiple utterances.
 * The input file is first split into short segments by an external shell
 * script, then each segment is decoded twice: a first pass collects
 * speaker-specific statistics, and a second pass re-decodes with the derived
 * speaker-adaptation transform applied for the final hypothesis.
 *
 * Output: ./gen/transcription.txt (one "&lt;segment-name&gt; &lt;hypothesis&gt;" line
 * per hypothesis) and the segmented audio under ./gen/ResultsWav.
 */
public class Transcriber {

    /**
     * Size in bytes of a canonical RIFF/WAVE header; it is skipped so the
     * recognizer receives raw PCM samples.
     * NOTE(review): assumes every segment has exactly a 44-byte header —
     * true for the files produced by split.sh via sox, but confirm if the
     * segmenter changes.
     */
    private static final int WAV_HEADER_BYTES = 44;

    public static void main(String[] args) throws Exception {
        // gen (generated) is the folder where all output is saved
        // (transcription and segmented audio files for this app).
        File gen = new File("./gen");
        gen.mkdirs();
        // Folder receiving the segmented audio produced by split.sh.
        File segmentsDir = new File("./gen/ResultsWav");
        segmentsDir.mkdirs();

        // Load the French acoustic model, dictionary and language model.
        System.out.println("# Loading models...");
        Configuration configuration = new Configuration();
        configuration.setAcousticModelPath("./data/cmusphinxFR52");
        configuration.setDictionaryPath("./data/LiumLM/words_dict.utf8");
        configuration.setLanguageModelPath("./data/LiumLM/smallFrenchLium.lm.bin");
        StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(configuration);

        // Split the input .wav into several per-utterance segments.
        splitAudio("./data/Wav/39_45_eva_14right.wav", "./src/script/segments", "./gen/ResultsWav");

        // listFiles() returns null on I/O error or if the directory vanished;
        // the original code would have thrown a NullPointerException here.
        File[] segments = segmentsDir.listFiles();
        if (segments == null) {
            System.err.println("Cannot list segment directory: " + segmentsDir);
            return;
        }

        File trans = new File("./gen/transcription.txt");
        // try-with-resources guarantees the transcription file is closed
        // (and flushed) even if recognition throws.
        try (PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter(trans)))) {
            for (File segment : segments) {
                System.out.println("## File and directory paths...");
                System.out.println(segment);
                transcribeSegment(recognizer, segment, pw);
            }
        }
    }

    /**
     * Runs the external split script and echoes its output to stdout.
     * Uses ProcessBuilder with an argument list — unlike Runtime.exec(String),
     * this does not word-split the command and cannot be broken by spaces in
     * paths. Failures are reported but not fatal, matching the original
     * best-effort behaviour.
     *
     * @param wavPath      source wav file to split
     * @param segmentsFile segment listing consumed by split.sh
     * @param outDir       directory receiving the segment wavs
     */
    private static void splitAudio(String wavPath, String segmentsFile, String outDir) {
        try {
            ProcessBuilder pb = new ProcessBuilder(
                    "bash", "./src/script/split.sh", wavPath, segmentsFile, outDir);
            pb.redirectErrorStream(true); // interleave stderr so script failures are visible
            Process child = pb.start();
            try (BufferedReader br = new BufferedReader(new InputStreamReader(child.getInputStream()))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            }
            child.waitFor(); // reap the child; the original leaked it
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve interrupt status
        }
    }

    /**
     * Decodes one audio segment twice: a first pass collects speaker-specific
     * data (Stats), then a second pass is run with the resulting Transform
     * (the speaker profile) applied. Final hypotheses are appended to pw.
     *
     * @throws IOException if the segment file cannot be read
     */
    private static void transcribeSegment(StreamSpeechRecognizer recognizer, File segment, PrintWriter pw)
            throws IOException {
        // First pass: simple recognition with the generic model, collecting
        // speaker-specific data for live adaptation.
        Stats stats = recognizer.createStats(1);
        System.out.println("Recognition...");
        try (InputStream stream = new FileInputStream(segment)) {
            skipWavHeader(stream);
            recognizer.startRecognition(stream);
            System.out.println("Result running...");
            System.out.println("Collecting Speaker-specific data.");
            SpeechResult result;
            while ((result = recognizer.getResult()) != null) {
                stats.collect(result);
            }
            recognizer.stopRecognition();
        }

        // Transform represents the speaker profile; decode again with it applied.
        Transform transform = stats.createTransform();
        recognizer.setTransform(transform);
        try (InputStream streamMain = new FileInputStream(segment)) {
            skipWavHeader(streamMain);
            recognizer.startRecognition(streamMain);
            SpeechResult result;
            while ((result = recognizer.getResult()) != null) {
                System.out.format("Hypothesis: %s\n", result.getHypothesis());
                pw.print(segment.getName() + " ");
                pw.println(result.getHypothesis());
                pw.flush(); // keep partial output on disk if a later segment crashes
            }
            recognizer.stopRecognition();
        }
    }

    /**
     * Skips the WAV header. InputStream.skip may legally skip fewer bytes
     * than requested, so loop until the full header is consumed — the
     * original single skip(44) call silently ignored the return value.
     */
    private static void skipWavHeader(InputStream in) throws IOException {
        long remaining = WAV_HEADER_BYTES;
        while (remaining > 0) {
            long skipped = in.skip(remaining);
            if (skipped <= 0) {
                break; // EOF or non-skippable stream: give up, matching original leniency
            }
            remaining -= skipped;
        }
    }
}
package RecoPack;
import edu.cmu.sphinx.api.Configuration;
import edu.cmu.sphinx.api.LiveSpeechRecognizer;
import edu.cmu.sphinx.api.SpeechResult;
/**
 * Live French speech recognition demo. LiveSpeechRecognizer uses the
 * microphone as the speech source; each recognized utterance is printed
 * until the program is interrupted with Ctrl-C.
 */
public class test {
    public static void main(String[] args) throws Exception {
        // Load the French acoustic model, dictionary and language model.
        System.out.println("# Loading models...");
        Configuration configuration = new Configuration();
        configuration.setAcousticModelPath("./data/cmusphinxFR52");
        configuration.setDictionaryPath("./data/LiumLM/words_dict.utf8");
        configuration.setLanguageModelPath("./data/LiumLM/smallFrenchLium.lm.bin");

        LiveSpeechRecognizer recognizer = new LiveSpeechRecognizer(configuration);

        // BUG FIX: the original left startRecognition commented out, so
        // getResult() was called on a recognizer that was never started.
        // Start the recognition process, pruning previously cached data.
        recognizer.startRecognition(true);

        System.out.println("Say something in French Language ");
        // Loop forever; each getResult() call blocks until an utterance ends.
        while (true) {
            System.out.println("Start speaking. Press Ctrl-C to quit.\n");
            SpeechResult result = recognizer.getResult();
            if (result != null) {
                String resultText = result.getHypothesis();
                System.out.println("You said: " + resultText + '\n');
            } else {
                System.out.println("I can't hear what you said.\n");
            }
        }
    }
}
This diff is collapsed.
#!/bin/bash
# Splits a long wav file into per-segment wav files with sox, driven by a
# segments listing file.
#
# Usage: split.sh <audio_file> <segments_file> <output_dir>
#   $1 audio_file   : source wav (assumed to be 16 kHz mono)
#   $2 segments_file: one line per segment; awk columns used:
#                     $1 = segment name, $3 = start time (s), $4 = end time (s)
#   $3 output_dir   : directory receiving the segment wavs
#
# (History: a 15 min wav is 15*60 = 900 s; at 10 s per frame that is
# 90 segments. The earlier ffmpeg-based loop is kept below for reference.)
#for i in $(seq 1 90); do
#ffmpeg -ss $((($i-1) * 10)) -t 10 -i a.wav split_debat/deb_seg$i.wav
# we assume that audio_file is 16k mono
#done
audio_file=$1
rep=$3
# BUG FIX: the original ran "mkdir -p $rec" — $rec is never set, so the
# output directory was never actually created when missing. Paths are now
# also quoted so names containing spaces do not word-split.
if [ ! -d "$rep" ]; then
mkdir -p "$rep"
fi
#ffmpeg -i $audio_file -ar 16000 -ac 1 a.wav
i=1
while read -r line
do
deb_seg=`echo $line | awk '{print $3}'`
name_seg=`echo $line | awk '{print $1}'`
duration_seg=`echo $line | awk '{print $4-$3}'`
# Segments of at most 10 s are extracted with sox; longer ones are only
# reported and skipped. bc prints 1/0 for the comparison, used as the test.
if ((`bc <<< "$duration_seg<10.0"`)); then
sox "$audio_file" -t wav "$rep/$name_seg" trim $deb_seg 00:$duration_seg
else
echo "le fichier dépasse 10sec"
echo $name_seg
echo $duration_seg
echo "============="
fi
i=$(($i+1))
done < "$2"
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment