Commit e2e56a75 authored by Yoann HOUPERT's avatar Yoann HOUPERT
Browse files

Merge branch '6-t3-1-re-training-of-general-model-based-on-normalized-corpus' into 'master'

6 t3 1 re training of general model based on normalized corpus

See merge request !1
parents d8598c77 18a4d701
Pipeline #6685 passed with stage
in 26 seconds
# Host directory holding the models; docker-compose mounts it at /opt/models in the kaldi service.
MODELS_PATH=./models/
# YAML file handed to the kaldi service; the path lies under the /opt/models mount point.
YAML=/opt/models/gmm_hmm3.yaml
# Forwarded to the kaldi service; presumably the number of workers to start (confirm against the kaldi image).
NB_WORKERS=2
# Image for the Node.js service, based on the slim Node 8 image.
FROM node:8-slim
# Installs wget, registers Google's apt signing key and the Chrome apt repository,
# refreshes the package index and then clears the apt lists.
# NOTE(review): no package from the Chrome repository is installed by this step,
# `curl` is purged although this step never installs it, and /src/*.deb is removed
# although nothing visible here writes to /src. This looks like leftover boilerplate;
# confirm nothing depends on it before simplifying.
RUN apt-get update && apt-get install -y wget --no-install-recommends \
&& wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
&& sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \
&& apt-get update \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get purge --auto-remove -y curl \
&& rm -rf /src/*.deb
# All following instructions run relative to the application directory.
WORKDIR /usr/src/app/linstt-poc
# The dependency manifests are copied and installed before the rest of the sources.
COPY package.json .
COPY yarn.lock .
RUN yarn install
# Copies the remaining project files into the application directory.
COPY . /usr/src/app/linstt-poc
# Default command: runs the "start" script through yarn.
CMD ["yarn", "start"]
/*
* Copyright (c) 2017 Linagora.
*
* This file is part of linstt-poc-socgen
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-poc-socgen).
* This file is part of linstt-controller
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-controller).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
......@@ -19,15 +19,19 @@
*/
// Entry point: wires the upload middlewares and the orchestrator routes into
// an Express application and starts listening on the configured port.
const app = require('express')();
const fileUpload = require('express-fileupload');
const bodyParser = require('body-parser');
const config = require('./config.json');
const routes = require('./lib/webserver/routes');

console.log('starting routes linstt-controller...');

// Raw WAV bodies are accepted up to 200mb; multipart uploads go through express-fileupload.
app.use(bodyParser.raw({type: 'audio/wav', limit: '200mb'}));
app.use(fileUpload());
app.use('/', routes.routesFactory(config.orchestrator));

app.listen(config.api, () => {
  // Report the port actually bound instead of a hard-coded value.
  console.log(`App listening on port ${config.api}`);
});

console.log('routes started linstt-controller...');
{
"api": 3000,
"orchestrator": {
"gstreamerURL": "ws://localhost:3001/kaldi",
"speechEnhancementURL": "ws://localhost:3002/enhancement"
"gstreamer": {
"host": "linsttcontroller_kaldi_1",
"port": "80",
"api" : {
"recognize" : "client/dynamic/recognize"
}
},
"speechEnhancement": {
"host": "linsttcontroller_speech-enhencement_1",
"port": "5000",
"api" : {
"upload" : "upload"
},
"config" : "-n"
}
}
}
\ No newline at end of file
linstt-poc-socgen Documentation
linstt-poc Documentation
You will find in this folder all the required information to configure, use, develop the linstt-poc-socgen project.
You will find in this folder all the required information to configure, use, and develop the linstt-poc project.
# Compose stack: the orchestrator ("socgen") plus the two services it depends on.
version: '3.2'
services:
  # Orchestrator, built from the local Dockerfile and published on port 3000.
  socgen:
    build: .
    image: linagora/socgen
    depends_on:
      - speech-enhencement
      - kaldi
    volumes:
      - ./media:/opt/media
    ports:
      - "3000:3000"
  # Speech-enhancement service, reachable on port 5000 (also published on the host).
  speech-enhencement:
    image: linagora/speech-enhencement
    expose:
      - "5000"
    ports:
      - "5000:5000"
  # Kaldi service; models are mounted from MODELS_PATH and settings come from .env.
  # NOTE(review): the image name "linagora/kaldis" may be a typo for "linagora/kaldi" - confirm.
  kaldi:
    image: linagora/kaldis
    expose:
      - "80"
    volumes:
      - ${MODELS_PATH}:/opt/models
    env_file: .env
    environment:
      - NB_WORKERS
      - YAML
      - MODELS_PATH
    ports:
      - "80:80"
\ No newline at end of file
# Compose stack: the controller plus the two services it depends on.
version: '3.2'
services:
  # Controller, built from the local Dockerfile and published on port 3000.
  controller:
    build: .
    image: linagora/controller
    depends_on:
      - speech-enhencement
      - kaldi
    volumes:
      - ./media:/opt/media
    ports:
      - "3000:3000"
  # Speech-enhancement service; port 5000 is exposed to the other services only.
  speech-enhencement:
    image: linagora/speech-enhencement
    expose:
      - "5000"
  # Kaldi service; models are mounted from MODELS_PATH and settings come from .env.
  kaldi:
    image: linagora/kaldi
    expose:
      - "80"
    volumes:
      - ${MODELS_PATH}:/opt/models
    env_file: .env
    environment:
      - NB_WORKERS
      - YAML
      - MODELS_PATH
/*
* Copyright (c) 2017 Linagora.
*
* This file is part of linstt-controller
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-controller).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
module.exports = config => {
const request = require('request');
return {
enhancement: (wavFile, enhancementlCallback) => {
const url = 'http://' + config.host + ':' + config.port + '/' + config.api.upload;
const options = {
url,
formData: {
wavFile: {
value: wavFile.data,
options: {
filename: 'wavFile',
contentType: 'audio/x-wav'
}
},
msg: '-n'
},
encoding: null
};
request.post(options, enhancementlCallback);
return wavFile;
}
};
};
/*
* Copyright (c) 2017 Linagora.
*
* This file is part of linstt-poc-socgen
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-poc-socgen).
* This file is part of linstt-controller
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-controller).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
......@@ -18,13 +18,12 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
const transcript = audio => {
const enhancedAudio = audio;
console.log('Enhancement of the audio');
return enhancedAudio;
};
module.exports = {
transcript
module.exports = config => {
const request = require('request');
return {
transcript: (audio, sttCallback) => {
const url = 'http://' + config.host + ':' + config.port + '/' + config.api.recognize;
return request.post({url, body: audio}, sttCallback);
}
};
};
/*
* Copyright (c) 2017 Linagora.
*
* This file is part of linstt-poc-socgen
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-poc-socgen).
* This file is part of linstt-controller
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-controller).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
......@@ -18,13 +18,17 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
const enhancement = audio => {
console.log('transcript of the audio');
console.log('Do stuff' + audio);
const isFileValid = audio => {
if (audio === undefined) {
return false;
}
return 'some text';
if (audio.mimetype === 'audio/wave' || audio.mimetype === 'audio/x-wav') {
return true;
}
return false;
};
module.exports = {
enhancement
isFileValid
};
/*
* Copyright (c) 2017 Linagora.
*
* This file is part of linstt-controller
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-controller).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
'use strict';
// Specification of checker.isFileValid: only WAV uploads are accepted.
describe('checker should if file is valid', () => {
  // Minimal stand-in for an uploaded file carrying the given MIME type.
  const fakeUpload = mimetype => ({mimetype, data: 'randomByte'});

  // The module under test is reloaded into `global.checker` before each case.
  beforeEach(() => {
    /* eslint-disable import/no-unassigned-import */
    global.checker = require('./checker.js');
    /* eslint-enable */
  });

  it('should define checker', () => {
    expect(global.checker).toBeDefined();
  });

  it('should give false when undefined', () => {
    const verdict = global.checker.isFileValid();
    expect(verdict).toBe(false);
  });

  it('should give false when not an audio', () => {
    const verdict = global.checker.isFileValid('random stuff');
    expect(verdict).toBe(false);
  });

  it('should be false when the audio is not a wav', () => {
    const verdict = global.checker.isFileValid(fakeUpload('audio/webm'));
    expect(verdict).toBe(false);
  });

  it('should be true when the audio is wav', () => {
    // Both WAV spellings are checked, in the same order as before.
    ['audio/wave', 'audio/x-wav'].forEach(mimetype => {
      expect(global.checker.isFileValid(fakeUpload(mimetype))).toBe(true);
    });
  });
});
/*
* Copyright (c) 2017 Linagora.
*
* This file is part of linstt-poc-socgen
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-poc-socgen).
* This file is part of linstt-controller
* (see https://ci.linagora.com/linagora/lgs/labs/linstt-controller).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
......@@ -21,18 +21,39 @@
/**
 * Builds the Express router of the orchestrator.
 *
 * POST /api/transcript expects a multipart upload with a `wav` file, sends it
 * to the speech-enhancement service and forwards the enhanced audio to the
 * speech-to-text service. Exactly one response is sent per request.
 *
 * @param {Object} config - Orchestrator settings; `speechEnhancement` and
 *   `gstreamer` are handed to the respective service clients.
 * @returns {Object} The configured Express router.
 */
const routesFactory = config => {
  /* eslint new-cap: ["error", { "capIsNew": false }] */
  const routes = require('express').Router();
  const enhancer = require('../controller/speech-enhancement')(config.speechEnhancement);
  const stt = require('../controller/speech-to-text')(config.gstreamer);
  const checker = require('../util/checker');

  routes.post('/api/transcript', (req, res) => {
    if (!req.files) {
      return res.status(400).send('No files were uploaded.');
    }
    if (!checker.isFileValid(req.files.wav)) {
      return res.status(400).send('File is not valid.');
    }

    enhancer.enhancement(req.files.wav, (enhancementErr, enhancementResponse, enhancedAudio) => {
      if (enhancementErr) {
        return res.status(500).send('Error while enhancing audio.' + enhancementErr);
      }

      stt.transcript(enhancedAudio, (sttErr, sttResponse, sttBody) => {
        if (sttErr) {
          return res.status(500).send('Error while transcript the audio.' + sttErr);
        }

        // Normalise the reply so a missing or non-string body cannot throw below.
        const reply = String(sttBody);
        if (reply.indexOf('No workers available') !== -1) {
          return res.status(500).send('No worker available for the moment.');
        }

        // A reply that is not valid JSON must not crash the process from
        // inside the callback; report it as a transcription failure instead.
        let json;
        try {
          json = JSON.parse(reply);
        } catch (parseErr) {
          return res.status(500).send('Error while transcript the audio.');
        }

        // Status 5 is handled as a failure, exactly as before.
        if (json === null || json.status === 5) {
          return res.status(500).send('Error while transcript the audio.');
        }
        return res.status(200).json({message: 'transcript done', transcript: json});
      });
    });
  });

  return routes;
};
......
{
"name": "linstt-poc-socgen",
"name": "linstt-poc",
"version": "0.0.1",
"description": "An orchestrator for socgen",
"description": "An orchestrator for enhancement - transcript",
"main": "app.js",
"scripts": {
"start": "node app.js",
"test": "xo && jest",
"test:watch": "jest --watch",
"ci": "yarn && xo && jest --runInBand"
"ci": "yarn && xo && jest --runInBand",
"xo:fix": "xo --fix"
},
"dependencies": {
"body-parser": "^1.18.2",
"express": "^4.16.2",
"xmlhttprequest": "1.8.0"
"express-fileupload": "^0.3.0",
"request": "^2.83.0"
},
"devDependencies": {
"jest": "20.0.0",
......@@ -20,11 +22,14 @@
"sinon": "^4.0.2"
},
"xo": {
"space": true
"space": true,
"envs": [
"jest"
]
},
"repository": {
"type": "git",
"url": "https://ci.linagora.com/linagora/lgs/labs/linstt-poc-socgen.git"
"url": "https://ci.linagora.com/linagora/lgs/labs/linstt-poc.git"
},
"author": "Linagora Folks",
"license": "AGPL-3.0"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment