Commit ad82c82e authored by Kelly Davis's avatar Kelly Davis

Fixed #256

parent 94ab7fc2
......@@ -49,12 +49,6 @@
"resolved": "https://registry.npmjs.org/anymatch/-/anymatch-1.3.0.tgz",
"integrity": "sha1-o+Uvo5FoyCX/V7AkgSbOWo/5VQc="
},
"ap": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/ap/-/ap-0.2.0.tgz",
"integrity": "sha1-rglCYAspkS8NKxTsYMRejzMLYRA=",
"dev": true
},
"aproba": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/aproba/-/aproba-1.1.2.tgz",
......@@ -183,16 +177,6 @@
"resolved": "https://registry.npmjs.org/beeper/-/beeper-1.1.1.tgz",
"integrity": "sha1-5tXqjF2tABMEpwsiY4RH9pyy+Ak="
},
"better-queue": {
"version": "3.8.5",
"resolved": "https://registry.npmjs.org/better-queue/-/better-queue-3.8.5.tgz",
"integrity": "sha1-/ALwhVVuw9IvZx7pFBArPPmxXeY="
},
"better-queue-memory": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/better-queue-memory/-/better-queue-memory-1.0.2.tgz",
"integrity": "sha1-qm0WmqHQzHdAkYXLnLXH3CUbzUE="
},
"binary-extensions": {
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-1.8.0.tgz",
......@@ -243,12 +227,6 @@
"resolved": "https://registry.npmjs.org/buffer-shims/-/buffer-shims-1.0.0.tgz",
"integrity": "sha1-mXjOMXOIxkmth5MCjDR37wRKi1E="
},
"buffer-writer": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/buffer-writer/-/buffer-writer-1.0.1.tgz",
"integrity": "sha1-Iqk2kB4wKa/NdUfrRIfOtpejvwg=",
"dev": true
},
"camelcase": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-1.2.1.tgz",
......@@ -1374,12 +1352,6 @@
"resolved": "https://registry.npmjs.org/gaze/-/gaze-0.5.2.tgz",
"integrity": "sha1-QLcJU30k0dRXZ9takIaJ3+aaxE8="
},
"generic-pool": {
"version": "2.4.3",
"resolved": "https://registry.npmjs.org/generic-pool/-/generic-pool-2.4.3.tgz",
"integrity": "sha1-eAw29p360FpaBF3Te+etyhGk9v8=",
"dev": true
},
"getpass": {
"version": "0.1.7",
"resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz",
......@@ -2042,12 +2014,6 @@
"integrity": "sha1-9plf4PggOS9hOWvolGJAe7dxaOQ=",
"dev": true
},
"libpq": {
"version": "1.8.7",
"resolved": "https://registry.npmjs.org/libpq/-/libpq-1.8.7.tgz",
"integrity": "sha1-wt6xIeKPf4S9OyRRr/9otmY+dPk=",
"dev": true
},
"liftoff": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/liftoff/-/liftoff-2.3.0.tgz",
......@@ -2352,11 +2318,6 @@
"integrity": "sha1-GfYZWRUZ8JZ2mlupqG5u7sgjw88=",
"dev": true
},
"node-eta": {
"version": "0.9.0",
"resolved": "https://registry.npmjs.org/node-eta/-/node-eta-0.9.0.tgz",
"integrity": "sha1-n7CwmbzSoCGUDmA8ZCVNwAPZp6g="
},
"node-pre-gyp": {
"version": "0.6.32",
"resolved": "https://registry.npmjs.org/node-pre-gyp/-/node-pre-gyp-0.6.32.tgz",
......@@ -2463,12 +2424,6 @@
"integrity": "sha1-yOysCUInzfdqMWh07QXifMk5oOA=",
"dev": true
},
"packet-reader": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/packet-reader/-/packet-reader-0.3.1.tgz",
"integrity": "sha1-zWLmCvjX/qinBexP+ZCHHEaHHyc=",
"dev": true
},
"parse-filepath": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/parse-filepath/-/parse-filepath-1.0.1.tgz",
......@@ -2521,92 +2476,6 @@
"resolved": "https://registry.npmjs.org/performance-now/-/performance-now-0.2.0.tgz",
"integrity": "sha1-M+8wxcd9TqIcWlOGnZG1bY8lVeU="
},
"pg": {
"version": "6.2.4",
"resolved": "https://registry.npmjs.org/pg/-/pg-6.2.4.tgz",
"integrity": "sha1-T37ecCQel1BmJ9XWB4NgcBpkfEU=",
"dev": true,
"dependencies": {
"semver": {
"version": "4.3.2",
"resolved": "https://registry.npmjs.org/semver/-/semver-4.3.2.tgz",
"integrity": "sha1-x6BxWKgL7dBSNVt3DYLWZA+AO+c=",
"dev": true
}
}
},
"pg-connection-string": {
"version": "0.1.3",
"resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-0.1.3.tgz",
"integrity": "sha1-2hhHsglA5C7hSSvq9l1J2RskXfc=",
"dev": true
},
"pg-native": {
"version": "1.10.1",
"resolved": "https://registry.npmjs.org/pg-native/-/pg-native-1.10.1.tgz",
"integrity": "sha1-lOYcy7hafzQ2suUmMVx1gRB/5Aw=",
"dev": true,
"dependencies": {
"isarray": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz",
"integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=",
"dev": true
},
"pg-types": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/pg-types/-/pg-types-1.6.0.tgz",
"integrity": "sha1-OHKg8ZkUMCVJf07ipl/a8A1+qLM=",
"dev": true
},
"readable-stream": {
"version": "1.0.31",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.0.31.tgz",
"integrity": "sha1-jyUC4LyeOw2huUUgqrtOJgPsr64=",
"dev": true
},
"string_decoder": {
"version": "0.10.31",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
"integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=",
"dev": true
}
}
},
"pg-pool": {
"version": "1.7.1",
"resolved": "https://registry.npmjs.org/pg-pool/-/pg-pool-1.7.1.tgz",
"integrity": "sha1-QhEFy3Rpl53MSNb8T+P+RllDdDc=",
"dev": true,
"dependencies": {
"object-assign": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.0.tgz",
"integrity": "sha1-ejs9DpgGPUP0wD8uiubNUahog6A=",
"dev": true
}
}
},
"pg-types": {
"version": "1.12.0",
"resolved": "https://registry.npmjs.org/pg-types/-/pg-types-1.12.0.tgz",
"integrity": "sha1-itO3uJfj/UY+Yt4kGtX8ZAtKZvA=",
"dev": true
},
"pgpass": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/pgpass/-/pgpass-1.0.2.tgz",
"integrity": "sha1-Knu0G2BltnkH6R2hsHwYR8h3swY=",
"dev": true,
"dependencies": {
"split": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/split/-/split-1.0.0.tgz",
"integrity": "sha1-xDlc5oOrzSVLwo/h2rtuXCfc/64=",
"dev": true
}
}
},
"pidusage": {
"version": "1.1.5",
"resolved": "https://registry.npmjs.org/pidusage/-/pidusage-1.1.5.tgz",
......@@ -2697,30 +2566,6 @@
}
}
},
"postgres-array": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-1.0.2.tgz",
"integrity": "sha1-jgsy6wO/d6XAp4UeBEHBaaJWojg=",
"dev": true
},
"postgres-bytea": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-1.0.0.tgz",
"integrity": "sha1-AntTPAqokOJtFy1Hz5zOzFIazTU=",
"dev": true
},
"postgres-date": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.3.tgz",
"integrity": "sha1-4tiXAu/bJY/52c7g/pG9BpdSV6g=",
"dev": true
},
"postgres-interval": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-1.1.0.tgz",
"integrity": "sha1-EDHnusNFZBMoYq3J62xtLzqnW7Q=",
"dev": true
},
"preact": {
"version": "8.1.0",
"resolved": "https://registry.npmjs.org/preact/-/preact-8.1.0.tgz",
......@@ -3148,7 +2993,8 @@
"through": {
"version": "2.3.8",
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
"integrity": "sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU="
"integrity": "sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU=",
"dev": true
},
"through2": {
"version": "2.0.3",
......
......@@ -14,20 +14,26 @@ const CONFIG_PATH = path.resolve(__dirname, '../../..', 'config.json');
const config = require(CONFIG_PATH);
const BUCKET_NAME = config.BUCKET_NAME || 'common-voice-corpus';
export default class Files {
private s3: any;
private files: {
// fileGlob: [
// sentence: 'the text of the sentenct'
// sentence: 'the text of the sentence',
// votes: vote count for fileGlob
// ]
};
private paths: string[];
private voteSets: {
// vote: Set of globs with this vote count
};
private continuationToken: string;
constructor() {
this.s3 = new AWS.S3();
this.files = {};
this.paths = [];
this.voteSets = {};
this.continuationToken = undefined;
this.init().then(() => {
setInterval(this.init.bind(this), REFRESH_INTERVAL);
......@@ -58,6 +64,34 @@ export default class Files {
});
}
/**
 * Read the vote count for a clip from S3 and re-index the clip by that count.
 *
 * Lists the objects in the bucket whose key starts with `<glob>-by-` and
 * uses the number of objects returned as the clip's vote count. The clip is
 * then removed from the vote set of its previous count and added to the vote
 * set of the new count, and `this.files[glob].votes` is updated.
 *
 * NOTE(review): only a single listing page is read here; per the AWS docs a
 * page is capped (1000 keys by default), so larger counts would be
 * truncated. Confirm whether that matters for this use.
 *
 * @param glob Key of the clip in `this.files`; also the listing prefix base.
 * @param key Used only to give context in the error log.
 */
private getVotes(glob: string, key: string) {
  const params = {Bucket: BUCKET_NAME, Prefix: glob + '-by-'};

  this.s3.listObjectsV2(params, (err: any, s3Data: any) => {
    if (err) {
      console.error('Could not read votes from s3', key, err);
      return;
    }

    // The listing completes asynchronously, so the entry may no longer be
    // tracked by the time we get here; there is nothing to update then.
    const info = this.files[glob];
    if (!info) {
      return;
    }

    // Treat a response without a usable `Contents` array as zero votes
    // instead of throwing inside the callback.
    const contents = s3Data ? s3Data['Contents'] : undefined;
    const votes = Array.isArray(contents) ? contents.length : 0;

    // Drop the clip from the set for its previous count. Do not create that
    // set when it is missing: that would only leave an empty bucket behind.
    const previousSet = this.voteSets[info.votes];
    if (previousSet) {
      delete previousSet[glob];
    }

    info.votes = votes;

    // File the clip under its new count, creating the set on first use.
    if (!this.voteSets[votes]) {
      this.voteSets[votes] = {};
    }
    this.voteSets[votes][glob] = glob;
  });
}
/**
* Load a list of files from S3.
*/
......@@ -82,10 +116,12 @@ export default class Files {
// Track globs and sentences of the voice clips.
if (!this.files[glob]) {
this.files[glob] = {
sentence: null
sentence: null,
votes: 0
}
this.getSentence(glob, key);
}
this.getVotes(glob, key);
}
});
......@@ -126,14 +162,18 @@ export default class Files {
return Promise.reject('No files not from us.');
}
// Make a reasonable effort to find a valid sentence
for(let attempt = 0; attempt < items.length; attempt++) {
let glob = items[Math.floor(Math.random()*items.length)];
let key = glob + MP3_EXT;
let info = this.files[glob];
if (info && info.sentence && /\S/.test(info.sentence) && key) {
return Promise.resolve([key, info.sentence]);
let votes = Object.keys(this.voteSets).sort();
for (let voteIndex = 0; voteIndex < votes.length; voteIndex++) {
let vote = votes[voteIndex];
let currentVoteSetKeys = Object.keys(this.voteSets[vote]);
for (let index = 0; index < currentVoteSetKeys.length; index++) {
let glob = currentVoteSetKeys[index];
let key = glob + MP3_EXT;
let info = this.files[glob];
if (info && info.sentence && /\S/.test(info.sentence) && key) {
return Promise.resolve([key, info.sentence]);
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment