Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
LINAGORA
L
LGS
Labs
kaldi-modelgen
Commits
2316fe66
Commit
2316fe66
authored
Jan 23, 2017
by
Abdelwahab HEBA
Browse files
Update Tcof Normalization
parent
3185ce05
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
6 additions
and
5 deletions
+6
-5
local/data_prep.sh
local/data_prep.sh
+2
-1
local/lm/parseText.py
local/lm/parseText.py
+2
-2
local/parseTcofSync.py
local/parseTcofSync.py
+1
-1
local/prepare_dict.sh
local/prepare_dict.sh
+1
-1
No files found.
local/data_prep.sh
View file @
2316fe66
...
...
@@ -65,6 +65,7 @@ done
# Sort all files
# text
#export LC_ALL=C
cat
$trans
|
sort
-k1
>
$trans
.txt
rm
$trans
mv
$trans
.txt
$trans
...
...
@@ -92,7 +93,7 @@ utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt #|| exit 1
cat
$spk2utt
|
sort
-k1
>
$spk2utt
.txt
rm
$spk2utt
mv
$spk2utt
.txt
$spk2utt
#export LC_ALL=fr_FR.UTF-8
ntrans
=
$(
wc
-l
<
$trans
)
nutt2spk
=
$(
wc
-l
<
$utt2spk
)
!
[
"
$ntrans
"
-eq
"
$nutt2spk
"
]
&&
\
...
...
local/lm/parseText.py
View file @
2316fe66
...
...
@@ -50,8 +50,8 @@ def transformation_text(text):
text
=
re
.
sub
(
r
'\.'
,
' '
,
text
)
#text=re.sub(r"{[^{]+}"," ",text.strip())
# Remove ? ! < > : OK
#<[^\p{L}]|[^\p{L}]>|
#+|
<\p{L}+[ ]|<\p{L}+$
text
=
re
.
sub
(
r
"\?|/|\!|<|>"
,
""
,
text
)
#<[^\p{L}]|[^\p{L}]>|<\p{L}+[ ]|<\p{L}+$
text
=
re
.
sub
(
r
"
:|
\?|/|\!|<|>
|#+
"
,
""
,
text
)
# replace silence character with <sil> : OK
#text=re.sub(r"(\+)", "<sil>", text)
text
=
re
.
sub
(
r
"(\+)"
,
""
,
text
)
...
...
local/parseTcofSync.py
View file @
2316fe66
...
...
@@ -62,7 +62,7 @@ def transformation_text(text):
#text=re.sub(r"{[^{]+}"," ",text.strip())
# Remove ? ! < > : OK
#<[^\p{L}]|[^\p{L}]>|#+|<\p{L}+[ ]|<\p{L}+$
text
=
re
.
sub
(
r
"\?|/|\!|<|>"
,
""
,
text
)
text
=
re
.
sub
(
r
"
:|
\?|/|\!|<|>
|#+
"
,
""
,
text
)
# replace silence character with <sil> : OK
#text=re.sub(r"(\+)", "<sil>", text)
text
=
re
.
sub
(
r
"(\+)"
,
"!SIL"
,
text
)
...
...
local/prepare_dict.sh
View file @
2316fe66
...
...
@@ -138,7 +138,7 @@ if [ $stage -le 3 ]; then
fi
if
[
$stage
-le
4
]
;
then
(
echo
'!sil SIL'
;
echo
'<spoken_noise> SPN'
;
echo
'<
unk
> SPN'
;
echo
'<laugh> LAU'
;
echo
'<noise> NSN'
)
|
\
(
echo
'!sil SIL'
;
echo
'<spoken_noise> SPN'
;
echo
'<
UNK
> SPN'
;
echo
'<laugh> LAU'
;
echo
'<noise> NSN'
)
|
\
cat
-
$lexicon_raw_nosil
|
sort
|
uniq
>
$dst_dir
/lexicon.txt
echo
"Lexicon text file saved as:
$dst_dir
/lexicon.txt"
fi
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment