Commit 19107f65 authored by Ho Yin Chan

trunk:src remove since not all data are released

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3611 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent f55e8d0f
### 16k wordlist, partially closed LM
tri1/decode_eval/cer_10:%CER 50.28 [ 3802 / 7562, 1547 ins, 403 del, 1852 sub ] # triphones
tri2/decode_eval/cer_10:%CER 47.09 [ 3561 / 7562, 1405 ins, 414 del, 1742 sub ] # triphones (better alignment)
tri3a/decode_eval/cer_10:%CER 44.18 [ 3341 / 7562, 1113 ins, 441 del, 1787 sub ] # LDA+MLLT
tri4a/decode_eval/cer_10:%CER 30.23 [ 2286 / 7562, 530 ins, 492 del, 1264 sub ] # LDA+MLLT+SAT
tri4a_20k/decode_eval/cer_10:%CER 32.43 [ 2452 / 7562, 537 ins, 480 del, 1435 sub ] # LDA+MLLT+SAT (small system)
tri5a/decode_eval/cer_10:%CER 28.89 [ 2185 / 7562, 498 ins, 517 del, 1170 sub ] # LDA+MLLT+SAT (better alignment)
tri5a_fmmi_b0.1/decode_eval_iter1/cer_10:%CER 28.00 [ 2117 / 7562, 460 ins, 524 del, 1133 sub ] # feature-space MMI + boosted MMI
tri5a_fmmi_b0.1/decode_eval_iter2/cer_10:%CER 27.47 [ 2077 / 7562, 438 ins, 548 del, 1091 sub ]
tri5a_fmmi_b0.1/decode_eval_iter3/cer_10:%CER 26.59 [ 2011 / 7562, 447 ins, 539 del, 1025 sub ]
tri5a_fmmi_b0.1/decode_eval_iter4/cer_10:%CER 29.91 [ 2262 / 7562, 619 ins, 516 del, 1127 sub ]
tri5a_fmmi_b0.1/decode_eval_iter5/cer_10:%CER 29.24 [ 2211 / 7562, 655 ins, 479 del, 1077 sub ]
tri5a_fmmi_b0.1/decode_eval_iter6/cer_10:%CER 27.10 [ 2049 / 7562, 552 ins, 483 del, 1014 sub ]
tri5a_fmmi_b0.1/decode_eval_iter7/cer_10:%CER 24.97 [ 1888 / 7562, 462 ins, 549 del, 877 sub ]
tri5a_fmmi_b0.1/decode_eval_iter8/cer_10:%CER 25.23 [ 1908 / 7562, 445 ins, 613 del, 850 sub ]
tri5a_mmi_b0.1/decode_eval1/cer_10:%CER 24.93 [ 1885 / 7562, 408 ins, 466 del, 1011 sub ] # boosted MMI
tri5a_mmi_b0.1/decode_eval2/cer_10:%CER 23.25 [ 1758 / 7562, 370 ins, 486 del, 902 sub ]
tri5a_mmi_b0.1/decode_eval3/cer_10:%CER 23.64 [ 1788 / 7562, 402 ins, 501 del, 885 sub ]
tri5a_mmi_b0.1/decode_eval4/cer_10:%CER 23.58 [ 1783 / 7562, 392 ins, 561 del, 830 sub ] # <= best GMM model was obtained here
sgmm_5a/decode_eval/cer_10:%CER 26.40 [ 1996 / 7562, 418 ins, 701 del, 877 sub ] # SGMM
sgmm_5a_mmi_b0.1/decode_eval1/cer_10:%CER 24.93 [ 1885 / 7562, 401 ins, 597 del, 887 sub ] # boosted MMI on SGMM
sgmm_5a_mmi_b0.1/decode_eval2/cer_10:%CER 24.52 [ 1854 / 7562, 386 ins, 596 del, 872 sub ]
sgmm_5a_mmi_b0.1/decode_eval3/cer_10:%CER 23.79 [ 1799 / 7562, 378 ins, 593 del, 828 sub ]
sgmm_5a_mmi_b0.1/decode_eval4/cer_10:%CER 23.87 [ 1805 / 7562, 380 ins, 597 del, 828 sub ]
nnet_8m_6l/decode_eval_iter50/cer_10:%CER 33.25 [ 2514 / 7562, 435 ins, 750 del, 1329 sub ] # CPU-based neural network
nnet_8m_6l/decode_eval_iter100/cer_10:%CER 30.40 [ 2299 / 7562, 543 ins, 476 del, 1280 sub ]
nnet_8m_6l/decode_eval_iter150/cer_10:%CER 26.74 [ 2022 / 7562, 423 ins, 578 del, 1021 sub ]
nnet_8m_6l/decode_eval_iter200/cer_10:%CER 26.20 [ 1981 / 7562, 421 ins, 546 del, 1014 sub ]
nnet_8m_6l/decode_eval_iter210/cer_10:%CER 26.62 [ 2013 / 7562, 436 ins, 569 del, 1008 sub ]
nnet_8m_6l/decode_eval_iter220/cer_10:%CER 26.41 [ 1997 / 7562, 412 ins, 545 del, 1040 sub ]
nnet_8m_6l/decode_eval_iter230/cer_10:%CER 26.98 [ 2040 / 7562, 435 ins, 614 del, 991 sub ]
nnet_8m_6l/decode_eval_iter240/cer_10:%CER 27.86 [ 2107 / 7562, 468 ins, 552 del, 1087 sub ]
nnet_8m_6l/decode_eval_iter250/cer_10:%CER 26.01 [ 1967 / 7562, 409 ins, 565 del, 993 sub ]
nnet_8m_6l/decode_eval_iter260/cer_10:%CER 26.61 [ 2012 / 7562, 419 ins, 555 del, 1038 sub ]
nnet_8m_6l/decode_eval_iter270/cer_10:%CER 25.72 [ 1945 / 7562, 405 ins, 533 del, 1007 sub ]
nnet_8m_6l/decode_eval_iter280/cer_10:%CER 27.43 [ 2074 / 7562, 424 ins, 605 del, 1045 sub ]
nnet_8m_6l/decode_eval_iter290/cer_10:%CER 26.37 [ 1994 / 7562, 410 ins, 572 del, 1012 sub ]
nnet_8m_6l/decode_eval/cer_10:%CER 25.55 [ 1932 / 7562, 405 ins, 549 del, 978 sub ] # neural network with 6 hidden layers
nnet_tanh_6l/decode_eval/cer_10:%CER 21.34 [ 1614 / 7562, 369 ins, 487 del, 758 sub ] # neural network with 6 hidden layers (nnet2 script, 1024 neurons)
nnet_4m_3l/decode_eval/cer_10:%CER 22.38 [ 1692 / 7562, 372 ins, 510 del, 810 sub ] # neural network with 3 hidden layers
nnet_tanh_3l/decode_eval/cer_10:%CER 22.11 [ 1672 / 7562, 391 ins, 489 del, 792 sub ] # neural network with 3 hidden layers (nnet2 script, 1024 neurons)
tri5a_pretrain-dbn_dnn/decode/cer_10:%CER 20.48 [ 1549 / 7562, 383 ins, 468 del, 698 sub ] # 6-layer DNN: RBM pretraining, cross-entropy training
tri5a_pretrain-dbn_dnn_smbr/decode_it1/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ] # sMBR-trained DNN (1024 neurons)
tri5a_pretrain-dbn_dnn_smbr/decode_it2/cer_10:%CER 18.73 [ 1416 / 7562, 310 ins, 446 del, 660 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_it3/cer_10:%CER 18.62 [ 1408 / 7562, 313 ins, 446 del, 649 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_it4/cer_10:%CER 18.66 [ 1411 / 7562, 307 ins, 458 del, 646 sub ]
tri5a_pretrain-dbn_dnn2/decode/cer_10:%CER 20.56 [ 1555 / 7562, 388 ins, 463 del, 704 sub ] # (2048 neurons) <= does not outperform the 1024-neuron system
tri5a_pretrain-dbn_dnn_smbr2/decode_it1/cer_10:%CER 19.06 [ 1441 / 7562, 319 ins, 472 del, 650 sub ] # sMBR-trained DNN <= converges quickly
tri5a_pretrain-dbn_dnn_smbr2/decode_it2/cer_10:%CER 19.08 [ 1443 / 7562, 315 ins, 470 del, 658 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_it3/cer_10:%CER 19.00 [ 1437 / 7562, 315 ins, 462 del, 660 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_it4/cer_10:%CER 18.96 [ 1434 / 7562, 314 ins, 470 del, 650 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_it5/cer_10:%CER 18.95 [ 1433 / 7562, 317 ins, 460 del, 656 sub ]
### 16k wordlist, closed LM (LM perplexity was optimized on the evaluation-data sentences)
tri1/decode_eval_closelm/cer_10:%CER 46.69 [ 3531 / 7562, 1205 ins, 407 del, 1919 sub ]
tri2/decode_eval_closelm/cer_10:%CER 44.18 [ 3341 / 7562, 1136 ins, 421 del, 1784 sub ]
tri3a/decode_eval_closelm/cer_10:%CER 51.53 [ 3897 / 7562, 1218 ins, 467 del, 2212 sub ]
tri4a/decode_eval_closelm/cer_10:%CER 22.81 [ 1725 / 7562, 411 ins, 480 del, 834 sub ]
tri4a_20k/decode_eval_closelm/cer_10:%CER 25.17 [ 1903 / 7562, 439 ins, 467 del, 997 sub ]
tri5a/decode_eval_closelm/cer_10:%CER 22.60 [ 1709 / 7562, 384 ins, 520 del, 805 sub ]
tri5a_fmmi_b0.1/decode_eval_closelm_iter1/cer_10:%CER 21.81 [ 1649 / 7562, 363 ins, 524 del, 762 sub ]
tri5a_fmmi_b0.1/decode_eval_closelm_iter2/cer_10:%CER 21.17 [ 1601 / 7562, 358 ins, 487 del, 756 sub ]
tri5a_fmmi_b0.1/decode_eval_closelm_iter3/cer_10:%CER 21.81 [ 1649 / 7562, 387 ins, 473 del, 789 sub ]
tri5a_fmmi_b0.1/decode_eval_closelm_iter4/cer_10:%CER 27.07 [ 2047 / 7562, 519 ins, 493 del, 1035 sub ]
tri5a_fmmi_b0.1/decode_eval_closelm_iter5/cer_10:%CER 24.76 [ 1872 / 7562, 472 ins, 478 del, 922 sub ]
tri5a_fmmi_b0.1/decode_eval_closelm_iter6/cer_10:%CER 22.51 [ 1702 / 7562, 389 ins, 516 del, 797 sub ]
tri5a_fmmi_b0.1/decode_eval_closelm_iter7/cer_10:%CER 20.46 [ 1547 / 7562, 345 ins, 486 del, 716 sub ]
tri5a_fmmi_b0.1/decode_eval_closelm_iter8/cer_10:%CER 20.75 [ 1569 / 7562, 330 ins, 549 del, 690 sub ]
tri5a_mmi_b0.1/decode_eval_closelm1/cer_10:%CER 19.08 [ 1443 / 7562, 320 ins, 433 del, 690 sub ]
tri5a_mmi_b0.1/decode_eval_closelm2/cer_10:%CER 17.83 [ 1348 / 7562, 305 ins, 438 del, 605 sub ]
tri5a_mmi_b0.1/decode_eval_closelm3/cer_10:%CER 19.72 [ 1491 / 7562, 381 ins, 449 del, 661 sub ]
tri5a_mmi_b0.1/decode_eval_closelm4/cer_10:%CER 18.34 [ 1387 / 7562, 312 ins, 465 del, 610 sub ]
sgmm_5a/decode_eval_closelm/cer_10:%CER 23.00 [ 1739 / 7562, 473 ins, 633 del, 633 sub ]
sgmm_5a_mmi_b0.1/decode_eval_closelm1/cer_10:%CER 21.48 [ 1624 / 7562, 459 ins, 531 del, 634 sub ]
sgmm_5a_mmi_b0.1/decode_eval_closelm2/cer_10:%CER 21.17 [ 1601 / 7562, 449 ins, 530 del, 622 sub ]
sgmm_5a_mmi_b0.1/decode_eval_closelm3/cer_10:%CER 21.05 [ 1592 / 7562, 448 ins, 530 del, 614 sub ]
sgmm_5a_mmi_b0.1/decode_eval_closelm4/cer_10:%CER 21.03 [ 1590 / 7562, 446 ins, 530 del, 614 sub ]
nnet_8m_6l/decode_eval_closelm_iter50/cer_10:%CER 27.12 [ 2051 / 7562, 383 ins, 615 del, 1053 sub ]
nnet_8m_6l/decode_eval_closelm_iter100/cer_10:%CER 24.33 [ 1840 / 7562, 466 ins, 462 del, 912 sub ]
nnet_8m_6l/decode_eval_closelm_iter150/cer_10:%CER 21.34 [ 1614 / 7562, 364 ins, 476 del, 774 sub ]
nnet_8m_6l/decode_eval_closelm_iter200/cer_10:%CER 20.56 [ 1555 / 7562, 332 ins, 485 del, 738 sub ]
nnet_8m_6l/decode_eval_closelm_iter210/cer_10:%CER 20.67 [ 1563 / 7562, 349 ins, 494 del, 720 sub ]
nnet_8m_6l/decode_eval_closelm_iter220/cer_10:%CER 21.98 [ 1662 / 7562, 357 ins, 531 del, 774 sub ]
nnet_8m_6l/decode_eval_closelm_iter230/cer_10:%CER 22.30 [ 1686 / 7562, 360 ins, 539 del, 787 sub ]
nnet_8m_6l/decode_eval_closelm_iter240/cer_10:%CER 22.19 [ 1678 / 7562, 376 ins, 508 del, 794 sub ]
nnet_8m_6l/decode_eval_closelm_iter250/cer_10:%CER 21.52 [ 1627 / 7562, 354 ins, 523 del, 750 sub ]
nnet_8m_6l/decode_eval_closelm_iter260/cer_10:%CER 20.97 [ 1586 / 7562, 347 ins, 499 del, 740 sub ]
nnet_8m_6l/decode_eval_closelm_iter270/cer_10:%CER 20.50 [ 1550 / 7562, 348 ins, 465 del, 737 sub ]
nnet_8m_6l/decode_eval_closelm_iter280/cer_10:%CER 21.44 [ 1621 / 7562, 354 ins, 520 del, 747 sub ]
nnet_8m_6l/decode_eval_closelm_iter290/cer_10:%CER 20.40 [ 1543 / 7562, 323 ins, 492 del, 728 sub ]
nnet_8m_6l/decode_eval_closelm/cer_10:%CER 20.68 [ 1564 / 7562, 351 ins, 483 del, 730 sub ]
nnet_tanh_6l/decode_eval_closelm/cer_10:%CER 17.10 [ 1293 / 7562, 337 ins, 448 del, 508 sub ]
nnet_4m_3l/decode_eval_closelm/cer_10:%CER 17.15 [ 1297 / 7562, 335 ins, 439 del, 523 sub ]
nnet_tanh_3l/decode_eval_closelm/cer_10:%CER 17.22 [ 1302 / 7562, 349 ins, 434 del, 519 sub ]
tri5a_pretrain-dbn_dnn/decode_closelm/cer_10:%CER 16.54 [ 1251 / 7562, 346 ins, 413 del, 492 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it1/cer_10:%CER 15.31 [ 1158 / 7562, 280 ins, 410 del, 468 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2/cer_10:%CER 15.30 [ 1157 / 7562, 279 ins, 408 del, 470 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it3/cer_10:%CER 15.52 [ 1174 / 7562, 280 ins, 408 del, 486 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it4/cer_10:%CER 15.62 [ 1181 / 7562, 278 ins, 412 del, 491 sub ]
tri5a_pretrain-dbn_dnn2/decode_closelm_xeon3.5/cer_10:%CER 17.06 [ 1290 / 7562, 347 ins, 433 del, 510 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it1/cer_10:%CER 15.87 [ 1200 / 7562, 292 ins, 436 del, 472 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it2/cer_10:%CER 15.71 [ 1188 / 7562, 285 ins, 433 del, 470 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it3/cer_10:%CER 15.76 [ 1192 / 7562, 286 ins, 430 del, 476 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it4/cer_10:%CER 15.74 [ 1190 / 7562, 287 ins, 428 del, 475 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it5/cer_10:%CER 15.70 [ 1187 / 7562, 286 ins, 428 del, 473 sub ]
##### Below are the results of wide-beam decoding #####
### 16k wordlist, partially closed LM
exp/tri5a/decode_wide_eval/cer_10:%CER 27.23 [ 2059 / 7562, 465 ins, 517 del, 1077 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval/cer_10:%CER 21.93 [ 1658 / 7562, 351 ins, 565 del, 742 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_1/cer_10:%CER 24.04 [ 1818 / 7562, 391 ins, 472 del, 955 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_2/cer_10:%CER 22.43 [ 1696 / 7562, 358 ins, 480 del, 858 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_3/cer_10:%CER 21.73 [ 1643 / 7562, 353 ins, 507 del, 783 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_4/cer_10:%CER 21.93 [ 1658 / 7562, 351 ins, 565 del, 742 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter1/cer_10:%CER 26.30 [ 1989 / 7562, 435 ins, 519 del, 1035 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter2/cer_10:%CER 25.52 [ 1930 / 7562, 412 ins, 529 del, 989 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter3/cer_10:%CER 24.80 [ 1875 / 7562, 389 ins, 534 del, 952 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter4/cer_10:%CER 24.90 [ 1883 / 7562, 403 ins, 534 del, 946 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter5/cer_10:%CER 22.86 [ 1729 / 7562, 367 ins, 500 del, 862 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter6/cer_10:%CER 21.66 [ 1638 / 7562, 347 ins, 506 del, 785 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter7/cer_10:%CER 21.37 [ 1616 / 7562, 334 ins, 549 del, 733 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter8/cer_10:%CER 21.46 [ 1623 / 7562, 328 ins, 612 del, 683 sub ]
exp/sgmm_5a/decode_wide_eval/cer_10:%CER 26.06 [ 1971 / 7562, 405 ins, 696 del, 870 sub ]
exp/sgmm_5a_mmi_b0.1/decode_wide_eval_1/cer_10:%CER 24.11 [ 1823 / 7562, 379 ins, 563 del, 881 sub ]
exp/sgmm_5a_mmi_b0.1/decode_wide_eval_2/cer_10:%CER 23.79 [ 1799 / 7562, 371 ins, 568 del, 860 sub ]
exp/sgmm_5a_mmi_b0.1/decode_wide_eval_3/cer_10:%CER 23.30 [ 1762 / 7562, 364 ins, 566 del, 832 sub ]
exp/sgmm_5a_mmi_b0.1/decode_wide_eval_4/cer_10:%CER 23.17 [ 1752 / 7562, 373 ins, 568 del, 811 sub ]
exp/nnet_8m_6l/decode_wide_eval/cer_10:%CER 24.13 [ 1825 / 7562, 384 ins, 535 del, 906 sub ]
exp/nnet_tanh_6l/decode_wide_eval/cer_10:%CER 21.22 [ 1605 / 7562, 365 ins, 485 del, 755 sub ]
exp/nnet_4m_3l/decode_wide_eval/cer_10:%CER 22.16 [ 1676 / 7562, 365 ins, 505 del, 806 sub ]
exp/nnet_tanh_3l/decode_wide_eval/cer_10:%CER 21.95 [ 1660 / 7562, 382 ins, 488 del, 790 sub ]
exp/tri5a_pretrain-dbn_dnn/decode_dnnwide/cer_10:%CER 20.47 [ 1548 / 7562, 383 ins, 467 del, 698 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it1_dnnwide/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it2_dnnwide/cer_10:%CER 18.73 [ 1416 / 7562, 310 ins, 446 del, 660 sub ]
### 16k wordlist, closed LM (LM perplexity was optimized on the evaluation-data sentences)
exp/tri5a/decode_wide_eval_closelm/cer_10:%CER 20.79 [ 1572 / 7562, 397 ins, 489 del, 686 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm/cer_10:%CER 16.58 [ 1254 / 7562, 308 ins, 441 del, 505 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm_1/cer_10:%CER 17.56 [ 1328 / 7562, 333 ins, 424 del, 571 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm_2/cer_10:%CER 16.87 [ 1276 / 7562, 322 ins, 425 del, 529 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm_3/cer_10:%CER 16.66 [ 1260 / 7562, 315 ins, 437 del, 508 sub ]
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm_4/cer_10:%CER 16.58 [ 1254 / 7562, 308 ins, 441 del, 505 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter1/cer_10:%CER 20.22 [ 1529 / 7562, 379 ins, 492 del, 658 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter2/cer_10:%CER 19.70 [ 1490 / 7562, 364 ins, 486 del, 640 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter3/cer_10:%CER 19.10 [ 1444 / 7562, 342 ins, 461 del, 641 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter4/cer_10:%CER 18.66 [ 1411 / 7562, 347 ins, 451 del, 613 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter5/cer_10:%CER 16.15 [ 1221 / 7562, 308 ins, 412 del, 501 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter6/cer_10:%CER 16.15 [ 1221 / 7562, 284 ins, 422 del, 515 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter7/cer_10:%CER 16.19 [ 1224 / 7562, 276 ins, 444 del, 504 sub ]
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter8/cer_10:%CER 16.38 [ 1239 / 7562, 277 ins, 463 del, 499 sub ]
exp/sgmm_5a/decode_wide_eval_closelm/cer_10:%CER 21.95 [ 1660 / 7562, 413 ins, 612 del, 635 sub ]
exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_1/cer_10:%CER 19.65 [ 1486 / 7562, 389 ins, 490 del, 607 sub ]
exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_2/cer_10:%CER 19.51 [ 1475 / 7562, 388 ins, 494 del, 593 sub ]
exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_3/cer_10:%CER 19.41 [ 1468 / 7562, 387 ins, 495 del, 586 sub ]
exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_4/cer_10:%CER 19.27 [ 1457 / 7562, 377 ins, 492 del, 588 sub ]
exp/nnet_8m_6l/decode_wide_eval_closelm/cer_10:%CER 17.87 [ 1351 / 7562, 343 ins, 453 del, 555 sub ]
exp/nnet_tanh_6l/decode_wide_eval_closelm/cer_10:%CER 17.15 [ 1297 / 7562, 336 ins, 452 del, 509 sub ]
exp/nnet_4m_3l/decode_wide_eval_closelm/cer_10:%CER 17.02 [ 1287 / 7562, 330 ins, 436 del, 521 sub ]
exp/nnet_tanh_3l/decode_wide_eval_closelm/cer_10:%CER 17.31 [ 1309 / 7562, 348 ins, 441 del, 520 sub ]
exp/tri5a_pretrain-dbn_dnn/decode_closelm_dnnwide/cer_10:%CER 16.42 [ 1242 / 7562, 337 ins, 414 del, 491 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it1_dnnwide/cer_10:%CER 15.26 [ 1154 / 7562, 279 ins, 409 del, 466 sub ]
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2_dnnwide/cer_10:%CER 15.31 [ 1158 / 7562, 279 ins, 408 del, 471 sub ]
##### end of wide-beam decoding results #####
### for n in `cat decode_dir `; do sh local/cal_runtime.sh $n; done
## wide-beam decoding real-time factor (source code version before 12 Oct 2013)
## Intel(R) Xeon(R) CPU X5690 @ 3.47GHz, >64 GB RAM
# 1.8 MB L.fst, 76 MB HCLG.fst
exp/nnet_4m_3l/decode_wide_eval_closelm_xeon3.5/log 0.50984
exp/nnet_8m_6l/decode_wide_eval_closelm_xeon3.5/log 0.604995
exp/nnet_tanh_3l/decode_wide_eval_closelm_xeon3.5/log 0.584603
exp/nnet_tanh_6l/decode_wide_eval_closelm_xeon3.5/log 0.606776
exp/sgmm_5a/decode_wide_eval_closelm_xeon3.5/log 0.821786
exp/tri5a/decode_wide_eval_closelm_xeon3.5/log 0.988112
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter8_xeon3.5/log 1.09135
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm_xeon3.5/log 1.07229
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2_dnnwide_xeon3.5/log 0.714925
#
# 28 MB L.fst, 76 MB HCLG.fst
exp/nnet_4m_3l/decode_wide_eval_xeon3.5/log 0.924118
exp/nnet_8m_6l/decode_wide_eval_xeon3.5/log 1.00894
exp/nnet_tanh_3l/decode_wide_eval_xeon3.5/log 1.00994
exp/nnet_tanh_6l/decode_wide_eval_xeon3.5/log 0.940629
exp/sgmm_5a/decode_wide_eval_xeon3.5/log 1.14046
exp/tri5a/decode_wide_eval_xeon3.5/log 1.34804
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter8_xeon3.5/log 1.87827
exp/tri5a_mmi_b0.1/decode_wide_eval_xeon3.5/log 1.77104
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it2_dnnwide_xeon3.5/log 1.0829
## Intel(R) Xeon(R) CPU L5420 @ 2.50GHz, 6 GB RAM
# 1.8 MB L.fst, 76 MB HCLG.fst
exp/nnet_4m_3l/decode_wide_eval_closelm/log 1.18866
exp/nnet_8m_6l/decode_wide_eval_closelm_rerun/log 1.35657
exp/nnet_tanh_3l/decode_wide_eval_closelm/log 1.34746
exp/nnet_tanh_6l/decode_wide_eval_closelm/log 1.39452
exp/sgmm_5a/decode_wide_eval_closelm/log 1.71265
exp/tri5a/decode_wide_eval_closelm/log 2.29062
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter8/log 2.66123
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm/log 2.61595
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2_dnnwide/log 2.18625
#
# 28 MB L.fst, 76 MB HCLG.fst
exp/nnet_4m_3l/decode_wide_eval/log 2.01065
exp/nnet_8m_6l/decode_wide_eval_rerun/log 2.16128
exp/nnet_tanh_3l/decode_wide_eval/log 2.16153
exp/nnet_tanh_6l/decode_wide_eval/log 2.09993
exp/sgmm_5a/decode_wide_eval/log 2.28183
exp/tri5a/decode_wide_eval/log 3.09584
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter8/log 4.32505
exp/tri5a_mmi_b0.1/decode_wide_eval/log 4.14296
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it2_dnnwide/log 3.08541
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of CPUs on your machine).
#a) JHU cluster options
#export train_cmd="queue.pl -l arch=*64*"
#export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
#export cuda_cmd="..."
#export mkgraph_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"
#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"
#c) run it locally...
export train_cmd=run.pl
export decode_cmd=run.pl
export cuda_cmd=run.pl
export mkgraph_cmd=run.pl
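# Illustration only (hypothetical job, not from the recipe): run.pl/queue.pl are
# invoked with an optional job range, a log path, and the command to run, e.g.
#   $decode_cmd JOB=1:4 exp/tri5a/decode/log/decode.JOB.log <decode-command-and-args>
# where JOB is substituted into the log name and the command's arguments.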
beam=11.0 # beam for decoding. Was 13.0 in the scripts.
first_beam=8.0 # beam for 1st-pass decoding in SAT.
beam=18.0 # beam for decoding. Was 13.0 in the scripts.
latbeam=10.0 # this has most effect on size of the lattices.
acwt=0.1
scoring_opts="--min-lmwt 9 --max-lmwt 19" # score over a higher range of LM weights
beam=18.0 # beam for decoding.
lattice_beam=10.0 # lattice beam for decoding
lat_beam=10.0 # lattice beam for decoding (variable name used in older Kaldi scripts; since replaced by "lattice_beam")
latbeam=10.0 # lattice beam for decoding (variable name used in scripts such as decode.sh and decode_fmmi.sh)
first_beam=10.0 # beam for 1st-pass decoding in SAT.
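# Hedged usage sketch (hypothetical paths): these decoding options are normally
# picked up through a decode script's --config option, e.g.
#   steps/decode_fmllr.sh --config conf/decode.config --nj 8 \
#     exp/tri5a/graph data/eval exp/tri5a/decode_wide_eval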
--use-energy=false # only non-default option.
--sample-frequency=8000
#!/bin/bash
# Apache 2.0. Copyright 2013, Hong Kong University of Science and Technology (author: Ricky Chan Ho Yin)
# This script collects the mixture weights from multiple interpolated LMs produced by build_interpolate_lm_4gram_sri.sh
# and computes their average. It is useful when the mixture weights of a target interpolated LM are
# obtained from K-fold cross-validation.
function printUsage {
echo "Usage: $0 mix_model_name1 mix_model_name2 ... ... mix_model_nameN ug|bg|tg|fg"
echo "Example: $0 mix_model_name1 mix_model_name2 mix_model_name3 tg"
}
if [ $# -lt 3 ]; then
printUsage
exit 1
fi
args=("$@")
lst=$(( $# - 1 ))
order=${args[${lst}]}
if [ $order != "ug" ] && [ $order != "bg" ] && [ $order != "tg" ] && [ $order != "fg" ]
then
printUsage
exit 1
fi
i=0
while [ $i -lt $lst ]
do
if [ ! -e ${args[$i]}/${args[$i]}.$order.wgt ]; then
echo "Interpolated LM mixture weights ${args[$i]}/${args[$i]}.$order.wgt not found!"
exit 1
fi
let i++
done
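# Each .wgt file holds the output of SRILM's compute-best-mix; its
# "best lambda (w1 w2 ... wN)" line carries the optimized interpolation
# weights, and the grep command assembled below collects that line from every fold.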
cmd="grep -h lambda "
i=0;
while [ $i -lt $lst ]
do
cmd=`echo $cmd ${args[$i]}/${args[$i]}.$order.wgt `
let i++
done
if [ -f cv_weight ]; then
echo " original cv_weight file move to cv_weight.bak"
echo
mv cv_weight cv_weight.bak
fi
echo " $cmd | cut -d'(' -f 2 | sed 's/)//g' > cv_weight "
echo
$cmd | cut -d'(' -f 2 | sed 's/)//g' > cv_weight
if [ -f avg_cv_weight ]; then
echo " original avg_cv_weight file move to avg_cv_weight.bak"
echo
mv avg_cv_weight avg_cv_weight.bak
fi
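# Column-wise average over the K folds listed in cv_weight; e.g. (hypothetical
# numbers) rows "0.6 0.4" and "0.4 0.6" average to "0.5 0.5".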
awk '{for(i=1; i<=NF; i++) {array[i] = array[i] + $i} }; END{for(i=1; i<=NF; i++) printf array[i]/NR " "; printf "\n";}' cv_weight > avg_cv_weight
#!/bin/bash
# Apache 2.0. Copyright 2013, Hong Kong University of Science and Technology (author: Ricky Chan Ho Yin)
# This script builds an interpolated LM from multiple separate LMs produced by build_lm_cutoff_4gram.sh.
# The interpolated LM is built by merging the separate LMs with mixture weights that minimize perplexity on "test_file".
# Perplexities of the interpolated LM on "test_file" are tested and written to the file ${lm_name}/perplexity.txt
srilmbinpath=/homes/ricky/softwares/srilm/bin/i686-m64/ # You may need to modify the path
if [ $# -ne 3 ]
then
echo
echo "usage: $0 lm_name separate_models_specifier test_file"
echo
echo example: $0 mixlm123 model1+model2+model3 test_file
echo
exit 1
fi
NAME_LABEL=$1
MODELNAME=$2
MODELS=`echo $MODELNAME | awk '{gsub("+", " "); print $0}'`
TEST_FILE=$3
if [ ! -d $NAME_LABEL ]
then
mkdir $NAME_LABEL
fi
echo "[[Initialization]]"
for n in $MODELS
do
$srilmbinpath/ngram -debug 2 -ppl $TEST_FILE -lm $n/lm_0/ug > $NAME_LABEL/$n.ug.prob
$srilmbinpath/ngram -debug 2 -ppl $TEST_FILE -lm $n/lm_0/bg > $NAME_LABEL/$n.bg.prob
$srilmbinpath/ngram -debug 2 -ppl $TEST_FILE -lm $n/lm_0/tg > $NAME_LABEL/$n.tg.prob
$srilmbinpath/ngram -debug 2 -order 4 -ppl $TEST_FILE -lm $n/lm_0/fg > $NAME_LABEL/$n.fg.prob
done
echo "[[Compute weight]]"
for n in ug bg tg fg
do
argument=`echo $MODELS | awk -v aa=$NAME_LABEL -v tmp=$n '{for(i=1; i<=NF; i++) printf aa"/"$i "." tmp ".prob ";}'`
$srilmbinpath/compute-best-mix $argument > $NAME_LABEL/$NAME_LABEL.$n.wgt
done
echo "[[Merging LM]]"
for n in ug bg tg fg
do
array_wgt=(`cat $NAME_LABEL/$NAME_LABEL.$n.wgt |cut -f2 -d\( | cut -f1 -d\)`)
array_model=(`echo $MODELS`)
len=`expr ${#array_model[*]} - 1`
i=0
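# SRILM interpolation options: the first LM is given via -lm/-lambda, the middle
# LMs via -mix-lm2/-mix-lambda2 ... and the last LM via plain -mix-lm, which
# implicitly receives the remaining weight (the weights sum to 1).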
COMMAND=`echo -lm ${array_model[$i]}/lm_0/$n -lambda ${array_wgt[$i]}`
i=1
while [ $i -lt $len ]
do
j=`expr $i + 1`
COMMAND=`echo $COMMAND -mix-lm${j} ${array_model[$i]}/lm_0/$n -mix-lambda${j} ${array_wgt[$i]}`
let i++
done
if [ $n == "fg" ]
then
echo "$srilmbinpath/ngram -order 4 -renorm -map-unk \<unk\> $COMMAND -mix-lm ${array_model[$i]}/lm_0/$n -write-lm $NAME_LABEL/$NAME_LABEL.$n"
$srilmbinpath/ngram -order 4 -renorm -map-unk '<unk>' $COMMAND -mix-lm ${array_model[$i]}/lm_0/$n -write-lm $NAME_LABEL/$NAME_LABEL.$n
else
echo "$srilmbinpath/ngram -renorm -map-unk \<unk\> $COMMAND -mix-lm ${array_model[$i]}/lm_0/$n -write-lm $NAME_LABEL/$NAME_LABEL.$n"
$srilmbinpath/ngram -renorm -map-unk '<unk>' $COMMAND -mix-lm ${array_model[$i]}/lm_0/$n -write-lm $NAME_LABEL/$NAME_LABEL.$n
fi
done
echo "[[test perplexity]]"
$srilmbinpath/ngram -lm $NAME_LABEL/$NAME_LABEL.ug -ppl $TEST_FILE > $NAME_LABEL/perplexity.txt
$srilmbinpath/ngram -lm $NAME_LABEL/$NAME_LABEL.bg -ppl $TEST_FILE >> $NAME_LABEL/perplexity.txt
$srilmbinpath/ngram -lm $NAME_LABEL/$NAME_LABEL.tg -ppl $TEST_FILE >> $NAME_LABEL/perplexity.txt
$srilmbinpath/ngram -lm $NAME_LABEL/$NAME_LABEL.fg -ppl $TEST_FILE >> $NAME_LABEL/perplexity.txt
#!/bin/bash
# Apache 2.0. Copyright 2013, Hong Kong University of Science and Technology (author: Ricky Chan Ho Yin)
# Cambridge University Engineering Department Alumni
# This script builds individual language models in ARPA format using either SRILM or HTK binaries.
# ARPA-format LMs can be used with the Kaldi speech recognition toolkit for training and decoding.
#
# If the --use_sri option is given, SRILM training with Witten-Bell discounting is used; otherwise HTK
# training is used.
#
# lm_name => LM name
# train_scp => file containing a list of LM training text files
# wordlist => vocabulary list
# test_file => file for perplexity test
#
# htk_config => htk LM config
# bg_cutoff => bigram cutoff frequency threshold
# tg_cutoff => trigram cutoff frequency threshold
# fg_cutoff => fourgram cutoff frequency threshold
srilmbinpath=/homes/ricky/softwares/srilm/bin/i686-m64 # You probably need to modify the path
htklmbinpath=/homes/ricky/softwares/htk/bin # You probably need to modify the path
if [ $# -ne 8 ] && [ $# -ne 5 ]; then
echo "Usage: $0 lm_name --use_sri train_scp wordlist test_file"
echo "Usage: $0 lm_name htk_config train_scp wordlist test_file bg_cutoff tg_cutoff fg_cutoff"
exit 1;
fi
NAME_LABEL=$1
SCP=$3
WORDLIST=$4
TEST_FILE=$5
if [ $2 == "--use_sri" ]; then
numfile=$(wc -l < $SCP)
if [ $numfile -gt 1 ]; then
tmpdir=$(mktemp -d)
TRAIN_FILE=$tmpdir/train_text
rm -f $TRAIN_FILE
for n in `cat $SCP`; do
cat $n >> $TRAIN_FILE
done
else
TRAIN_FILE=`cat $SCP`
fi
else
CONFIG=$2
BG_CUT=$6
TG_CUT=$7
FG_CUT=$8
fi
if [ ! -d $NAME_LABEL ]
then
mkdir $NAME_LABEL
fi
LM_PLACE=$NAME_LABEL/lm_0
if [ ! -d $LM_PLACE ]
then
mkdir $LM_PLACE
fi
if [ $2 == "--use_sri" ]; then
echo "[[build N-grams]]"
$srilmbinpath/ngram-count -wbdiscount1 -order 1 -text $TRAIN_FILE -vocab $WORDLIST -lm $LM_PLACE/ug
$srilmbinpath/ngram-count -wbdiscount1 -wbdiscount2 -order 2 -text $TRAIN_FILE -vocab $WORDLIST -lm $LM_PLACE/bg
$srilmbinpath/ngram-count -wbdiscount1 -wbdiscount2 -wbdiscount3 -order 3 -text $TRAIN_FILE -vocab $WORDLIST -lm $LM_PLACE/tg
$srilmbinpath/ngram-count -wbdiscount1 -wbdiscount2 -wbdiscount3 -wbdiscount4 -order 4 -text $TRAIN_FILE -vocab $WORDLIST -lm $LM_PLACE/fg
echo "[[test perplexity]]"
$srilmbinpath/ngram -order 1 -lm $LM_PLACE/ug -ppl $TEST_FILE > $NAME_LABEL/perplexity.txt
$srilmbinpath/ngram -order 2 -lm $LM_PLACE/bg -ppl $TEST_FILE >> $NAME_LABEL/perplexity.txt
$srilmbinpath/ngram -order 3 -lm $LM_PLACE/tg -ppl $TEST_FILE >> $NAME_LABEL/perplexity.txt
$srilmbinpath/ngram -order 4 -lm $LM_PLACE/fg -ppl $TEST_FILE >> $NAME_LABEL/perplexity.txt
if [ -n "$tmpdir" ]; then rm -rf $tmpdir; fi
exit
fi
LM_PLACE2=$NAME_LABEL/lm_1
if [ ! -d $LM_PLACE2 ]
then
mkdir $LM_PLACE2
fi
echo "[[Initialization]]"
$htklmbinpath/LNewMap -f WFC $NAME_LABEL $NAME_LABEL/empty.wmap
$htklmbinpath/LGPrep -A -T 1 -a 5000000 -b 100000000 -d $NAME_LABEL -n 4 -S $SCP $NAME_LABEL/empty.wmap
$htklmbinpath/LGCopy -T 1 -n 4 -b 10000000 -d $LM_PLACE $NAME_LABEL/wmap $NAME_LABEL/gram.*
$htklmbinpath/LGCopy -T 4 -n 4 -A -C $CONFIG -m $LM_PLACE2/wmap -a 5000000 -b 10000000 -d $LM_PLACE2 -w $WORDLIST $NAME_LABEL/wmap $LM_PLACE/data.*
$htklmbinpath/LFoF -T 1 -A -n 4 -f 128 $LM_PLACE2/wmap $LM_PLACE2/lm.train.fof $LM_PLACE2/data.* $LM_PLACE2/data.*
echo "[[build ug]]"
$htklmbinpath/LBuild -T 1 -A -f TEXT -n 1 -t $LM_PLACE2/lm.train.fof $LM_PLACE2/wmap $LM_PLACE/ug $LM_PLACE2/data.* $LM_PLACE2/data.*
echo "[[build bg]]"
$htklmbinpath/LBuild -T 1 -A -f TEXT -n 2 -c 2 $BG_CUT -t $LM_PLACE2/lm.train.fof $LM_PLACE2/wmap $LM_PLACE/bg $LM_PLACE2/data.* $LM_PLACE2/data.*
echo "[[build tg]]"
$htklmbinpath/LBuild -T 1 -A -f TEXT -n 3 -c 2 $BG_CUT -c 3 $TG_CUT -t $LM_PLACE2/lm.train.fof $LM_PLACE2/wmap $LM_PLACE/tg $LM_PLACE2/data.* $LM_PLACE2/data.*
echo "[[build fg]]"
$htklmbinpath/LBuild -T 1 -A -f TEXT -n 4 -c 2 $BG_CUT -c 3 $TG_CUT -c 4 $FG_CUT -t $LM_PLACE2/lm.train.fof $LM_PLACE2/wmap $LM_PLACE/fg $LM_PLACE2/data.* $LM_PLACE2/data.*
echo "[[test perplexity]]"
$htklmbinpath/LPlex -n 1 -C $CONFIG -t $LM_PLACE/ug $TEST_FILE > $NAME_LABEL/perplexity.txt
$htklmbinpath/LPlex -n 2 -C $CONFIG -t $LM_PLACE/bg $TEST_FILE >> $NAME_LABEL/perplexity.txt
$htklmbinpath/LPlex -n 3 -C $CONFIG -t $LM_PLACE/tg $TEST_FILE >> $NAME_LABEL/perplexity.txt
$htklmbinpath/LPlex -n 4 -C $CONFIG -t $LM_PLACE/fg $TEST_FILE >> $NAME_LABEL/perplexity.txt
#!/bin/bash
# Apache 2.0. Copyright 2013, Hong Kong University of Science and Technology (author: Ricky Chan Ho Yin)
# This script calculates the average decoding real-time factor for a decoding directory, using the runtime information in its logs
if [ $# -ne 1 ] && [ $# -ne 2 ]; then
echo "Usage: $0 decode_directory [framePerSecond]"
echo ""
echo "## The default framerate framePerSecond = 100 i.e. 10ms sliding for input features during decoding"
exit 1
fi
decodeDIR=$1
if [ ! -d $decodeDIR/log ]; then
echo "decoding directory $decodeDIR/log not exist"
exit
fi
if [ $# -eq 2 ]; then
framePerSecond=$2
else
framePerSecond=100.0
fi
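# Real-time factor = total decoding time / total audio duration: the awk script sums
# the times from the "Time taken" log lines and divides by the frame count from the
# "Overall log-likelihood per frame" lines, converted to seconds via framePerSecond.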
printf '%s\t' "$decodeDIR/log"
tail $decodeDIR/log/decode*.log | egrep -e 'Time taken .* real-time|Overall log-likelihood per frame' | awk -v fps=$framePerSecond 'BEGIN{sumTime=0; sumFrame=0;} {if($0 ~ / Time taken /) {pos=match($0, " [0-9.]+s:"); pos2=match($0, "s: real-time factor"); sumTime+=substr($0, pos+1, pos2-pos-1); } else {sumFrame+=$(NF-1)}; }; END{print sumTime/(sumFrame/fps)}'
#!/usr/bin/perl
# Copyright 2014. Ricky Chan Ho Yin
#
# This script compares test_scp with train_scp (and, in --deep mode, the file size &
# content of the corresponding audio files, e.g. wav files) to check whether the
# test set files (test_scp) are actually held out from the train set files (train_scp)
#
# train_scp/test_scp format is as follows:
# id filename
#
use File::Compare;
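# File::Compare::compare($a, $b) returns 0 when the two files' contents match;
# this supports the --deep file-content check described above.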
if(@ARGV != 2 && @ARGV != 3) {
printUsage();
exit;
}
$deep_check=0;
if(@ARGV == 2) {
$trainscp=$ARGV[0];
$testscp=$ARGV[1];
}
else {
if($ARGV[0] ne "--deep") {
printUsage();
exit;
}
$deep_check=1;
$trainscp=$ARGV[1];
$testscp=$ARGV[2];
}
%trainlist=();
%testlist=();