hkust_extract_subdict.pl 939 Bytes
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
#!/usr/bin/perl
# Copyright Hong Kong University of Science and Technology (Author: Ricky Chan) 2013.

# Extract the pronunciations of the words in a word list from a dictionary.
#
# Usage: perl hkust_extract_subdict.pl dict wordlist
#
#   dict     - one entry per line: a word followed by whitespace-separated
#              pronunciation tokens.  If a word occurs more than once, the
#              last entry wins.
#   wordlist - zero or more whitespace-separated words per line.
#
# For every wordlist line the script prints to STDOUT the words (each
# followed by a space), a tab, and then for each word its pronunciation
# (or "_NOT_FOUND_") followed by a space, terminated by a newline.
use strict;
use warnings;

if (@ARGV != 2) {
  # Report misuse on STDERR with a failing status so that a caller which
  # redirects STDOUT into a file does not mistake the usage text for data.
  print STDERR "usage: perl hkust_extract_subdict.pl dict wordlist \n";
  exit 1;
}

my ($dictfile, $inputfile) = @ARGV;

# Word => pronunciation string; every token is prefixed with one space.
my %dictionarylist;

# Three-argument open with a lexical handle: the file name is never
# interpreted as an open mode or a pipe command.
open(my $dict_fh, '<', $dictfile)
  or die("Can't open dict $dictfile: $!\n");
while (my $entry = <$dict_fh>) {
  chomp $entry;
  # split ' ' skips leading whitespace, so an indented entry does not
  # yield an empty first field (which split(/\s+/) would produce).
  my ($word, @phones) = split ' ', $entry;
  next unless defined $word;    # blank line: nothing to store
  $dictionarylist{$word} = join '', map { " $_" } @phones;
}
close($dict_fh);

open(my $list_fh, '<', $inputfile)
  or die("Can't open wordlist $inputfile: $!\n");
while (my $entry = <$list_fh>) {
  chomp $entry;
  my @words = split ' ', $entry;

  # First column: the words themselves, each followed by a space.
  print "$_ " for @words;
  print "\t";

  # Second column: the stored pronunciation of each word, in order.
  for my $word (@words) {
    if (exists $dictionarylist{$word}) {
      print $dictionarylist{$word} . " ";
    }
    else {
      print "_NOT_FOUND_ ";
    }
  }
  print "\n";
}
close($list_fh);