File | Doc | Category | Size | Date | Package
WordlistLoader.java | API Doc | Apache Lucene 1.9 | 3556 | Mon Feb 20 09:19:02 GMT 2006 | org.apache.lucene.analysis.nl

WordlistLoader

public class WordlistLoader extends Object
author
Gerhard Schwarz

Loads a text file and adds every line as an entry to a HashMap. Every line should contain only one word. If the file is not found or on any error, an empty table is returned.

Fields Summary
Constructors Summary
Methods Summary
public static java.util.HashMap getStemDict(java.io.File wordstemfile)
Reads a stem dictionary. Each line contains: word \t stem (i.e. tab separated)

return
Stem dictionary that overrules the stemming algorithm

    // Reads a tab-separated "word<TAB>stem" file into a map of word -> stem.
    // A null file yields an empty map.
    if (wordstemfile == null) {
      return new HashMap();
    }
    HashMap result = new HashMap();
    LineNumberReader lnr = null;
    try {
      lnr = new LineNumberReader(new FileReader(wordstemfile));
      String line;
      String[] wordstem;
      while ((line = lnr.readLine()) != null) {
        // Limit 2: everything after the first tab is treated as the stem.
        wordstem = line.split("\t", 2);
        // Skip lines without a tab (e.g. blank lines); indexing wordstem[1]
        // on them would throw ArrayIndexOutOfBoundsException.
        if (wordstem.length < 2) {
          continue;
        }
        result.put(wordstem[0], wordstem[1]);
      }
    } catch (IOException e) {
      // Best effort: a missing or unreadable file is not fatal. Whatever
      // entries were read before the failure are returned to the caller.
    } finally {
      // Always release the file handle, on success and on failure alike.
      if (lnr != null) {
        try {
          lnr.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if close() itself fails.
        }
      }
    }
    return result;
  
public static java.util.HashMap getWordtable(java.lang.String path, java.lang.String wordfile)

param
path Path to the wordlist
param
wordfile Name of the wordlist

    if (path == null || wordfile == null) {
      return new HashMap();
    }
    return getWordtable(new File(path, wordfile));
  
public static java.util.HashMap getWordtable(java.lang.String wordfile)

param
wordfile Complete path to the wordlist

    if (wordfile == null) {
      return new HashMap();
    }
    return getWordtable(new File(wordfile));
  
public static java.util.HashMap getWordtable(java.io.File wordfile)

param
wordfile File containing the wordlist

    // Reads wordfile line by line (one word per line) and hands the collected
    // words to makeWordTable. A null file, or any IOException while opening or
    // reading, yields an empty table.
    if (wordfile == null) {
      return new HashMap();
    }
    HashMap result = null;
    LineNumberReader lnr = null;
    try {
      lnr = new LineNumberReader(new FileReader(wordfile));
      String word = null;
      String[] stopwords = new String[100];
      int wordcount = 0;
      while ((word = lnr.readLine()) != null) {
        // Buffer full: grow it by 50 slots before storing the next word.
        if (wordcount == stopwords.length) {
          String[] tmp = new String[stopwords.length + 50];
          System.arraycopy(stopwords, 0, tmp, 0, wordcount);
          stopwords = tmp;
        }
        stopwords[wordcount++] = word;
      }
      result = makeWordTable(stopwords, wordcount);
    }
        // On error, use an empty table
    catch (IOException e) {
      result = new HashMap();
    }
    finally {
      // Always release the file handle, on success and on failure alike.
      if (lnr != null) {
        try {
          lnr.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if close() itself fails.
        }
      }
    }
    return result;
  
private static java.util.HashMap makeWordTable(java.lang.String[] words, int length)
Builds the wordlist table.

param
words Words that were read
param
length Number of words that were read into words

    // Build a table in which each of the first `length` entries of `words`
    // is stored as both key and value.
    final HashMap wordTable = new HashMap(length);
    int index = 0;
    while (index < length) {
      final String entry = words[index];
      wordTable.put(entry, entry);
      index++;
    }
    return wordTable;