File | Doc | Category | Size | Date | Package
WordlistLoader.java | API Doc | Apache Lucene 1.9 | 4025 | Mon Feb 20 09:19:46 GMT 2006 | org.apache.lucene.analysis

WordlistLoader

public class WordlistLoader extends Object
Loader for text files that represent a list of stopwords.
author
Gerhard Schwarz
version
$Id: WordlistLoader.java 192989 2005-06-22 19:59:03Z dnaber $

Fields Summary
Constructors Summary
Methods Summary
public static java.util.HashSet getWordSet(java.io.File wordfile)
Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). Every line of the file should contain only one word. The words need to be in lowercase if you make use of an Analyzer which uses LowerCaseFilter (like StandardAnalyzer).

param
wordfile File containing the wordlist
return
A HashSet with the file's words

    // Open the word file and hand it to the Reader-based overload, which
    // does the actual line parsing. The file reader is closed on every path,
    // including when loading fails part-way through.
    FileReader fileReader = new FileReader(wordfile);
    try {
      return getWordSet(fileReader);
    }
    finally {
      fileReader.close();
    }
  
public static java.util.HashSet getWordSet(java.io.Reader reader)
Reads lines from a Reader and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). Every line of the Reader should contain only one word. The words need to be in lowercase if you make use of an Analyzer which uses LowerCaseFilter (like StandardAnalyzer).

param
reader Reader containing the wordlist
return
A HashSet with the reader's words

    // Reuse the caller's BufferedReader when one is supplied; otherwise wrap
    // the plain Reader so the input can be consumed line by line.
    BufferedReader lines = (reader instanceof BufferedReader)
        ? (BufferedReader) reader
        : new BufferedReader(reader);
    HashSet words = new HashSet();
    try {
      // One entry per line, with surrounding whitespace stripped.
      for (String line = lines.readLine(); line != null; line = lines.readLine()) {
        words.add(line.trim());
      }
    }
    finally {
      // Closing the buffered reader also closes the reader it wraps.
      lines.close();
    }
    return words;
  
public static java.util.Hashtable getWordtable(java.lang.String path, java.lang.String wordfile)

param
path Path to the wordlist
param
wordfile Name of the wordlist
deprecated
Use {@link #getWordSet(File)} instead

    return getWordtable(new File(path, wordfile));
  
public static java.util.Hashtable getWordtable(java.lang.String wordfile)

param
wordfile Complete path to the wordlist
deprecated
Use {@link #getWordSet(File)} instead

    return getWordtable(new File(wordfile));
  
public static java.util.Hashtable getWordtable(java.io.File wordfile)

param
wordfile File object that points to the wordlist
deprecated
Use {@link #getWordSet(File)} instead

    HashSet wordSet = (HashSet)getWordSet(wordfile);
    Hashtable result = makeWordTable(wordSet);
    return result;
  
private static java.util.Hashtable makeWordTable(java.util.HashSet wordSet)
Builds a wordlist table, using words as both keys and values for backward compatibility.

param
wordSet stopword set

    Hashtable wordTable = new Hashtable();
    Iterator words = wordSet.iterator();
    while (words.hasNext()) {
      // Each word is stored as both key and value.
      String entry = (String) words.next();
      wordTable.put(entry, entry);
    }
    return wordTable;