package org.apache.lucene.wordnet;
import org.apache.lucene.store.*;
import org.apache.lucene.search.*;
import org.apache.lucene.index.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import java.io.*;
import java.util.*;
/**
 * Test program to look up synonyms in a synonym index, plus a helper that
 * expands a free-text query with the synonyms stored in such an index.
 *
 * The index is expected to contain documents whose {@link Syns2Index#F_WORD}
 * field holds a word and whose {@link Syns2Index#F_SYN} field(s) hold its
 * synonyms.
 */
public class SynLookup {

    /** Shared empty result so callers never have to deal with null. */
    private static final String[] NO_VALUES = new String[0];

    /**
     * Command line entry point: prints every synonym stored for a word.
     *
     * @param args <code>args[0]</code> is the path of the synonym index,
     *             <code>args[1]</code> is the word to look up
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.out.println(
                "java org.apache.lucene.wordnet.SynLookup <index path> <word>");
            // Without both arguments there is nothing to look up; stop here
            // instead of failing on args[0] / args[1] below.
            return;
        }

        String word = args[1];
        FSDirectory directory = FSDirectory.getDirectory(args[0], false);
        try {
            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                Hits hits = searcher.search(
                    new TermQuery(new Term(Syns2Index.F_WORD, word)));

                if (hits.length() == 0) {
                    System.out.println("No synonyms found for " + word);
                } else {
                    System.out.println("Synonyms found for \"" + word + "\":");
                }

                for (int i = 0; i < hits.length(); i++) {
                    String[] values = synonymValues(hits.doc(i));
                    for (int j = 0; j < values.length; j++) {
                        System.out.println(values[j]);
                    }
                }
            } finally {
                // Release the searcher even if searching or loading a document failed.
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Perform synonym expansion on a query.
     *
     * The query text is tokenized with the given analyzer. Every distinct token
     * becomes an optional (SHOULD) term clause on <code>field</code>, and every
     * synonym found for a token that has not been added yet becomes a further
     * optional term clause on the same field.
     *
     * @param query the user's query text
     * @param syns  searcher over the synonym index
     * @param a     analyzer used to split <code>query</code> into words
     * @param field name of the field the resulting term clauses search
     * @param boost boost applied to synonym clauses when greater than 0;
     *              otherwise synonym clauses keep the default boost
     * @return a boolean query holding the original words and their synonyms
     * @throws IOException if tokenizing the query or reading the synonym index fails
     */
    public static Query expand( String query,
                                Searcher syns,
                                Analyzer a,
                                String field,
                                float boost)
        throws IOException
    {
        Set already = new HashSet(); // avoid dups
        List top = new ArrayList();  // distinct query words, in order of appearance

        // [1] Parse query into separate words so that when we expand we can avoid dups
        TokenStream ts = a.tokenStream( field, new StringReader( query));
        try {
            org.apache.lucene.analysis.Token t;
            while ( (t = ts.next()) != null) {
                String word = t.termText();
                if ( already.add( word)) {
                    top.add( word);
                }
            }
        } finally {
            // The stream is ours; release it even if tokenizing failed.
            ts.close();
        }

        // [2] form query
        BooleanQuery tmp = new BooleanQuery();
        Iterator it = top.iterator();
        while ( it.hasNext()) {
            // [2a] add top level words in
            String word = (String) it.next();
            tmp.add( new TermQuery( new Term( field, word)), BooleanClause.Occur.SHOULD);

            // [2b] add in unique synonyms
            Hits hits = syns.search( new TermQuery( new Term( Syns2Index.F_WORD, word)));
            for ( int i = 0; i < hits.length(); i++) {
                String[] values = synonymValues( hits.doc( i));
                for ( int j = 0; j < values.length; j++) {
                    String syn = values[ j];
                    if ( already.add( syn)) {
                        TermQuery tq = new TermQuery( new Term( field, syn));
                        if ( boost > 0) { // else keep normal 1.0
                            tq.setBoost( boost);
                        }
                        tmp.add( tq, BooleanClause.Occur.SHOULD);
                    }
                }
            }
        }
        return tmp;
    }

    /**
     * Returns the synonym values stored in a document, never null.
     *
     * Guards against Document.getValues returning null for a document that
     * has no synonym field, so callers can iterate unconditionally.
     */
    private static String[] synonymValues( Document doc) {
        String[] values = doc.getValues( Syns2Index.F_SYN);
        return values == null ? NO_VALUES : values;
    }
}
|