Methods Summary |
---|
public void | count(java.lang.String queryString)
// Run the query and report only how many documents matched, then release the searcher.
Hits found = initSearch(queryString);
System.out.println(found.length() + " total documents");
searcher.close();
|
private org.apache.lucene.search.Query | explainQuery(java.lang.String queryString)
// Parse queryString against every *indexed* field and return the resulting
// Query. Side effects: (re)opens 'searcher', refreshes the field vectors via
// getFieldInfo(), and stores the parsed query in the 'query' member.
searcher = new IndexSearcher(indexName);
Analyzer analyzer = new StandardAnalyzer();
getFieldInfo();
// Copy the indexed-field names into a String[] for the multi-field parse.
int arraySize = indexedFields.size();
String indexedArray[] = new String[arraySize];
for (int ii = 0; ii < arraySize; ii++) {
indexedArray[ii] = (String) indexedFields.get(ii);
}
// parse(String, String[], Analyzer) is a static method; the parser instance
// previously constructed here was never actually used, so it is removed.
query = MultiFieldQueryParser.parse(queryString, indexedArray, analyzer);
System.out.println("Searching for: " + query.toString());
return (query);
|
private void | getFieldInfo()
// Rebuild the 'fields' and 'indexedFields' vectors from the index metadata.
// NOTE(review): 'fieldIterator' is not declared locally — it appears to be an
// instance field, reused for both passes below; confirm nothing else depends on it.
IndexReader indexReader = IndexReader.open(indexName);
fields = new Vector();
indexedFields = new Vector();
//get the list of all field names
fieldIterator = indexReader.getFieldNames().iterator();
while (fieldIterator.hasNext()) {
Object field = fieldIterator.next();
// skip null/empty names so the vectors contain only usable field names
if (field != null && !field.equals(""))
fields.add(field.toString());
}
//
//get the list of indexed field names only (the 'true' argument)
fieldIterator = indexReader.getFieldNames(true).iterator();
while (fieldIterator.hasNext()) {
Object field = fieldIterator.next();
if (field != null && !field.equals(""))
indexedFields.add(field.toString());
}
indexReader.close();
|
public static java.util.Map.Entry[] | getSortedHashtableEntries(java.util.Hashtable h)Sort Hashtable values
// Return the table's entries as an array ordered by descending value.
Set entrySet = h.entrySet();
Entry[] sorted = (Entry[]) entrySet.toArray(new Entry[entrySet.size()]);
Arrays.sort(sorted, new Comparator() {
public int compare(Object a, Object b) {
Object left = ((Entry) a).getValue();
Object right = ((Entry) b).getValue();
// operands reversed so larger values sort first (descending order)
return ((Comparable) right).compareTo(left);
}
});
return sorted;
|
public void | info()
// Dump basic statistics about the index: document count, field lists, lock state.
IndexReader reader = IndexReader.open(indexName);
getFieldInfo();
numDocs = reader.numDocs();
message("Index has " + numDocs + " documents ");
message("All Fields:" + fields.toString());
message("Indexed Fields:" + indexedFields.toString());
if (IndexReader.isLocked(indexName)) {
message("Index is locked");
}
reader.close();
|
private org.apache.lucene.search.Hits | initSearch(java.lang.String queryString)
// Open the searcher, parse queryString across *all* known fields, run the
// search and return the Hits. Side effects: sets 'searcher', 'query' and
// 'fieldsArray' for later use by printHit()/search().
searcher = new IndexSearcher(indexName);
Analyzer analyzer = new StandardAnalyzer();
getFieldInfo();
// Copy every known field name into fieldsArray (also reused by printHit()).
int arraySize = fields.size();
fieldsArray = new String[arraySize];
for (int ii = 0; ii < arraySize; ii++) {
fieldsArray[ii] = (String) fields.get(ii);
}
// parse(String, String[], Analyzer) is a static method; the parser instance
// previously constructed here was never actually used, so it is removed.
query = MultiFieldQueryParser.parse(queryString, fieldsArray, analyzer);
System.out.println("Searching for: " + query.toString());
Hits hits = searcher.search(query);
return (hits);
|
private void | invertDocument(org.apache.lucene.document.Document doc)
// Re-analyze the stored fields of 'doc' and print its ten most frequent
// terms (a rough "what is this document about" summary).
Hashtable tokenHash = new Hashtable();
final int maxFieldLength = 10000;
Analyzer analyzer = new StandardAnalyzer();
Enumeration fields = doc.fields();
while (fields.hasMoreElements()) {
Field field = (Field) fields.nextElement();
String fieldName = field.name();
if (field.isIndexed()) {
if (field.isTokenized()) { // tokenized field (NOTE: original comment wrongly said "un-tokenized")
Reader reader; // find or make Reader
if (field.readerValue() != null)
reader = field.readerValue();
else if (field.stringValue() != null)
reader = new StringReader(field.stringValue());
else
throw new IllegalArgumentException
("field must have either String or Reader value");
int position = 0;
// Tokenize field and add to postingTable
TokenStream stream = analyzer.tokenStream(fieldName, reader);
try {
for (Token t = stream.next(); t != null; t = stream.next()) {
// honor position increments (e.g. stop-word gaps) when advancing position
position += (t.getPositionIncrement() - 1);
position++;
String name = t.termText();
Integer Count = (Integer) tokenHash.get(name);
if (Count == null) { // not in there yet
tokenHash.put(name, new Integer(1)); //first one
} else {
int count = Count.intValue();
tokenHash.put(name, new Integer(count + 1));
}
// bail out once the field grows past maxFieldLength positions
if (position > maxFieldLength) break;
}
} finally {
stream.close();
}
}
}
}
// print the top ten terms, highest frequency first (descending sort)
Entry[] sortedHash = getSortedHashtableEntries(tokenHash);
for (int ii = 0; ii < sortedHash.length && ii < 10; ii++) {
Entry currentEntry = sortedHash[ii];
message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
}
|
public static void | message(java.lang.String s)
// Convenience wrapper: write one line to stdout.
System.out.println(s);
|
public void | optimize()
// Merge the index segments and report how long the optimize took.
// The writer is opened against the existing index (create == false).
IndexWriter writer = new IndexWriter(indexName, new StandardAnalyzer(), false);
message("Starting to optimize index.");
long startTime = System.currentTimeMillis();
writer.optimize();
message("Done optimizing index. Took " + (System.currentTimeMillis() - startTime) + " msecs");
writer.close();
|
private void | printHit(org.apache.lucene.document.Document doc)
// Print every stored value of every known field for one hit.
// (Another option would be to just do message(doc).)
for (int f = 0; f < fieldsArray.length; f++) {
String name = fieldsArray[f];
String[] values = doc.getValues(name);
if (values == null) {
// field has no stored values in this document
message(name + ": <not available>");
} else {
for (int v = 0; v < values.length; v++) {
message(name + ":" + values[v]);
}
}
}
|
public void | search(java.lang.String queryString, boolean explain, boolean showTokens, jline.ConsoleReader cr)
// Run queryString against the index and page matching documents to stdout,
// HITS_PER_PAGE at a time, prompting the user between pages. Optionally
// prints a score explanation and the top document tokens for each hit.
Hits hits = initSearch(queryString);
System.out.println(hits.length() + " total matching documents");
if (explain) {
query = explainQuery(queryString);
}
final int HITS_PER_PAGE = 10;
message("--------------------------------------");
for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
int end = Math.min(hits.length(), start + HITS_PER_PAGE);
for (int ii = start; ii < end; ii++) {
Document doc = hits.doc(ii);
message("---------------- " + (ii + 1) + " score:" + hits.score(ii) + "---------------------");
printHit(doc);
if (showTokens) {
invertDocument(doc);
}
if (explain) {
Explanation exp = searcher.explain(query, hits.id(ii));
message("Explanation:" + exp.toString());
}
}
message("#################################################");
if (hits.length() > end) {
// TODO: don't let the input end up in the command line history
queryString = cr.readLine("more (y/n) ? ");
// BUG FIX: the original compared against the unterminated literal 'n"
// (a compile error); also guard against readLine() returning null at EOF.
// Stop paging on EOF, empty input, or anything starting with 'n'.
if (queryString == null || queryString.length() == 0 || queryString.charAt(0) == 'n')
break;
}
}
searcher.close();
|
public void | terms(java.lang.String field)Provides a list of the top terms of the index.
// List up to 100 terms with their document frequencies, sorted
// alphabetically by the "field:text" key (TreeMap ordering). When 'field'
// is non-null only terms of that field are listed.
TreeMap termMap = new TreeMap();
IndexReader indexReader = IndexReader.open(indexName);
TermEnum terms = indexReader.terms();
while (terms.next()) {
Term term = terms.term();
//if we're either not looking by field or we're matching the specific field
if ((field == null) || field.equals(term.field()))
termMap.put(term.field() + ":" + term.text(), new Integer((terms.docFreq())));
}
// close the enumeration as well as the reader (was previously leaked)
terms.close();
// iterate entries directly instead of keySet() + get() (avoids a second lookup per key)
Iterator entryIterator = termMap.entrySet().iterator();
for (int ii = 0; entryIterator.hasNext() && ii < 100; ii++) {
Entry entry = (Entry) entryIterator.next();
message(entry.getKey() + ": " + entry.getValue());
}
indexReader.close();
|