// File: TestSort.java | Doc: API Doc | Category: Apache Lucene 1.9 | Size: 22547
// Date: Mon Feb 20 09:20:30 GMT 2006 | Package: org.apache.lucene.search

// TestSort.java

package org.apache.lucene.search;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.*;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import java.rmi.Naming;
import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry;
import java.io.IOException;
import java.io.Serializable;
import java.util.regex.Pattern;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;

import junit.framework.TestCase;
import junit.framework.Test;
import junit.framework.TestSuite;
import junit.textui.TestRunner;

/**
 * Unit tests for sorting code.
 *
 * <p>Created: Feb 17, 2004 4:55:10 PM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id: TestSort.java 189622 2005-06-08 19:28:47Z dnaber $
 */

public class TestSort
extends TestCase
implements Serializable {

	// searcher over every row of the data table (assigned in setUp)
	private Searcher full;
	// searcher over the even-numbered rows only (built by getXIndex)
	private Searcher searchX;
	// searcher over the odd-numbered rows only (built by getYIndex)
	private Searcher searchY;
	// term queries on the "contents" field for the terms "x", "y", "a" and "f"
	private Query queryX;
	private Query queryY;
	private Query queryA;
	private Query queryF;
	// sort under test; setUp installs a fresh Sort before every test method
	private Sort sort;


	// builds a test case; the name is handed unchanged to the TestCase constructor
	public TestSort (String name)
	{
		super(name);
	}

	// Command-line entry point.
	// With no arguments the whole suite is run by the text runner.
	// With "server" as first argument the remote searchable is published
	// and the process then sleeps so that it stays available for a while.
	// Any other first argument does nothing.
	public static void main (String[] argv) {
		if (argv == null || argv.length < 1) {
			TestRunner.run (suite());
		} else if ("server".equals (argv[0])) {
			TestSort test = new TestSort (null);
			try {
				test.startServer();
				// 500000 ms: how long the published searchable is kept alive
				Thread.sleep (500000);
			} catch (Exception e) {
				System.out.println (e);
				e.printStackTrace();
				if (e instanceof InterruptedException) {
					// do not lose the interruption: restore the thread's flag
					Thread.currentThread().interrupt();
				}
			}
		}
	}

	public static Test suite() {
		return new TestSuite (TestSort.class);
	}


	// document data:
	// the tracer field is used to determine which document was hit
	// the contents field is used to search and sort by relevance
	// the int field is used to sort by int
	// the float field is used to sort by float
	// the string field is used to sort by string
	// the custom field is used to sort with a custom comparator
	// One row per document.  getIndex stores column 0 as "tracer", indexes
	// column 1 as "contents", and indexes columns 2-5 as "int", "float",
	// "string" and "custom" only when the cell is not null.
	private String[][] data = new String[][] {
	// tracer  contents         int            float           string   custom
	{   "A",   "x a",           "5",           "4f",           "c",     "A-3"   },
	{   "B",   "y a",           "5",           "3.4028235E38", "i",     "B-10"  },
	{   "C",   "x a b c",       "2147483647",  "1.0",          "j",     "A-2"   },
	{   "D",   "y a b c",       "-1",          "0.0f",         "a",     "C-0"   },
	{   "E",   "x a b c d",     "5",           "2f",           "h",     "B-8"   },
	{   "F",   "y a b c d",     "2",           "3.14159f",     "g",     "B-1"   },
	{   "G",   "x a b c d",     "3",           "-1.0",         "f",     "C-100" },
	{   "H",   "y a b c d",     "0",           "1.4E-45",      "e",     "C-88"  },
	{   "I",   "x a b c d e f", "-2147483648", "1.0e+0",       "d",     "A-10"  },
	{   "J",   "y a b c d e f", "4",           ".5",           "b",     "C-7"   },
	// Z carries no sortable fields at all; it is only reachable through
	// the "f" term and is used by the tests that sort on missing values
	{   "Z",   "f",             null,          null,           null,    null    }
	};

	// create an index of all the documents, or just the x, or just the y documents
	// Builds an in-memory index from the data table and returns a searcher
	// over it.  Rows at even positions (0, 2, ...) are added when `even` is
	// true, rows at odd positions when `odd` is true.  The writer is closed
	// even when adding or optimizing fails, so it is never left open.
	private Searcher getIndex (boolean even, boolean odd)
	throws IOException {
		// names of the optional sort fields, in the order of data columns 2..5
		final String[] sortFieldNames = { "int", "float", "string", "custom" };

		RAMDirectory indexStore = new RAMDirectory ();
		IndexWriter writer = new IndexWriter (indexStore, new SimpleAnalyzer(), true);
		try {
			for (int i=0; i<data.length; ++i) {
				boolean wanted = ((i%2)==0) ? even : odd;
				if (!wanted) {
					continue;
				}
				Document doc = new Document();          // store, index, token
				doc.add (new Field ("tracer",   data[i][0], true, false, false));
				doc.add (new Field ("contents", data[i][1], false, true, true));
				// a null cell means the document simply lacks that field
				for (int f=0; f<sortFieldNames.length; ++f) {
					String value = data[i][f+2];
					if (value != null) {
						doc.add (new Field (sortFieldNames[f], value, false, true, false));
					}
				}
				writer.addDocument (doc);
			}
			writer.optimize ();
		} finally {
			writer.close ();
		}
		return new IndexSearcher (indexStore);
	}

	// searcher over an index holding every row of the data table
	private Searcher getFullIndex() throws IOException {
		final boolean evenRows = true;
		final boolean oddRows = true;
		return getIndex (evenRows, oddRows);
	}

	// searcher over an index holding only the even-numbered rows of the data table
	private Searcher getXIndex() throws IOException {
		final boolean evenRows = true;
		final boolean oddRows = false;
		return getIndex (evenRows, oddRows);
	}

	// searcher over an index holding only the odd-numbered rows of the data table
	private Searcher getYIndex() throws IOException {
		final boolean evenRows = false;
		final boolean oddRows = true;
		return getIndex (evenRows, oddRows);
	}

	// searcher over an index to which no row of the data table was added
	private Searcher getEmptyIndex() throws IOException {
		final boolean evenRows = false;
		final boolean oddRows = false;
		return getIndex (evenRows, oddRows);
	}

	// Runs before every test: builds the full, even-row and odd-row indexes,
	// the four term queries on "contents", and a fresh Sort.
	public void setUp() throws Exception {
		full = getFullIndex();
		searchX = getXIndex();
		searchY = getYIndex();
		queryX = new TermQuery (new Term ("contents", "x"));
		queryY = new TermQuery (new Term ("contents", "y"));
		queryA = new TermQuery (new Term ("contents", "a"));
		queryF = new TermQuery (new Term ("contents", "f"));
		sort = new Sort();
	}

	// Runs after every test: closes the three searchers opened by setUp so
	// they are released deterministically instead of being left open.
	// The null checks cover a setUp that failed part-way through.
	public void tearDown() throws Exception {
		try {
			if (full != null) full.close();
			if (searchX != null) searchX.close();
			if (searchY != null) searchY.close();
		} finally {
			full = null;
			searchX = null;
			searchY = null;
			super.tearDown();
		}
	}

	// test the sorts by score and document number
	public void testBuiltInSorts() throws Exception {
		final String expectedX = "ACEGI";
		final String expectedY = "BDFHJ";

		// a freshly constructed Sort
		sort = new Sort();
		assertMatches (full, queryX, sort, expectedX);
		assertMatches (full, queryY, sort, expectedY);

		// the same Sort switched to the predefined FIELD_DOC sort field
		sort.setSort (SortField.FIELD_DOC);
		assertMatches (full, queryX, sort, expectedX);
		assertMatches (full, queryY, sort, expectedY);
	}

	// test sorts where the type of field is specified
	public void testTypedSort() throws Exception {
		// each typed field is followed by FIELD_DOC as second sort criterion

		SortField[] byInt = { new SortField ("int", SortField.INT), SortField.FIELD_DOC };
		sort.setSort (byInt);
		assertMatches (full, queryX, sort, "IGAEC");
		assertMatches (full, queryY, sort, "DHFJB");

		SortField[] byFloat = { new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC };
		sort.setSort (byFloat);
		assertMatches (full, queryX, sort, "GCIEA");
		assertMatches (full, queryY, sort, "DHJFB");

		SortField[] byString = { new SortField ("string", SortField.STRING), SortField.FIELD_DOC };
		sort.setSort (byString);
		assertMatches (full, queryX, sort, "AIGEC");
		assertMatches (full, queryY, sort, "DJHFB");
	}

	// test sorts when there's nothing in the index
	public void testEmptyIndex() throws Exception {
		Searcher empty = getEmptyIndex();
		final String noHits = "";

		sort = new Sort();
		assertMatches (empty, queryX, sort, noHits);

		sort.setSort (SortField.FIELD_DOC);
		assertMatches (empty, queryX, sort, noHits);

		// multi-field sorts, tried one after the other against the empty index
		SortField[][] fieldSorts = {
			{ new SortField ("int", SortField.INT), SortField.FIELD_DOC },
			{ new SortField ("string", SortField.STRING, true), SortField.FIELD_DOC },
			{ new SortField ("float", SortField.FLOAT), new SortField ("string", SortField.STRING) }
		};
		for (int s = 0; s < fieldSorts.length; ++s) {
			sort.setSort (fieldSorts[s]);
			assertMatches (empty, queryX, sort, noHits);
		}
	}

	// test sorts where the type of field is determined dynamically
	public void testAutoSort() throws Exception {
		String[][] cases = {
			// field     queryX    queryY
			{ "int",    "IGAEC",  "DHFJB" },
			{ "float",  "GCIEA",  "DHJFB" },
			{ "string", "AIGEC",  "DJHFB" }
		};
		for (int c = 0; c < cases.length; ++c) {
			// only the field name is given; no SortField type is specified
			sort.setSort (cases[c][0]);
			assertMatches (full, queryX, sort, cases[c][1]);
			assertMatches (full, queryY, sort, cases[c][2]);
		}
	}

	// test sorts in reverse
	public void testReverseSort() throws Exception {
		// reversed score, then document order
		SortField[] scoreReversed = { new SortField (null, SortField.SCORE, true), SortField.FIELD_DOC };
		sort.setSort (scoreReversed);
		assertMatches (full, queryX, sort, "IEGCA");
		assertMatches (full, queryY, sort, "JFHDB");

		// reversed document order
		SortField docReversed = new SortField (null, SortField.DOC, true);
		sort.setSort (docReversed);
		assertMatches (full, queryX, sort, "IGECA");
		assertMatches (full, queryY, sort, "JHFDB");

		// reversed sorts on named fields
		String[][] cases = {
			// field     queryX    queryY
			{ "int",    "CAEGI",  "BJFHD" },
			{ "float",  "AECIG",  "BFJHD" },
			{ "string", "CEGIA",  "BFHJD" }
		};
		for (int c = 0; c < cases.length; ++c) {
			sort.setSort (cases[c][0], true);
			assertMatches (full, queryX, sort, cases[c][1]);
			assertMatches (full, queryY, sort, cases[c][2]);
		}
	}

	// test sorting when the sort field is empty (undefined) for some of the documents
	public void testEmptyFieldSort() throws Exception {
		// queryF hits I, J and Z; Z has none of the sort fields
		String[][] cases = {
			// field     forward  reversed
			{ "string", "ZJI",   "IJZ" },
			{ "int",    "IZJ",   "JZI" },
			{ "float",  "ZJI",   "IJZ" }
		};
		for (int c = 0; c < cases.length; ++c) {
			String field = cases[c][0];

			sort.setSort (field);
			assertMatches (full, queryF, sort, cases[c][1]);

			sort.setSort (field, true);
			assertMatches (full, queryF, sort, cases[c][2]);
		}
	}

	// test sorts using a series of fields
	public void testSortCombos() throws Exception {
		String[] intThenFloat = { "int", "float" };
		sort.setSort (intThenFloat);
		assertMatches (full, queryX, sort, "IGEAC");

		SortField[] intReversedThenDocReversed = {
			new SortField ("int", true),
			new SortField (null, SortField.DOC, true)
		};
		sort.setSort (intReversedThenDocReversed);
		assertMatches (full, queryX, sort, "CEAGI");

		String[] floatThenString = { "float", "string" };
		sort.setSort (floatThenString);
		assertMatches (full, queryX, sort, "GICEA");
	}

	// test using a Locale for sorting strings
	public void testLocaleSort() throws Exception {
		// "string" field sorted with the US locale
		SortField[] usForward = { new SortField ("string", Locale.US) };
		sort.setSort (usForward);
		assertMatches (full, queryX, sort, "AIGEC");
		assertMatches (full, queryY, sort, "DJHFB");

		// same, with the reverse flag set
		SortField[] usReversed = { new SortField ("string", Locale.US, true) };
		sort.setSort (usReversed);
		assertMatches (full, queryX, sort, "CEGIA");
		assertMatches (full, queryY, sort, "BFHJD");
	}

	// test a custom sort function
	public void testCustomSorts() throws Exception {
		final String expectedX = "CAIEG";
		final String expectedYReversed = "HJDBF";

		// sort through the comparator source supplied by SampleComparable
		SortField viaSource = new SortField ("custom", SampleComparable.getComparatorSource());
		sort.setSort (viaSource);
		assertMatches (full, queryX, sort, expectedX);

		SortField viaSourceReversed = new SortField ("custom", SampleComparable.getComparatorSource(), true);
		sort.setSort (viaSourceReversed);
		assertMatches (full, queryY, sort, expectedYReversed);

		// sort through a single SortComparator instance used for both directions
		SortComparator custom = SampleComparable.getComparator();

		SortField viaComparator = new SortField ("custom", custom);
		sort.setSort (viaComparator);
		assertMatches (full, queryX, sort, expectedX);

		SortField viaComparatorReversed = new SortField ("custom", custom, true);
		sort.setSort (viaComparatorReversed);
		assertMatches (full, queryY, sort, expectedYReversed);
	}

	// test a variety of sorts using more than one searcher
	public void testMultiSort() throws Exception {
		// the even-row and odd-row searchers, combined by one MultiSearcher
		Searchable[] halves = { searchX, searchY };
		runMultiSorts (new MultiSearcher (halves));
	}

	// test a variety of sorts using a parallel multisearcher
	public void testParallelMultiSort() throws Exception {
		// the even-row and odd-row searchers, combined by a ParallelMultiSearcher
		Searchable[] halves = { searchX, searchY };
		runMultiSorts (new ParallelMultiSearcher (halves));
	}

	// test a variety of sorts using a remote searcher
	public void testRemoteSort() throws Exception {
		// the remote searchable is wrapped in a MultiSearcher before the sorts are run
		Searchable[] remoteOnly = { getRemote() };
		MultiSearcher multi = new MultiSearcher (remoteOnly);
		runMultiSorts (multi);
	}

	// test custom search when remote
	public void testRemoteCustomSort() throws Exception {
		final String expectedX = "CAIEG";
		final String expectedYReversed = "HJDBF";

		Searchable[] remoteOnly = { getRemote() };
		MultiSearcher multi = new MultiSearcher (remoteOnly);

		// sort through the comparator source supplied by SampleComparable
		SortField viaSource = new SortField ("custom", SampleComparable.getComparatorSource());
		sort.setSort (viaSource);
		assertMatches (multi, queryX, sort, expectedX);

		SortField viaSourceReversed = new SortField ("custom", SampleComparable.getComparatorSource(), true);
		sort.setSort (viaSourceReversed);
		assertMatches (multi, queryY, sort, expectedYReversed);

		// sort through a single SortComparator instance used for both directions
		SortComparator custom = SampleComparable.getComparator();

		SortField viaComparator = new SortField ("custom", custom);
		sort.setSort (viaComparator);
		assertMatches (multi, queryX, sort, expectedX);

		SortField viaComparatorReversed = new SortField ("custom", custom, true);
		sort.setSort (viaComparatorReversed);
		assertMatches (multi, queryY, sort, expectedYReversed);
	}

	// test that the relevancy scores are the same even if
	// hits are sorted
	public void testNormalizedScores() throws Exception {

		// capture relevancy scores from unsorted searches
		HashMap scoresX = getScores (full.search (queryX));
		HashMap scoresY = getScores (full.search (queryY));
		HashMap scoresA = getScores (full.search (queryA));

		// we'll test searching locally, remote and multi
		// note: the multi test depends on each separate index containing
		// the same documents as our local index, so the computed normalization
		// will be the same.  so we make a multi searcher over two equal document
		// sets - not realistic, but necessary for testing.
		MultiSearcher remote = new MultiSearcher (new Searchable[] { getRemote() });
		MultiSearcher multi  = new MultiSearcher (new Searchable[] { full, full });

		// change sorting and make sure relevancy stays the same

		sort = new Sort();
		assertScoresUnchanged (remote, multi, scoresX, scoresY, scoresA);

		sort.setSort(SortField.FIELD_DOC);
		assertScoresUnchanged (remote, multi, scoresX, scoresY, scoresA);

		sort.setSort ("int");
		assertScoresUnchanged (remote, multi, scoresX, scoresY, scoresA);

		sort.setSort ("float");
		assertScoresUnchanged (remote, multi, scoresX, scoresY, scoresA);

		sort.setSort ("string");
		assertScoresUnchanged (remote, multi, scoresX, scoresY, scoresA);

		sort.setSort (new String[] {"int","float"});
		assertScoresUnchanged (remote, multi, scoresX, scoresY, scoresA);

		sort.setSort (new SortField[] { new SortField ("int", true), new SortField (null, SortField.DOC, true) });
		assertScoresUnchanged (remote, multi, scoresX, scoresY, scoresA);

		sort.setSort (new String[] {"float","string"});
		assertScoresUnchanged (remote, multi, scoresX, scoresY, scoresA);

	}

	// Searches with the current value of the sort field and checks that every
	// hit still has the score recorded in the matching expected map.
	// For each of queryX, queryY and queryA, in that order, the local full
	// searcher is checked first, then `remote`, then `multi`.
	private void assertScoresUnchanged (Searcher remote, Searcher multi,
			HashMap scoresX, HashMap scoresY, HashMap scoresA)
	throws IOException {
		Query[] queries = { queryX, queryY, queryA };
		HashMap[] expected = { scoresX, scoresY, scoresA };
		for (int q = 0; q < queries.length; ++q) {
			assertSameValues (expected[q], getScores (full.search (queries[q], sort)));
			assertSameValues (expected[q], getScores (remote.search (queries[q], sort)));
			// the multi-searcher check is deliberately skipped for queryA:
			// that assertion is disabled in this test (reason not recorded)
			if (queries[q] != queryA) {
				assertSameValues (expected[q], getScores (multi.search (queries[q], sort)));
			}
		}
	}

	// runs a variety of sorts useful for multisearchers
	private void runMultiSorts (Searcher multi) throws Exception {
		// --- queryA: patterns are used where more than one order is accepted ---

		sort.setSort (SortField.FIELD_DOC);
		assertMatchesPattern (multi, queryA, sort, "[AB]{2}[CD]{2}[EF]{2}[GH]{2}[IJ]{2}");

		SortField intOnly = new SortField ("int", SortField.INT);
		sort.setSort (intOnly);
		assertMatchesPattern (multi, queryA, sort, "IDHFGJ[ABE]{3}C");

		SortField[] intThenDoc = { new SortField ("int", SortField.INT), SortField.FIELD_DOC };
		sort.setSort (intThenDoc);
		assertMatchesPattern (multi, queryA, sort, "IDHFGJ[AB]{2}EC");

		sort.setSort ("int");
		assertMatchesPattern (multi, queryA, sort, "IDHFGJ[AB]{2}EC");

		SortField[] floatThenDoc = { new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC };
		sort.setSort (floatThenDoc);
		assertMatchesPattern (multi, queryA, sort, "GDHJ[CI]{2}EFAB");

		sort.setSort ("float");
		assertMatchesPattern (multi, queryA, sort, "GDHJ[CI]{2}EFAB");

		sort.setSort ("string");
		assertMatches (multi, queryA, sort, "DJAIHGFEBC");

		// --- queryA, reversed ---

		sort.setSort ("int", true);
		assertMatchesPattern (multi, queryA, sort, "C[AB]{2}EJGFHDI");

		sort.setSort ("float", true);
		assertMatchesPattern (multi, queryA, sort, "BAFE[IC]{2}JHDG");

		sort.setSort ("string", true);
		assertMatches (multi, queryA, sort, "CBEFGHIAJD");

		// --- queryA, locale-based string sort ---

		SortField[] usForward = { new SortField ("string", Locale.US) };
		sort.setSort (usForward);
		assertMatches (multi, queryA, sort, "DJAIHGFEBC");

		SortField[] usReversed = { new SortField ("string", Locale.US, true) };
		sort.setSort (usReversed);
		assertMatches (multi, queryA, sort, "CBEFGHIAJD");

		// --- queryA, two named fields ---

		String[] intThenFloat = { "int", "float" };
		sort.setSort (intThenFloat);
		assertMatches (multi, queryA, sort, "IDHFGJEABC");

		String[] floatThenString = { "float", "string" };
		sort.setSort (floatThenString);
		assertMatches (multi, queryA, sort, "GDHJICEFAB");

		// --- queryF: its hits include Z, which has no sort fields ---

		sort.setSort ("int");
		assertMatches (multi, queryF, sort, "IZJ");

		sort.setSort ("int", true);
		assertMatches (multi, queryF, sort, "JZI");

		sort.setSort ("float");
		assertMatches (multi, queryF, sort, "ZJI");

		sort.setSort ("string");
		assertMatches (multi, queryF, sort, "ZJI");

		sort.setSort ("string", true);
		assertMatches (multi, queryF, sort, "IJZ");
	}

	// make sure the documents returned by the search match the expected list
	private void assertMatches (Searcher searcher, Query query, Sort sort, String expectedResult)
	throws IOException {
		Hits hits = searcher.search (query, sort);
		final int hitCount = hits.length();

		// concatenate the stored "tracer" values of the hits, in hit order
		StringBuffer tracers = new StringBuffer(10);
		int hit = 0;
		while (hit < hitCount) {
			String[] values = hits.doc (hit).getValues ("tracer");
			int k = 0;
			while (k < values.length) {
				tracers.append (values[k]);
				k++;
			}
			hit++;
		}

		String actualResult = tracers.toString();
		assertEquals (expectedResult, actualResult);
	}

	// make sure the documents returned by the search match the expected list pattern
	// Runs the sorted search, concatenates the stored "tracer" values of the
	// hits in hit order, and requires the whole string to match `pattern`
	// (a java.util.regex expression).  The failure message shows both the
	// actual order and the pattern so a mismatch can be diagnosed.
	private void assertMatchesPattern (Searcher searcher, Query query, Sort sort, String pattern)
	throws IOException {
		Hits result = searcher.search (query, sort);
		StringBuffer buff = new StringBuffer(10);
		int n = result.length();
		for (int i=0; i<n; ++i) {
			Document doc = result.doc(i);
			String[] v = doc.getValues("tracer");
			for (int j=0; j<v.length; ++j) {
				buff.append (v[j]);
			}
		}
		String actual = buff.toString();
		assertTrue ("hit order \"" + actual + "\" does not match pattern \"" + pattern + "\"",
			Pattern.compile(pattern).matcher(actual).matches());
	}

	// Maps the "tracer" value of every hit to its score (as a Float).
	// Each hit must carry exactly one tracer value; otherwise the test fails.
	private HashMap getScores (Hits hits)
	throws IOException {
		HashMap scoreMap = new HashMap();
		int n = hits.length();
		for (int i=0; i<n; ++i) {
			Document doc = hits.doc(i);
			String[] v = doc.getValues("tracer");
			// expected value goes first so a failure reports "expected:<1>"
			assertEquals (1, v.length);
			scoreMap.put (v[0], new Float(hits.score(i)));
		}
		return scoreMap;
	}

	// make sure all the values in the maps match
	private void assertSameValues (HashMap m1, HashMap m2) {
		// both maps must have the same number of entries
		assertEquals (m1.size(), m2.size());

		// and every key of m1 must map to an equal value in m2
		for (Iterator keys = m1.keySet().iterator(); keys.hasNext(); ) {
			Object key = keys.next();
			Object expected = m1.get (key);
			Object actual = m2.get (key);
			assertEquals (expected, actual);
		}
	}

	// Returns the remote searchable.  If the first lookup fails (for example
	// because nothing has been published yet) the server is started in this
	// JVM and the lookup is tried once more.  Only Exceptions trigger the
	// retry; Errors propagate to the caller.
	private Searchable getRemote () throws Exception {
		try {
			return lookupRemote ();
		} catch (Exception e) {
			startServer ();
			return lookupRemote ();
		}
	}

	// looks up the searchable bound under the name used by startServer
	private Searchable lookupRemote () throws Exception {
		final String url = "//localhost/SortedSearchable";
		Object bound = Naming.lookup (url);
		return (Searchable) bound;
	}

	// Creates an RMI registry in this JVM on the default registry port and
	// binds a RemoteSearchable over a freshly built full index under
	// "//localhost/SortedSearchable".
	private void startServer () throws Exception {
		// construct an index
		Searcher local = getFullIndex();

		// publish it; the registry has to exist before the name can be bound
		LocateRegistry.createRegistry (Registry.REGISTRY_PORT);
		RemoteSearchable impl = new RemoteSearchable (local);
		Naming.rebind ("//localhost/SortedSearchable", impl);
	}

}