File: NGramTokenFilterTest.java
Doc: API Doc
Category: Apache Lucene 2.2.0
Size: 4763
Date: Sat Jun 16 22:21:04 BST 2007
Package: org.apache.lucene.analysis.ngram

NGramTokenFilterTest

public class NGramTokenFilterTest extends TestCase
Tests {@link NGramTokenFilter} for correctness.
author: Otis Gospodnetic
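
For context, the following minimal sketch (not part of the test class; the class name NGramExample is only illustrative) shows how an NGramTokenFilter is driven with the Lucene 2.2 token-stream API, mirroring the loop used by the tests below: wrap a tokenizer, then pull Tokens with next() until it returns null.

import java.io.StringReader;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;

public class NGramExample {
    public static void main(String[] args) throws Exception {
        // Tokenize "abcde" as a single term, then emit its 2- and 3-grams.
        TokenStream input = new WhitespaceTokenizer(new StringReader("abcde"));
        TokenStream ngrams = new NGramTokenFilter(input, 2, 3);

        // In this API, next() returns the next Token, or null at end of stream.
        for (Token token = ngrams.next(); token != null; token = ngrams.next()) {
            System.out.println(token); // e.g. (ab,0,2), (bc,1,3), ..., (cde,2,5)
        }
    }
}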

Fields Summary
private TokenStream input
private ArrayList tokens
Constructors Summary
Methods Summary
public void setUp()

        input = new WhitespaceTokenizer(new StringReader("abcde"));
        // Start each test with a fresh, non-null token list.
        tokens = new ArrayList();
public void testBigrams()

        NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2);

        Token token = null;
        do {
            token = filter.next();
            if (token != null) {
                tokens.add(token.toString());
            }
        } while (token != null);

        assertEquals(4, tokens.size());
        ArrayList exp = new ArrayList();
        exp.add("(ab,0,2)"); exp.add("(bc,1,3)"); exp.add("(cd,2,4)"); exp.add("(de,3,5)");
        assertEquals(exp, tokens);
public void testInvalidInput()

        boolean gotException = false;
        try {
            new NGramTokenFilter(input, 2, 1);
        } catch (IllegalArgumentException e) {
            gotException = true;
        }
        assertTrue(gotException);
public void testInvalidInput2()

        boolean gotException = false;
        try {
            new NGramTokenFilter(input, 0, 1);
        } catch (IllegalArgumentException e) {
            gotException = true;
        }
        assertTrue(gotException);
public void testNgrams()

        NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3);

        Token token = null;
        do {
            token = filter.next();
            if (token != null) {
                tokens.add(token.toString());
            }
        } while (token != null);

        assertEquals(12, tokens.size());
        ArrayList exp = new ArrayList();
        exp.add("(a,0,1)"); exp.add("(b,1,2)"); exp.add("(c,2,3)"); exp.add("(d,3,4)"); exp.add("(e,4,5)");
        exp.add("(ab,0,2)"); exp.add("(bc,1,3)"); exp.add("(cd,2,4)"); exp.add("(de,3,5)");
        exp.add("(abc,0,3)"); exp.add("(bcd,1,4)"); exp.add("(cde,2,5)");
        assertEquals(exp, tokens);
public void testOversizedNgrams()

        NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7);

        Token token = null;
        do {
            token = filter.next();
            if (token != null) {
                tokens.add(token.toString());
            }
        } while (token != null);

        assertTrue(tokens.isEmpty());
public void testUnigrams()

        NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1);

        Token token = null;
        do {
            token = filter.next();
            if (token != null) {
                tokens.add(token.toString());
            }
        } while (token != null);

        assertEquals(5, tokens.size());
        ArrayList exp = new ArrayList();
        exp.add("(a,0,1)"); exp.add("(b,1,2)"); exp.add("(c,2,3)"); exp.add("(d,3,4)"); exp.add("(e,4,5)");
        assertEquals(exp, tokens);
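
Every test drains the stream with the same do/while loop. A small helper along the lines of the hypothetical collectTokens sketched below (an illustration only, not part of the original class, and assuming java.io.IOException is imported) would let each test replace that loop with a single call such as tokens = collectTokens(filter);

    // Hypothetical helper (not in the original test): drains a TokenStream into
    // a list of token.toString() values, exactly as the per-test loops do.
    private ArrayList collectTokens(TokenStream stream) throws IOException {
        ArrayList result = new ArrayList();
        for (Token token = stream.next(); token != null; token = stream.next()) {
            result.add(token.toString());
        }
        return result;
    }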