FileDocCategorySizeDatePackage
NGramTokenizerTest.javaAPI DocApache Lucene 2.1.04645Wed Feb 14 10:46:26 GMT 2007org.apache.lucene.analysis.ngram

NGramTokenizerTest

public class NGramTokenizerTest extends TestCase
Tests {@link NGramTokenizer} for correctness.
@author Otis Gospodnetic

Fields Summary
private StringReader input
private ArrayList tokens
Constructors Summary
Methods Summary
public voidsetUp()

    
       
        input = new StringReader("abcde");
    
public voidtestBigrams()

        NGramTokenizer tokenizer = new NGramTokenizer(input, 2, 2);
        
        Token token = null;
        do { 
            token = tokenizer.next();
            if (token != null) {
                tokens.add(token.toString());
//                System.out.println(token.termText());
//                System.out.println(token);
//                Thread.sleep(1000);
            }
        } while (token != null);

        assertEquals(4, tokens.size());
        ArrayList exp = new ArrayList();
        exp.add("(ab,0,2)"); exp.add("(bc,1,3)"); exp.add("(cd,2,4)"); exp.add("(de,3,5)");
        assertEquals(exp, tokens);
    
public voidtestInvalidInput()

        boolean gotException = false;
        try {        
            new NGramTokenizer(input, 2, 1);
        } catch (IllegalArgumentException e) {
            gotException = true;
        }
        assertTrue(gotException);
    
public voidtestInvalidInput2()

        boolean gotException = false;
        try {        
            new NGramTokenizer(input, 0, 1);
        } catch (IllegalArgumentException e) {
            gotException = true;
        }
        assertTrue(gotException);
    
public voidtestNgrams()

        NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 3);
        
        Token token = null;
        do { 
            token = tokenizer.next();
            if (token != null) {
                tokens.add(token.toString());
//                System.out.println(token.termText());
//                System.out.println(token);
//                Thread.sleep(1000);
            }
        } while (token != null);

        assertEquals(12, tokens.size());
        ArrayList exp = new ArrayList();
        exp.add("(a,0,1)"); exp.add("(b,1,2)"); exp.add("(c,2,3)"); exp.add("(d,3,4)"); exp.add("(e,4,5)");
        exp.add("(ab,0,2)"); exp.add("(bc,1,3)"); exp.add("(cd,2,4)"); exp.add("(de,3,5)");
        exp.add("(abc,0,3)"); exp.add("(bcd,1,4)"); exp.add("(cde,2,5)");
        assertEquals(exp, tokens);
    
public voidtestOversizedNgrams()

        NGramTokenizer tokenizer = new NGramTokenizer(input, 6, 7);
        
        Token token = null;
        do { 
            token = tokenizer.next();
            if (token != null) {
                tokens.add(token.toString());
//                System.out.println(token.termText());
//                System.out.println(token);
//                Thread.sleep(1000);
            }
        } while (token != null);

        assertTrue(tokens.isEmpty());
    
public voidtestUnigrams()

        NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
        
        Token token = null;
        do { 
            token = tokenizer.next();
            if (token != null) {
                tokens.add(token.toString());
//                System.out.println(token.termText());
//                System.out.println(token);
//                Thread.sleep(1000);
            }
        } while (token != null);

        assertEquals(5, tokens.size());
        ArrayList exp = new ArrayList();
        exp.add("(a,0,1)"); exp.add("(b,1,2)"); exp.add("(c,2,3)"); exp.add("(d,3,4)"); exp.add("(e,4,5)");
        assertEquals(exp, tokens);