NGramTokenFilterTest: public class NGramTokenFilterTest extends TestCase. Tests {@link NGramTokenFilter} for correctness. |
Fields Summary |
---|
private TokenStream | input | private ArrayList | tokens |
Methods Summary |
---|
public void | setUp()
input = new WhitespaceTokenizer(new StringReader("abcde"));
| public void | testBigrams()
// Filter built with arguments (2, 2); the expectations below are all two-character grams.
NGramTokenFilter bigramFilter = new NGramTokenFilter(input, 2, 2);

// Pull tokens until the filter returns null, recording each token's string form.
for (Token current = bigramFilter.next(); current != null; current = bigramFilter.next()) {
    tokens.add(current.toString());
}

assertEquals(4, tokens.size());

// Expected string forms, in the exact order the filter must produce them.
ArrayList expected = new ArrayList();
expected.add("(ab,0,2)");
expected.add("(bc,1,3)");
expected.add("(cd,2,4)");
expected.add("(de,3,5)");
assertEquals(expected, tokens);
| public void | testInvalidInput()
// Constructing the filter with arguments (2, 1) must raise IllegalArgumentException.
boolean rejected;
try {
    new NGramTokenFilter(input, 2, 1);
    rejected = false;
} catch (IllegalArgumentException expected) {
    rejected = true;
}
assertTrue(rejected);
| public void | testInvalidInput2()
// Constructing the filter with arguments (0, 1) must raise IllegalArgumentException.
boolean rejected;
try {
    new NGramTokenFilter(input, 0, 1);
    rejected = false;
} catch (IllegalArgumentException expected) {
    rejected = true;
}
assertTrue(rejected);
| public void | testNgrams()
// Filter built with arguments (1, 3); the expectations below cover grams of one to three characters.
NGramTokenFilter ngramFilter = new NGramTokenFilter(input, 1, 3);

// Pull tokens until the filter returns null, recording each token's string form.
for (Token current = ngramFilter.next(); current != null; current = ngramFilter.next()) {
    tokens.add(current.toString());
}

assertEquals(12, tokens.size());

// Expected string forms, in the exact order the filter must produce them.
ArrayList expected = new ArrayList();
// One-character grams.
expected.add("(a,0,1)");
expected.add("(b,1,2)");
expected.add("(c,2,3)");
expected.add("(d,3,4)");
expected.add("(e,4,5)");
// Two-character grams.
expected.add("(ab,0,2)");
expected.add("(bc,1,3)");
expected.add("(cd,2,4)");
expected.add("(de,3,5)");
// Three-character grams.
expected.add("(abc,0,3)");
expected.add("(bcd,1,4)");
expected.add("(cde,2,5)");
assertEquals(expected, tokens);
| public void | testOversizedNgrams()
// Filter built with arguments (6, 7); the test expects it to produce no tokens at all.
NGramTokenFilter oversizedFilter = new NGramTokenFilter(input, 6, 7);

// Pull tokens until the filter returns null, recording each token's string form.
for (Token current = oversizedFilter.next(); current != null; current = oversizedFilter.next()) {
    tokens.add(current.toString());
}

assertTrue(tokens.isEmpty());
| public void | testUnigrams()
// Filter built with arguments (1, 1); the expectations below are all single-character grams.
NGramTokenFilter unigramFilter = new NGramTokenFilter(input, 1, 1);

// Pull tokens until the filter returns null, recording each token's string form.
for (Token current = unigramFilter.next(); current != null; current = unigramFilter.next()) {
    tokens.add(current.toString());
}

assertEquals(5, tokens.size());

// Expected string forms, in the exact order the filter must produce them.
ArrayList expected = new ArrayList();
expected.add("(a,0,1)");
expected.add("(b,1,2)");
expected.add("(c,2,3)");
expected.add("(d,3,4)");
expected.add("(e,4,5)");
assertEquals(expected, tokens);
|
|