// Make a new, empty document; the fields describing file f are added below.
Document doc = new Document();

// Add the url as a field named "url". Use an UnIndexed field, so
// that the url is just stored with the document, but is not searchable.
// Every occurrence of dirSep in the path is replaced with '/'.
doc.add(Field.UnIndexed("url", f.getPath().replace(dirSep, '/')));

// Add the last modified date of the file as a field named "modified". Use a
// Keyword field, so that it's searchable, but so that no attempt is made
// to tokenize the field into words.
doc.add(Field.Keyword("modified",
    DateField.timeToString(f.lastModified())));

// Add the uid as a field, so that the index can be incrementally maintained.
// This field is not stored with the document, it is indexed, but it is not
// tokenized prior to indexing (the three flags, in that order).
doc.add(new Field("uid", uid(f), false, true, false));

// A single parser over the file supplies the contents reader, the summary
// and the title used by the remaining fields.
HTMLParser parser = new HTMLParser(f);

// Add the tag-stripped contents as a Reader-valued Text field so it will
// get tokenized and indexed.
doc.add(Field.Text("contents", parser.getReader()));

// Add the summary as an UnIndexed field, so that it is stored and returned
// with hit documents for display.
doc.add(Field.UnIndexed("summary", parser.getSummary()));

// Add the title as a separate Text field, so that it can be searched
// separately.
doc.add(Field.Text("title", parser.getTitle()));

// Return the fully populated document.
return doc;