File | Doc | Category | Size | Date | Package
IndexLink.java | API Doc | Example | 2365 | Mon Feb 23 21:29:56 GMT 2004 | com.develop.ss

IndexLink.java

package com.develop.ss;

import junit.framework.TestCase;
import junit.framework.TestSuite;

import java.util.Set;
import java.util.HashSet;
import java.util.logging.Logger;
import java.util.logging.LogManager;
import java.util.logging.Level;
import java.io.IOException;

import com.meterware.httpunit.WebConversation;
import com.meterware.httpunit.WebResponse;
import com.meterware.httpunit.WebLink;
import com.meterware.httpunit.HttpNotFoundException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.demo.html.HTMLParser;
import org.xml.sax.SAXException;

/**
 * Processes a single link: fetches it through a shared
 * {@link WebConversation}, adds the page to the index when its content type
 * is one this class treats as indexable, and passes every link found on the
 * page back to the owning {@link IndexLinks} instance.
 */
public class IndexLink {
  private final WebConversation conversation;
  private final IndexLinks suite;
  private final String name;
  static Logger log = Logger.getLogger("com.develop.ss");


  /**
   * Creates a link processor.
   *
   * @param name         the URL (as a string) to fetch; must not be null
   * @param conversation the HttpUnit conversation used to fetch the URL; must not be null
   * @param suite        the owner that receives index documents and newly found links; must not be null
   * @throws IllegalArgumentException if any argument is null
   */
  public IndexLink(String name, WebConversation conversation, IndexLinks suite) {
    // Validate before assigning so a rejected instance never holds partial state.
    if ((name == null) || (conversation == null) || (suite == null)) {
      throw new IllegalArgumentException("IndexLink constructor requires non-null args");
    }
    this.name = name;
    this.conversation = conversation;
    this.suite = suite;
  }

  /**
   * Fetches this link, indexes it if indexable, and reports every link on the
   * fetched page to the suite via {@code considerNewLink}.
   *
   * A link that cannot be found (HttpUnit raises {@link HttpNotFoundException})
   * is logged at WARNING level and skipped rather than aborting the caller.
   *
   * @throws Exception any other failure while fetching, parsing or indexing
   */
  public void checkLink() throws Exception {
    WebResponse response;
    try {
      response = conversation.getResponse(this.name);
    } catch (HttpNotFoundException hnfe) {
      // Previously this was swallowed, leaving the response null and causing a
      // NullPointerException further down. A missing page is simply skipped.
      log.warning("Link not found, skipping: " + this.name + " (" + hnfe.getMessage() + ")");
      return;
    }
    if (!isIndexable(response)) {
      return;
    }
    addToIndex(response);
    WebLink[] links = response.getLinks();
    for (int i = 0; i < links.length; i++) {
      WebLink link = links[i];
      suite.considerNewLink(this.name, link);
    }

  }

  /**
   * Builds an index document from the response and hands it to the suite.
   * The document carries four fields: "url" and "summary" (added with
   * {@code Field.UnIndexed}) and "title" and "contents" (added with
   * {@code Field.Text}).
   *
   * @param response the fetched page; its input stream is parsed as HTML
   */
  private void addToIndex(WebResponse response) throws SAXException, IOException, InterruptedException {
    Document d = new Document();
    HTMLParser parser = new HTMLParser(response.getInputStream());
    d.add(Field.UnIndexed("url", response.getURL().toExternalForm()));
    d.add(Field.UnIndexed("summary", parser.getSummary()));
    d.add(Field.Text("title", parser.getTitle()));
    d.add(Field.Text("contents", parser.getReader()));
    suite.addToIndex(d);
  }

  /**
   * Tells whether a response should be indexed.
   *
   * @param response the fetched response, possibly null
   * @return true only when the response is non-null and its content type is
   *         exactly "text/html" or "text/ascii"
   */
  private boolean isIndexable(WebResponse response) {
    if (response == null) {
      return false;
    }
    String contentType = response.getContentType();
    // Literal-first equals stays false (instead of throwing) for a null type.
    return "text/html".equals(contentType) || "text/ascii".equals(contentType);
  }


}