File | Doc | Category | Size | Date | Package
IndexLinks.java | API Doc | Example | 6463 | Mon Feb 23 21:27:48 GMT 2004 | com.develop.ss

IndexLinks

public class IndexLinks extends Object

Fields Summary
com.meterware.httpunit.WebConversation
conversation
Set
linksAlreadyFollowed
Set
linksNotFollowed
Set
linkPrefixesToFollow
HashSet
linkPrefixesToAvoid
HashSet
linksToIndex
private String
linksNotFollowedOutputFileName
private int
maxLinks
private IndexWriter
writer
private String
initialLink
private static String[]
EMPTY_STRINGARRAY
Constructors Summary
public IndexLinks(String indexPath, int maxLinks, String skippedLinksOutputFileName)


  // constructors
  //
  // Stores the link limit and the name of the output file for links that
  // were not followed, then creates the IndexWriter used by this instance.
  //   indexPath                  - passed to IndexWriter as the index location
  //   maxLinks                   - limit compared against the number of followed links
  //   skippedLinksOutputFileName - kept in linksNotFollowedOutputFileName
           
    this.maxLinks = maxLinks;
    this.linksNotFollowedOutputFileName = skippedLinksOutputFileName;
    // NOTE(review): the third argument (true) presumably asks IndexWriter to
    // create a fresh index at indexPath - confirm against the Lucene version in use.
    writer = new IndexWriter(indexPath, new StandardAnalyzer(), true);
  
Methods Summary
public voidaddLink(IndexLink link)

      // Checks the given link and then records it in linksToIndex.
      // Checking is best-effort: a failure is logged and the link is still
      // recorded, so one bad link does not stop the caller.
      try
      {
        link.checkLink();
      }
      catch(Exception ex)
      {
        // Keep going, but leave a trace of the failure instead of hiding it.
        // NOTE(review): assumes IndexLink.log is a java.util.logging.Logger
        // (it is used with info()/fine() elsewhere in this class) - confirm.
        IndexLink.log.log(java.util.logging.Level.WARNING,
            "Failed to check link " + link, ex);
      }
      linksToIndex.add(link);
  
public voidaddToIndex(org.apache.lucene.document.Document d)

      // Adds the document to the index through this instance's writer.
      // A failure is logged rather than propagated, so the caller carries on
      // with the remaining documents.
      try
      {
      writer.addDocument(d);
      }
      catch(Exception ex)
      {
        // Report the lost document instead of dropping it silently.
        // NOTE(review): assumes IndexLink.log is a java.util.logging.Logger
        // (it is used with info()/fine() elsewhere in this class) - confirm.
        IndexLink.log.log(java.util.logging.Level.WARNING,
            "Failed to add document to index", ex);
      }
  
voidconsiderNewLink(java.lang.String linkFrom, com.meterware.httpunit.WebLink newLink)

    // Decides what to do with a link found on the page linkFrom:
    //  - not under any follow prefix: hand it to ignoreLink;
    //  - already in linksAlreadyFollowed: nothing to do;
    //  - would push the followed set past maxLinks: undo the add and abort with an Error;
    //  - under an avoid prefix: log it and stop;
    //  - otherwise: log it and add a new IndexLink for it.
    URL url = newLink.getRequest().getURL();

    if (!shouldFollowLink(url)) {
      ignoreLink(url, linkFrom);
      return;
    }

    String externalForm = url.toExternalForm();
    if (!linksAlreadyFollowed.add(externalForm)) {
      // Seen before.
      return;
    }

    if (linksAlreadyFollowed.size() > maxLinks) {
      // Roll back the add so the set never holds more than maxLinks entries.
      linksAlreadyFollowed.remove(externalForm);
      throw new Error("Max links exceeded " + maxLinks);
    }

    if (shouldNotFollowLink(url)) {
      IndexLink.log.info("Not following " + externalForm + " from " + linkFrom);
      return;
    }

    IndexLink.log.info("Following " + externalForm + " from " + linkFrom);
    addLink(new IndexLink(url.toString(),conversation, this));
  
public voidexecute()

      // Runs checkLink() on every link currently held in linksToIndex.
      // A failure on one link is logged and the pass continues with the next.
      //
      // The set is snapshotted once up front: links added while this pass is
      // running are already checked by addLink when they are added, and a
      // fixed snapshot keeps the iteration stable and avoids rebuilding the
      // array on every step.
      Object[] links = linksToIndex.toArray();
      // Visit every element, including the last one.
      for(int i = 0;i<links.length;i++)
      {
        try
        {
            ((IndexLink)links[i]).checkLink();
        }
        catch(Exception ex)
        {
          // NOTE(review): assumes IndexLink.log is a java.util.logging.Logger
          // (it is used with info()/fine() elsewhere in this class) - confirm.
          IndexLink.log.log(java.util.logging.Level.WARNING,
              "Failed to check link " + links[i], ex);
        }
      }

  
public java.lang.StringgetInitialLink()

    // Returns the current value of the initialLink field (assigned in setInitialLink).
    return initialLink;
  
public org.apache.lucene.index.IndexWritergetWriter()

    // Returns the IndexWriter created in the constructor.
    return writer;
  
private voidignoreLink(java.net.URL url, java.lang.String linkFrom)

    String status = "Ignoring " + url.toExternalForm() + " from " + linkFrom;
    linksNotFollowed.add(status);
    IndexLink.log.fine(status);
  
public voidinitAvoidPrefixesFromSystemProperties()

    // Reads the whitespace-separated URL prefixes from the system property
    // "com.develop.ss.AvoidLinks" and registers them via setAvoidPrefixes.
    // Does nothing when the property is unset or contains only whitespace.
    String avoidPrefixes = System.getProperty("com.develop.ss.AvoidLinks");
    if (avoidPrefixes == null) return;
    // Trim and split on runs of whitespace: splitting on a single " " turns
    // leading or doubled spaces into empty entries, and setAvoidPrefixes
    // builds a URL from every entry, which fails for an empty string.
    avoidPrefixes = avoidPrefixes.trim();
    if (avoidPrefixes.length() == 0) return;
    setAvoidPrefixes(avoidPrefixes.split("\\s+"));
  
public voidinitFollowPrefixesFromSystemProperties()

    // Reads the whitespace-separated URL prefixes from the system property
    // "com.develop.ss.FollowLinks" and registers them via setFollowPrefixes.
    // Does nothing when the property is unset or contains only whitespace.
    String followPrefixes = System.getProperty("com.develop.ss.FollowLinks");
    if (followPrefixes == null) return;
    // Trim and split on runs of whitespace: splitting on a single " " turns
    // leading or doubled spaces into empty entries, and setFollowPrefixes
    // builds a URL from every entry, which fails for an empty string.
    followPrefixes = followPrefixes.trim();
    if (followPrefixes.length() == 0) return;
    setFollowPrefixes(followPrefixes.split("\\s+"));
  
private booleanmatchesDownToPathPrefix(java.net.URL matchBase, java.net.URL newLink)

    // True when newLink has the same host, port and protocol as matchBase
    // and its path begins with matchBase's path. Host and protocol are
    // compared with equals(), i.e. case-sensitively.
    if (!matchBase.getHost().equals(newLink.getHost())) {
      return false;
    }
    if (matchBase.getPort() != newLink.getPort()) {
      return false;
    }
    if (!matchBase.getProtocol().equals(newLink.getProtocol())) {
      return false;
    }
    String basePath = matchBase.getPath();
    return newLink.getPath().startsWith(basePath);
  
public voidsetAvoidPrefixes(java.lang.String[] prefixesToAvoid)

    for (int i = 0; i < prefixesToAvoid.length; i++) {
      String s = prefixesToAvoid[i];
      IndexLink.log.info("Avoid links prefixed with " + s);
      linkPrefixesToAvoid.add(new URL(s));
    }
  
public voidsetFollowPrefixes(java.lang.String[] prefixesToFollow)

    for (int i = 0; i < prefixesToFollow.length; i++) {
      String s = prefixesToFollow[i];
      IndexLink.log.info("Following links prefixed with " + s);
      linkPrefixesToFollow.add(new URL(s));
    }
  
public voidsetInitialLink(java.lang.String initialLink)

    if ((initialLink == null) || (initialLink.length() == 0)) {
      throw new Error("Must specify a non-null initialLink");
    }
    linkPrefixesToFollow.add(new URL(initialLink));
    this.initialLink = initialLink;
    IndexLink.log.info("Initial link is " + initialLink);
    addLink(new IndexLink(initialLink,conversation,this));
  
booleanshouldFollowLink(java.net.URL newLink)

    // True when newLink matches at least one entry of linkPrefixesToFollow
    // according to matchesDownToPathPrefix.
    Iterator prefixes = linkPrefixesToFollow.iterator();
    while (prefixes.hasNext()) {
      URL prefix = (URL) prefixes.next();
      if (matchesDownToPathPrefix(prefix, newLink)) {
        return true;
      }
    }
    return false;
  
booleanshouldNotFollowLink(java.net.URL newLink)

    // True when newLink matches at least one entry of linkPrefixesToAvoid
    // according to matchesDownToPathPrefix.
    Iterator prefixes = linkPrefixesToAvoid.iterator();
    while (prefixes.hasNext()) {
      URL prefix = (URL) prefixes.next();
      if (matchesDownToPathPrefix(prefix, newLink)) {
        return true;
      }
    }
    return false;
  
private voidwriteStringSet(java.lang.String s, java.util.Set skippedPaths)

    // Writes every element of skippedPaths, one per line, to the file named s.
    // The file stream is closed even when writing fails, and a write error
    // is reported as an IOException instead of going unnoticed.
    FileOutputStream fos = new FileOutputStream(s);
    try {
      PrintStream ps = new PrintStream(fos);
      Iterator it = skippedPaths.iterator();
      while (it.hasNext()) {
        ps.println(it.next());
      }
      // PrintStream does not throw on I/O failure; it only sets an internal
      // error flag. checkError() flushes the stream and returns that flag.
      if (ps.checkError()) {
        throw new java.io.IOException("Error writing " + s);
      }
    } finally {
      fos.close();
    }