File | Doc | Category | Size | Date | Package
Links.java | API Doc | Example | 3239 | Thu Apr 05 20:38:58 BST 2001 | None

Links

public class Links extends HttpServlet

Fields Summary
Constructors Summary
Methods Summary
/**
 * Fetches the page whose URL is given as extra path info and writes an HTML
 * page listing every anchor element found on it.
 *
 * Example request:
 *   http://localhost:8080/servlet/Links/http://www.servlets.com/
 *
 * Responses:
 *   - SC_BAD_REQUEST when no URL is left after the leading '/' of the path
 *     info is removed (path info missing, empty, or just "/").
 *   - SC_NOT_FOUND, with the stack trace in the message, when the URL is
 *     malformed or the page cannot be read.
 *   - Otherwise an HTML page containing a BASE element (copied from the
 *     fetched page, or derived from the URL) followed by a list of the
 *     matched anchors.
 *
 * NOTE(review): the requested URL and the anchors taken from the fetched
 * page are written to the response without HTML escaping. Confirm that this
 * servlet is only exposed where reflecting such content is acceptable.
 *
 * @param req the request; its extra path info carries the URL to read
 * @param res the response that receives the generated HTML or the error
 */
public voiddoGet(javax.servlet.http.HttpServletRequest req, javax.servlet.http.HttpServletResponse res)

    res.setContentType("text/html");
    PrintWriter out = res.getWriter();

    // We accept the URL to process as extra path info
    // http://localhost:8080/servlet/Links/http://www.servlets.com/
    String url = req.getPathInfo();
    if (url != null && url.length() > 0) {
      url = url.substring(1);  // cut off leading '/'
    }
    // Check after stripping so that a bare "/" is reported as a missing URL
    // rather than falling through to a malformed-URL failure below
    if (url == null || url.length() == 0) {
      res.sendError(res.SC_BAD_REQUEST,
                    "Please pass a URL to read from as extra path info");
      return;
    }

    String page = null;
    BufferedReader in = null;
    try {
      // Request the page
      HttpMessage msg = new HttpMessage(new URL(url));
      in = new BufferedReader(new InputStreamReader(msg.sendGetMessage()));

      // Read the entire response into a String
      StringBuffer buf = new StringBuffer(10240);
      char[] chars = new char[10240];
      int charsRead = 0;
      while ((charsRead = in.read(chars, 0, chars.length)) != -1) {
        buf.append(chars, 0, charsRead);
      }
      page = buf.toString();
    }
    catch (IOException e) {
      // Also reached for a malformed URL (MalformedURLException is an
      // IOException)
      res.sendError(res.SC_NOT_FOUND,
                    "Link Extractor could not read from " + url + ":<BR>" +
                    ServletUtils.getStackTraceAsString(e));
      return;
    }
    finally {
      // Always release the remote stream, on success and on failure alike
      if (in != null) {
        try {
          in.close();
        }
        catch (IOException ignored) {
          // Nothing useful can be done if closing fails; on the success path
          // the page has already been read in full
        }
      }
    }

    out.println("<HTML><HEAD><TITLE>Link Extractor</TITLE>");

    try {
      // We need to specify a <BASE> so relative links work correctly
      // If the page already has one, we can use that
      RE re = new RE("<base[^>]*>", RE.MATCH_CASEINDEPENDENT);
      boolean hasBase = re.match(page);

      if (hasBase) {
        // Use the existing <BASE>
        out.println(re.getParen(0));
      }
      else {
        // Calculate the base from the URL, use everything up to last '/'
        re = new RE("http://.*/", RE.MATCH_CASEINDEPENDENT);
        boolean extractedBase = re.match(url);
        if (extractedBase) {
          // Success, print the calculated base
          out.println("<BASE HREF=\"" + re.getParen(0) + "\">");
        }
        else {
          // No trailing slash, add one ourselves
          out.println("<BASE HREF=\"" + url + "/" + "\">");
        }
      }

      out.println("</HEAD><BODY>");

      out.println("The links on <A HREF=\"" + url + "\">" + url + "</A>" +
                  " are: <BR>");
      out.println("<UL>");

      // Matches an opening <a ...> through its closing </a>, provided no
      // other '<' appears in between
      String search = "<a\\s+[^<]*</a\\s*>";
      re = new RE(search, RE.MATCH_CASEINDEPENDENT);

      // Resume each search where the previous match ended
      int index = 0;
      while (re.match(page, index)) {
        String match = re.getParen(0);
        index = re.getParenEnd(0);
        out.println("<LI>" + match + "<BR>");
      }

      out.println("</UL>");
      out.println("</BODY></HTML>");
    }
    catch (RESyntaxException e) {
      // Should never happen as the search strings are hard coded
      e.printStackTrace(out);
    }