FileDocCategorySizeDatePackage
ReadTag.javaAPI DocExample1952Fri May 05 11:05:22 BST 2000None

ReadTag.java

import java.io.*;
import java.net.*;

/** A simple but reusable HTML tag extractor.
 * @author Ian Darwin, Darwin Open Systems, www.darwinsys.com.
 * @version $Id: ReadTag.java,v 1.4 2000/05/05 14:05:23 ian Exp $
 */
public class ReadTag {
	/** The URL that this ReadTag object is reading */
	protected URL myURL = null;
	/** The Reader for this object */
	protected BufferedReader inrdr = null;
  
	/* Simple main showing one way of using the ReadTag class. */
	public static void main(String[] args) throws MalformedURLException, IOException {
		if (args.length == 0) {
			System.err.println("Usage: ReadTag URL [...]");
			return;
		}

		for (int i=0; i<args.length; i++) {
			ReadTag rt = new ReadTag(args[0]);
			String tag;
			while ((tag = rt.nextTag()) != null) {
				System.out.println(tag);
			}
			rt.close();
		}
	}
  
	/** Construct a ReadTag given a URL String */
	public ReadTag(String theURLString) throws 
			IOException, MalformedURLException {

		this(new URL(theURLString));
	}

	/** Construct a ReadTag given a URL */
	public ReadTag(URL theURL) throws IOException {
		myURL = theURL;
		// Open the URL for reading
		inrdr = new BufferedReader(new InputStreamReader(myURL.openStream()));
	}

	/** Read the next tag.  */
	public String nextTag() throws IOException {
		int i;
		while ((i = inrdr.read()) != -1) {
			char thisChar = (char)i;
			if (thisChar == '<') {
				String tag = readTag();
				return tag;
			}
		}
		return null;
	}

	public void close() throws IOException {
		inrdr.close();
	}

	/** Read one tag. Adapted from code by Elliotte Rusty Harold */
	protected String readTag() throws IOException {
		StringBuffer theTag = new StringBuffer("<");
		int i = '<';
	  
		while (i != '>' && (i = inrdr.read()) != -1) {
				theTag.append((char)i);
		}     
		return theTag.toString();
	}

	/* Return a String representation of this object */
	public String toString() {
		return "ReadTag[" + myURL.toString() + "]";
	}
}