import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
import java.io.*;
import java.net.*;
/**
 * Parser callback that copies the text content of an HTML document to a
 * {@link Writer} and writes a single space in place of every tag, so that
 * words separated only by markup do not run together in the output.
 *
 * <p>The callback methods cannot throw checked exceptions, so an
 * {@link IOException} raised by the writer is reported on {@code System.err}
 * and parsing continues.
 *
 * <p>This class never flushes or closes the writer itself; whoever supplies
 * the writer is responsible for that (see {@link #main(String[])}).
 */
public class TagStripper extends HTMLEditorKit.ParserCallback {

  /** Destination for the stripped text. */
  private final Writer out;

  /**
   * Creates a stripper that writes to the given destination.
   *
   * @param out writer that receives the document text and the tag separators
   */
  public TagStripper(Writer out) {
    this.out = out;
  }

  /**
   * Writes a run of document text to the destination unchanged.
   *
   * @param text the characters of the text run
   * @param position parser-supplied position of the run; not used here
   */
  @Override
  public void handleText(char[] text, int position) {
    try {
      // Diagnostic marker: one row of asterisks on standard output for every
      // CR or LF character found in the text run.
      // NOTE(review): this looks like leftover debugging output; it goes
      // straight to System.out rather than through the writer. Confirm
      // whether it is still wanted.
      for (int i = 0; i < text.length; i++) {
        if (text[i] == '\r' || text[i] == '\n') {
          System.out.println("**********************");
        }
      }
      out.write(text);
    } catch (IOException e) {
      System.err.println(e);
    }
  }

  /**
   * Replaces a start tag with a single space.
   *
   * @param tag the tag that was opened; not inspected
   * @param attributes the tag's attributes; not inspected
   * @param position parser-supplied position of the tag; not used here
   */
  @Override
  public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes,
      int position) {
    writeSeparator();
  }

  /**
   * Replaces an end tag with a single space.
   *
   * @param tag the tag that was closed; not inspected
   * @param position parser-supplied position of the tag; not used here
   */
  @Override
  public void handleEndTag(HTML.Tag tag, int position) {
    writeSeparator();
  }

  /**
   * Replaces a simple (empty) tag with a single space.
   *
   * @param tag the tag that was encountered; not inspected
   * @param attributes the tag's attributes; not inspected
   * @param position parser-supplied position of the tag; not used here
   */
  @Override
  public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes,
      int position) {
    writeSeparator();
  }

  /** Writes the one-space tag replacement, reporting any I/O failure on System.err. */
  private void writeSeparator() {
    try {
      out.write(' ');
    } catch (IOException e) {
      System.err.println(e);
    }
  }

  /**
   * Downloads the document at the URL given as the first argument, strips its
   * tags and prints the remaining text on standard output.
   *
   * @param args command-line arguments; {@code args[0]} is the URL to read
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      System.err.println("Usage: java TagStripper <url>");
      return;
    }

    Writer console = new OutputStreamWriter(System.out);
    HTMLEditorKit.ParserCallback callback = new TagStripper(console);
    HTMLEditorKit.Parser parser = new ParserDelegator();

    // try-with-resources closes the reader (and the URL stream under it)
    // whether or not parsing succeeds.
    try (Reader r = new InputStreamReader(new URL(args[0]).openStream())) {
      // NOTE(review): ignoreCharSet is false, so the parser may abort with an
      // IOException on documents that declare a charset - confirm this is
      // the intended behaviour.
      parser.parse(r, callback, false);
    } catch (IOException e) {
      System.err.println(e);
    } finally {
      // OutputStreamWriter buffers encoded bytes; without this flush the
      // stripped text may never reach standard output. The writer is
      // flushed rather than closed so that System.out stays usable.
      try {
        console.flush();
      } catch (IOException e) {
        System.err.println(e);
      }
    }
  }
}