import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
import java.io.*;
import java.net.*;
public class LineBreakingTagStripper extends HTMLEditorKit.ParserCallback {
private Writer out;
private String lineSeparator;
public LineBreakingTagStripper(Writer out) {
this(out, System.getProperty("line.separator", "\r\n"));
}
public LineBreakingTagStripper(Writer out, String lineSeparator) {
this.out = out;
this.lineSeparator = lineSeparator;
}
public void handleText(char[] text, int position) {
try {
out.write(text);
out.flush();
}
catch (IOException e) {
System.err.println(e);
}
}
public void handleEndTag(HTML.Tag tag, int position) {
try {
if (tag.isBlock()) {
out.write(lineSeparator);
out.write(lineSeparator);
}
else if (tag.breaksFlow()) {
out.write(lineSeparator);
}
}
catch (IOException e) {
System.err.println(e);
}
}
public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes,
int position) {
try {
if (tag.isBlock()) {
out.write(lineSeparator);
out.write(lineSeparator);
}
else if (tag.breaksFlow()) {
out.write(lineSeparator);
}
else {
out.write(' ');
}
}
catch (IOException e) {
System.err.println(e);
}
}
public static void main(String[] args) {
ParserGetter kit = new ParserGetter();
HTMLEditorKit.Parser parser = kit.getParser();
HTMLEditorKit.ParserCallback callback
= new LineBreakingTagStripper(new OutputStreamWriter(System.out));
try {
URL u = new URL(args[0]);
InputStream in = u.openStream();
InputStreamReader r = new InputStreamReader(in);
parser.parse(r, callback, false);
}
catch (IOException e) {
System.err.println(e);
}
}
} |