File | Doc | Category | Size | Date | Package
DocumentCondenser.java | API Doc | Example | 4735 | Thu Dec 15 21:10:42 GMT 2005 | com.oreilly.jent.xml

DocumentCondenser

public class DocumentCondenser extends Object
In general, you may use the code in this book in your programs and documentation. You do not need to contact us for permission unless you're reproducing a significant portion of the code. For example, writing a program that uses several chunks of code from this book does not require permission. Selling or distributing a CD-ROM of examples from O'Reilly books does require permission. Answering a question by citing this book and quoting example code does not require permission. Incorporating a significant amount of example code from this book into your product's documentation does require permission. We appreciate, but do not require, attribution. An attribution usually includes the title, author, publisher, and ISBN. For example: "Java Enterprise in a Nutshell, Third Edition, by Jim Farley and William Crawford with Prakash Malani, John G. Norman, and Justin Gehtland. Copyright 2006 O'Reilly Media, Inc., 0-596-10142-2." If you feel your use of code examples falls outside fair use or the permission given above, feel free to contact us at permissions@oreilly.com.

Fields Summary
Constructors Summary
Methods Summary
private static booleankeepText(org.w3c.dom.Node parentNode)

    if (parentNode == null) return true; // top level
    
    String parentName = parentNode.getLocalName();
    if ((parentName.equalsIgnoreCase("em")) ||
        (parentName.equalsIgnoreCase("title")) ||
        (parentName.equalsIgnoreCase("b")) ||
        (parentName.equalsIgnoreCase("li")) ||
        (parentName.equalsIgnoreCase("th")) ||
        ((parentName.toLowerCase().startsWith("h")) &&
         (parentName.length() == 2))) {
      return true;
    }
    
    if ((parentNode.getNodeType() == Node.ELEMENT_NODE) &&
        (parentName.equalsIgnoreCase("font"))) {
      NamedNodeMap atts = parentNode.getAttributes();
      if (atts != null) {
        Node sizeNode = atts.getNamedItem("size"); //get an attribue Node
        if (sizeNode != null) {
          if (sizeNode.getNodeValue().startsWith("+")) {
            return true;
          }
        }
      }
      
    }
    return false;
  
public static voidmain(java.lang.String[] args)

    
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(  );
    
    // For HTML, we don't want to validate without a DTD
    dbf.setValidating(false);
    // Ignore text elements that are completely empty:
    dbf.setIgnoringElementContentWhitespace(false);
    dbf.setExpandEntityReferences(true);
    dbf.setCoalescing(true);
    
    // Ensure that getLocalName() returns the HTML element name
    dbf.setNamespaceAware(true);
    
    DocumentBuilder db = null;
    try {
      db = dbf.newDocumentBuilder(  );
    } 
    catch (ParserConfigurationException pce) {
      pce.printStackTrace();
      return;
    }
    
    Document html = null;
    try {
      html = db.parse("enterprisexml.html");
      process(html);
      
      // Use the XSLT Transformer to see the output
      TransformerFactory tf = TransformerFactory.newInstance();
      Transformer output = tf.newTransformer();
      output.transform(new DOMSource(html), new StreamResult(System.out));
    } 
    catch (Exception ex) {
      ex.printStackTrace();
      return;
    }    
  
private static voidprocess(org.w3c.dom.Node node)

    
    Node c = null;
    Node delNode = null;
    
    for (c = node.getFirstChild(); c != null; c = c.getNextSibling()) {
      if (delNode != null) {
        delNode.getParentNode().removeChild(delNode);
      }
      delNode = null;
      if ((c.getNodeType() == Node.TEXT_NODE) &&
          (!keepText(c.getParentNode()))) {
        delNode = c;
      } 
      else if (c.getNodeType() != Node.TEXT_NODE) {
        process(c);
      }
    } // End For
    
    if (delNode != null) // Delete, if the last child was text
      delNode.getParentNode().removeChild(delNode);