FileDocCategorySizeDatePackage
HTMLCollectionImpl.javaAPI DocApache Xerces 3.0.119148Fri Sep 14 20:33:54 BST 2007org.apache.html.dom

HTMLCollectionImpl.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.html.dom;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLAnchorElement;
import org.w3c.dom.html.HTMLAppletElement;
import org.w3c.dom.html.HTMLAreaElement;
import org.w3c.dom.html.HTMLCollection;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLFormElement;
import org.w3c.dom.html.HTMLImageElement;
import org.w3c.dom.html.HTMLObjectElement;
import org.w3c.dom.html.HTMLOptionElement;
import org.w3c.dom.html.HTMLTableCellElement;
import org.w3c.dom.html.HTMLTableRowElement;
import org.w3c.dom.html.HTMLTableSectionElement;

/**
 * Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named
 * elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to
 * look for is identified in the constructor by code. This collection is not
 * optimized for traversing large trees.
 * <p>
 * The collection has to meet two requirements: it has to be live, and it has
 * to traverse depth first and always return results in that order. As such,
 * using an object container (such as {@link java.util.Vector}) is expensive on
 * insert/remove operations. Instead, the collection has been implemented using
 * three traversing functions. As a result, operations on large documents will
 * result in traversal of the entire document tree and consume a considerable
 * amount of time.
 * <p>
 * Note that synchronization on the traversed document cannot be achieved.
 * The document itself cannot be locked, and locking each traversed node is
 * likely to lead to a dead lock condition. Therefore, there is a chance of the
 * document being changed as results are fetched; in all likelihood, the results
 * might be out dated, but not erroneous.
 * 
 * @xerces.internal
 * 
 * @version $Revision: 447255 $ $Date: 2006-09-18 01:36:42 -0400 (Mon, 18 Sep 2006) $
 * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a>
 * @see org.w3c.dom.html.HTMLCollection
 */
class HTMLCollectionImpl
    implements HTMLCollection
{
    

    /**
     * Request collection of all anchors in document: <A> elements that
     * have a <code>name</code> attribute.
     */
    static final short        ANCHOR = 1;
    
    
    /**
     * Request collection of all forms in document: <FORM> elements.
     */
    static final short        FORM = 2;
    
    
    /**
     * Request collection of all images in document: <IMAGE> elements.
     */
    static final short        IMAGE = 3;
    
    
    /**
     * Request collection of all Applets in document: <APPLET> and
     * <OBJECT> elements (<OBJECT> must contain an Applet).
     */
    static final short        APPLET = 4;
    
    
    /**
     * Request collection of all links in document: <A> and <AREA>
     * elements (must have a <code>href</code> attribute).
     */
    static final short        LINK = 5;
    
    
    /**
     * Request collection of all options in selection: <OPTION> elments in
     * <SELECT> or <OPTGROUP>.
     */
    static final short        OPTION = 6;
    
    
    /**
     * Request collection of all rows in table: <TR> elements in table or
     * table section.
     */
    static final short        ROW = 7;

    
    /**
     * Request collection of all form elements: <INPUT>, <BUTTON>,
     * <SELECT>, <TEXT> and <TEXTAREA> elements inside form
     * <FORM>.
     */
    static final short        ELEMENT = 8;
    
    
    /**
     * Request collection of all areas in map: <AREA> element in <MAP>
     * (non recursive).
     */
    static final short        AREA = -1;
    

    /**
     * Request collection of all table bodies in table: <TBODY> element in
     * table <TABLE> (non recursive).
     */
    static final short        TBODY = -2;

    
    /**
     * Request collection of all cells in row: <TD> elements in <TR>
     * (non recursive).
     */
    static final short        CELL = -3;

    
    /**
     * Indicates what this collection is looking for. Holds one of the enumerated
     * values and used by {@link #collectionMatch}. Set by the constructor and
     * determine the collection's use for its life time.
     */
    private short            _lookingFor;
    
    
    /**
     * This is the top level element underneath which the collection exists.
     */
    private Element            _topLevel;


    /**
     * Construct a new collection that retrieves element of the specific type
     * (<code>lookingFor</code>) from the specific document portion
     * (<code>topLevel</code>).
     * 
     * @param topLevel The element underneath which the collection exists
     * @param lookingFor Code indicating what elements to look for
     */
    HTMLCollectionImpl( HTMLElement topLevel, short lookingFor )
    {
        if ( topLevel == null )
            throw new NullPointerException( "HTM011 Argument 'topLevel' is null." );
        _topLevel = topLevel;
       _lookingFor = lookingFor;
    }
  
  
    /**
     * Returns the length of the collection. This method might traverse the
     * entire document tree.
     * 
     * @return Length of the collection
     */
    public final int getLength()
    {
        // Call recursive function on top-level element.
        return getLength( _topLevel );
    }


    /**
     * Retrieves the indexed node from the collection. Nodes are numbered in
     * tree order - depth-first traversal order. This method might traverse
     * the entire document tree.
     * 
     * @param index The index of the node to return
     * @return The specified node or null if no such node found
     */
    public final Node item( int index )
    {
        if ( index < 0 )
            throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." );
        // Call recursive function on top-level element.
        return item( _topLevel, new CollectionIndex( index ) );
    }
    
    
    /**
     * Retrieves the named node from the collection. The name is matched case
     * sensitive against the <TT>id</TT> attribute of each element in the
     * collection, returning the first match. The tree is traversed in
     * depth-first order. This method might traverse the entire document tree.
     * 
     * @param name The name of the node to return
     * @return The specified node or null if no such node found
     */
    public final Node namedItem( String name )
    {
        if ( name == null )
            throw new NullPointerException( "HTM013 Argument 'name' is null." );
        // Call recursive function on top-level element.
        return namedItem( _topLevel, name );
    }
    
    
    /**
     * Recursive function returns the number of elements of a particular type
     * that exist under the top level element. This is a recursive function
     * and the top level element is passed along.
     * 
     * @param topLevel Top level element from which to scan
     * @return Number of elements
     */
    private int getLength( Element topLevel )
    {
        int        length;
        Node    node;
    
        synchronized ( topLevel )
        {
            // Always count from zero and traverse all the childs of the
            // current element in the order they appear.
            length = 0;
            node = topLevel.getFirstChild();
            while ( node != null )
            {
                // If a particular node is an element (could be HTML or XML),
		// do two things: if it's the one we're looking for, count
		// another matched element; at any rate, traverse it's
		// children as well.
                if ( node instanceof Element )
                {
                    if ( collectionMatch( (Element) node, null ) )
                        ++ length;
                    else if ( recurse() )
                        length += getLength( (Element) node );
                }
                node = node.getNextSibling(); 
            }
        }
        return length;
    }
    
        
    /**
     * Recursive function returns the numbered element of a particular type
     * that exist under the top level element. This is a recursive function
     * and the top level element is passed along.
     * <p>
     * Note that this function must call itself with an index and get back both
     * the element (if one was found) and the new index which is decremeneted
     * for any like element found. Since integers are only passed by value,
     * this function makes use of a separate class ({@link CollectionIndex})
     * to hold that index.
     * 
     * @param topLevel Top level element from which to scan
     * @param index The index of the item to retreive
     * @return Number of elements
     * @see CollectionIndex
     */
    private Node item( Element topLevel, CollectionIndex index )
    {
        Node    node;
        Node    result;

        synchronized ( topLevel )
        {
            // Traverse all the childs of the current element in the order
	    // they appear. Count from the index backwards until you reach
	    // matching element with an index of zero. Return that element.
            node = topLevel.getFirstChild();
            while ( node != null )
            {
                // If a particular node is an element (could be HTML or XML),
		// do two things: if it's the one we're looking for, decrease
		// the index and if zero, return this node; at any rate,
		// traverse it's children as well.
                if ( node instanceof Element )
                {
                    if ( collectionMatch( (Element) node, null ) )
                    {
                        if ( index.isZero() )
                            return node;
                        index.decrement();
                    } else if ( recurse() )
                    {
                        result = item( (Element) node, index );
                        if ( result != null )
                            return result;
                    }
                }
                node = node.getNextSibling(); 
            }
        }
        return null;
    }
    
    
    /**
     * Recursive function returns an element of a particular type with the
     * specified name (<TT>id</TT> attribute).
     * 
     * @param topLevel Top level element from which to scan
     * @param name The named element to look for
     * @return The first named element found
     */
    private  Node namedItem( Element topLevel, String name )
    {
        Node    node;
        Node    result;

        synchronized ( topLevel )
        {
            // Traverse all the childs of the current element in the order
	    // they appear.
            node = topLevel.getFirstChild();
            while ( node != null )
            {
                // If a particular node is an element (could be HTML or XML),
                // do two things: if it's the one we're looking for, and the
		// name (id attribute) attribute is the one we're looking for,
		// return this element; otherwise, traverse it's children.
                if ( node instanceof Element )
                {
                    if ( collectionMatch( (Element) node, name ) )
                        return node;
                    else if ( recurse() )
                    {
                        result = namedItem( (Element) node, name );
                        if ( result != null )
                            return result;
                    }
                }
                node = node.getNextSibling(); 
            }
            return node;
        }
    }
    
    
    /**
     * Returns true if scanning methods should iterate through the collection.
     * When looking for elements in the document, recursing is needed to traverse
     * the full document tree. When looking inside a specific element (e.g. for a
     * cell inside a row), recursing can lead to erroneous results.
     * 
     * @return True if methods should recurse to traverse entire tree
     */
    protected boolean recurse()
    {
        return _lookingFor > 0;
    }
    

    /**
     * Determines if current element matches based on what we're looking for.
     * The element is passed along with an optional identifier name. If the
     * element is the one we're looking for, return true. If the name is also
     * specified, the name must match the <code>id</code> attribute
     * (match <code>name</code> first for anchors).
     * 
     * @param elem The current element
     * @param name The identifier name or null
     * @return The element matches what we're looking for
     */
    protected boolean collectionMatch( Element elem, String name )
    {
        boolean    match;
        
        synchronized ( elem )
        {
            // Begin with no matching. Depending on what we're looking for,
            // attempt to match based on the element type. This is the quickest
            // way to match involving only a cast. Do the expensive string
            // comparison later on.
            match = false;
            switch ( _lookingFor )
            {
            case ANCHOR:
                // Anchor is an <A> element with a 'name' attribute. Otherwise, it's
                // just a link.
                match = ( elem instanceof HTMLAnchorElement ) &&
                        elem.getAttribute( "name" ).length() > 0;
                break;
            case FORM:
                // Any <FORM> element.
                match = ( elem instanceof HTMLFormElement );
                break;
            case IMAGE:
                // Any <IMG> element. <OBJECT> elements with images are not returned.
                match = ( elem instanceof HTMLImageElement );
                break;
            case APPLET:
                // Any <APPLET> element, and any <OBJECT> element which represents an
                // Applet. This is determined by 'codetype' attribute being
                // 'application/java' or 'classid' attribute starting with 'java:'.
                match = ( elem instanceof HTMLAppletElement ) ||
                        ( elem instanceof HTMLObjectElement &&
                          ( "application/java".equals( elem.getAttribute( "codetype" ) ) ||
                            elem.getAttribute( "classid" ).startsWith( "java:" ) ) );
                break;
            case ELEMENT:
                // All form elements implement HTMLFormControl for easy identification.
                match = ( elem instanceof HTMLFormControl );
                break;
            case LINK:
                // Any <A> element, and any <AREA> elements with an 'href' attribute.
                match = ( ( elem instanceof HTMLAnchorElement ||
                            elem instanceof HTMLAreaElement ) &&
                          elem.getAttribute( "href" ).length() > 0 );
                break;
            case AREA:
                // Any <AREA> element.
                match = ( elem instanceof HTMLAreaElement );
                break;
            case OPTION:
                // Any <OPTION> element.
                match = ( elem instanceof HTMLOptionElement );
                break;
            case ROW:
                // Any <TR> element.
                match = ( elem instanceof HTMLTableRowElement );
                break;
            case TBODY:
                // Any <TBODY> element (one of three table section types).
                match = ( elem instanceof HTMLTableSectionElement &&
                          elem.getTagName().equals( "TBODY" ) );
                break;
            case CELL:
                // Any <TD> element.
                match = ( elem instanceof HTMLTableCellElement );
                break;
            }
        
            // If element type was matched and a name was specified, must also match
            // the name against either the 'id' or the 'name' attribute. The 'name'
            // attribute is relevant only for <A> elements for backward compatibility.
            if ( match && name != null )
            {
                // If an anchor and 'name' attribute matches, return true. Otherwise,
                // try 'id' attribute.
                if ( elem instanceof HTMLAnchorElement &&
                     name.equals( elem.getAttribute( "name" ) ) )
                    return true;
                match = name.equals( elem.getAttribute( "id" ) );
            }
        }
        return match;
    }

    
}


/**
 * {@link CollectionImpl#item} must traverse down the tree and decrement the
 * index until it matches an element who's index is zero. Since integers are
 * passed by value, this class servers to pass the index into each recursion
 * by reference. It encompasses all the operations that need be performed on
 * the index, although direct access is possible.
 * 
 * @xerces.internal
 * 
 * @see CollectionImpl#item
 */
class CollectionIndex
{
    
    
    /**
     * Returns the current index.
     * 
     * @return Current index
     */
    int getIndex()
    {
        return _index;
    }
    
    
    /**
     * Decrements the index by one.
     */
    void decrement()
    {
        -- _index;
    }
    
    
    /**
     * Returns true if index is zero (or negative).
     * 
     * @return True if index is zero
     */
    boolean isZero()
    {
        return _index <= 0;
    }
    
    
    /**
     * Constructs a new index with the specified initial value. The index will
     * then be decremeneted until it reaches zero.
     * 
     * @param index The initial value
     */
    CollectionIndex( int index )
    {
        _index = index;
    }
    
    
    /**
     * Holds the actual value that is passed by reference using this class.
     */
    private int        _index;
    

}