/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.html.dom;

import java.io.Serializable;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLAnchorElement;
import org.w3c.dom.html.HTMLAppletElement;
import org.w3c.dom.html.HTMLAreaElement;
import org.w3c.dom.html.HTMLCollection;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLFormElement;
import org.w3c.dom.html.HTMLImageElement;
import org.w3c.dom.html.HTMLObjectElement;
import org.w3c.dom.html.HTMLOptionElement;
import org.w3c.dom.html.HTMLTableCellElement;
import org.w3c.dom.html.HTMLTableRowElement;
import org.w3c.dom.html.HTMLTableSectionElement;

Implements HTMLCollection to traverse any named elements on a HTMLDocument. The elements type to look for is identified in the constructor by code. This collection is not optimized for traversing large trees.

The collection has to meet two requirements: it has to be live, and it has to traverse depth first and always return results in that order. As such, using an object container (such as Vector) is expensive on insert/remove operations. Instead, the collection has been implemented using three traversing functions. As a result, operations on large documents will result in traversal of the entire document tree and consume a considerable amount of time.

Note that synchronization on the traversed document cannot be achieved. The document itself cannot be locked, and locking each traversed node is likely to lead to a dead lock condition. Therefore, there is a chance of the document being changed as results are fetched; in all likelihood, the results might be out dated, but not erroneous.

Author:Assaf Arkin
See Also:
@xerces.internal
Version:$Revision: 1035042 $ $Date: 2010-11-15 00:11:49 +0530 (Mon, 15 Nov 2010) $
/** * Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named * elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to * look for is identified in the constructor by code. This collection is not * optimized for traversing large trees. * <p> * The collection has to meet two requirements: it has to be live, and it has * to traverse depth first and always return results in that order. As such, * using an object container (such as {@link java.util.Vector}) is expensive on * insert/remove operations. Instead, the collection has been implemented using * three traversing functions. As a result, operations on large documents will * result in traversal of the entire document tree and consume a considerable * amount of time. * <p> * Note that synchronization on the traversed document cannot be achieved. * The document itself cannot be locked, and locking each traversed node is * likely to lead to a dead lock condition. Therefore, there is a chance of the * document being changed as results are fetched; in all likelihood, the results * might be out dated, but not erroneous. * * @xerces.internal * * @version $Revision: 1035042 $ $Date: 2010-11-15 00:11:49 +0530 (Mon, 15 Nov 2010) $ * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a> * @see org.w3c.dom.html.HTMLCollection */
class HTMLCollectionImpl implements HTMLCollection, Serializable { private static final long serialVersionUID = 9112122196669185082L;
Request collection of all anchors in document: <A> elements that have a name attribute.
/** * Request collection of all anchors in document: &lt;A&gt; elements that * have a <code>name</code> attribute. */
static final short ANCHOR = 1;
Request collection of all forms in document: <FORM> elements.
/** * Request collection of all forms in document: &lt;FORM&gt; elements. */
static final short FORM = 2;
Request collection of all images in document: <IMG> elements.
/** * Request collection of all images in document: &lt;IMG&gt; elements. */
static final short IMAGE = 3;
Request collection of all Applets in document: <APPLET> and <OBJECT> elements (<OBJECT> must contain an Applet).
/** * Request collection of all Applets in document: &lt;APPLET&gt; and * &lt;OBJECT&gt; elements (&lt;OBJECT&gt; must contain an Applet). */
static final short APPLET = 4;
Request collection of all links in document: <A> and <AREA> elements (must have a href attribute).
/** * Request collection of all links in document: &lt;A&gt; and &lt;AREA&gt; * elements (must have a <code>href</code> attribute). */
static final short LINK = 5;
Request collection of all options in selection: <OPTION> elements in <SELECT> or <OPTGROUP>.
/** * Request collection of all options in selection: &lt;OPTION&gt; elements in * &lt;SELECT&gt; or &lt;OPTGROUP&gt;. */
static final short OPTION = 6;
Request collection of all rows in table: <TR> elements in table or table section.
/** * Request collection of all rows in table: &lt;TR&gt; elements in table or * table section. */
static final short ROW = 7;
Request collection of all form elements: <INPUT>, <BUTTON>, <SELECT>, and <TEXTAREA> elements inside form <FORM>.
/** * Request collection of all form elements: &lt;INPUT&gt;, &lt;BUTTON&gt;, * &lt;SELECT&gt;, and &lt;TEXTAREA&gt; elements inside form &lt;FORM&gt;. */
static final short ELEMENT = 8;
Request collection of all areas in map: <AREA> element in <MAP> (non recursive).
/** * Request collection of all areas in map: &lt;AREA&gt; element in &lt;MAP&gt; * (non recursive). */
static final short AREA = -1;
Request collection of all table bodies in table: <TBODY> element in table <TABLE> (non recursive).
/** * Request collection of all table bodies in table: &lt;TBODY&gt; element in * table &lt;TABLE&gt; (non recursive). */
static final short TBODY = -2;
Request collection of all cells in row: <TD> and <TH> elements in <TR> (non recursive).
/** * Request collection of all cells in row: &lt;TD&gt; and &lt;TH&gt; * elements in &lt;TR&gt; (non recursive). */
static final short CELL = -3;
Indicates what this collection is looking for. Holds one of the enumerated values and used by collectionMatch. Set by the constructor and determine the collection's use for its life time.
/** * Indicates what this collection is looking for. Holds one of the enumerated * values and used by {@link #collectionMatch}. Set by the constructor and * determine the collection's use for its life time. */
private short _lookingFor;
This is the top level element underneath which the collection exists.
/** * This is the top level element underneath which the collection exists. */
private Element _topLevel;
Construct a new collection that retrieves element of the specific type (lookingFor) from the specific document portion (topLevel).
Params:
  • topLevel – The element underneath which the collection exists
  • lookingFor – Code indicating what elements to look for
/** * Construct a new collection that retrieves element of the specific type * (<code>lookingFor</code>) from the specific document portion * (<code>topLevel</code>). * * @param topLevel The element underneath which the collection exists * @param lookingFor Code indicating what elements to look for */
HTMLCollectionImpl( HTMLElement topLevel, short lookingFor ) { if ( topLevel == null ) throw new NullPointerException( "HTM011 Argument 'topLevel' is null." ); _topLevel = topLevel; _lookingFor = lookingFor; }
Returns the length of the collection. This method might traverse the entire document tree.
Returns:Length of the collection
/** * Returns the length of the collection. This method might traverse the * entire document tree. * * @return Length of the collection */
public final int getLength() { // Call recursive function on top-level element. return getLength( _topLevel ); }
Retrieves the indexed node from the collection. Nodes are numbered in tree order - depth-first traversal order. This method might traverse the entire document tree.
Params:
  • index – The index of the node to return
Returns:The specified node or null if no such node found
/** * Retrieves the indexed node from the collection. Nodes are numbered in * tree order - depth-first traversal order. This method might traverse * the entire document tree. * * @param index The index of the node to return * @return The specified node or null if no such node found */
public final Node item( int index ) { if ( index < 0 ) throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." ); // Call recursive function on top-level element. return item( _topLevel, new CollectionIndex( index ) ); }
Retrieves the named node from the collection. The name is matched case sensitive against the id attribute of each element in the collection, returning the first match. The tree is traversed in depth-first order. This method might traverse the entire document tree.
Params:
  • name – The name of the node to return
Returns:The specified node or null if no such node found
/** * Retrieves the named node from the collection. The name is matched case * sensitive against the <TT>id</TT> attribute of each element in the * collection, returning the first match. The tree is traversed in * depth-first order. This method might traverse the entire document tree. * * @param name The name of the node to return * @return The specified node or null if no such node found */
public final Node namedItem( String name ) { if ( name == null ) throw new NullPointerException( "HTM013 Argument 'name' is null." ); // Call recursive function on top-level element. return namedItem( _topLevel, name ); }
Recursive function returns the number of elements of a particular type that exist under the top level element. This is a recursive function and the top level element is passed along.
Params:
  • topLevel – Top level element from which to scan
Returns:Number of elements
/** * Recursive function returns the number of elements of a particular type * that exist under the top level element. This is a recursive function * and the top level element is passed along. * * @param topLevel Top level element from which to scan * @return Number of elements */
private int getLength( Element topLevel ) { int length; Node node; synchronized ( topLevel ) { // Always count from zero and traverse all the childs of the // current element in the order they appear. length = 0; node = topLevel.getFirstChild(); while ( node != null ) { // If a particular node is an element (could be HTML or XML), // do two things: if it's the one we're looking for, count // another matched element; at any rate, traverse it's // children as well. if ( node instanceof Element ) { if ( collectionMatch( (Element) node, null ) ) ++ length; else if ( recurse() ) length += getLength( (Element) node ); } node = node.getNextSibling(); } } return length; }
Recursive function returns the numbered element of a particular type that exist under the top level element. This is a recursive function and the top level element is passed along.

Note that this function must call itself with an index and get back both the element (if one was found) and the new index which is decremeneted for any like element found. Since integers are only passed by value, this function makes use of a separate class (CollectionIndex) to hold that index.

Params:
  • topLevel – Top level element from which to scan
  • index – The index of the item to retreive
See Also:
Returns:Number of elements
/** * Recursive function returns the numbered element of a particular type * that exist under the top level element. This is a recursive function * and the top level element is passed along. * <p> * Note that this function must call itself with an index and get back both * the element (if one was found) and the new index which is decremeneted * for any like element found. Since integers are only passed by value, * this function makes use of a separate class ({@link CollectionIndex}) * to hold that index. * * @param topLevel Top level element from which to scan * @param index The index of the item to retreive * @return Number of elements * @see CollectionIndex */
private Node item( Element topLevel, CollectionIndex index ) { Node node; Node result; synchronized ( topLevel ) { // Traverse all the childs of the current element in the order // they appear. Count from the index backwards until you reach // matching element with an index of zero. Return that element. node = topLevel.getFirstChild(); while ( node != null ) { // If a particular node is an element (could be HTML or XML), // do two things: if it's the one we're looking for, decrease // the index and if zero, return this node; at any rate, // traverse it's children as well. if ( node instanceof Element ) { if ( collectionMatch( (Element) node, null ) ) { if ( index.isZero() ) return node; index.decrement(); } else if ( recurse() ) { result = item( (Element) node, index ); if ( result != null ) return result; } } node = node.getNextSibling(); } } return null; }
Recursive function returns an element of a particular type with the specified name (id attribute).
Params:
  • topLevel – Top level element from which to scan
  • name – The named element to look for
Returns:The first named element found
/** * Recursive function returns an element of a particular type with the * specified name (<TT>id</TT> attribute). * * @param topLevel Top level element from which to scan * @param name The named element to look for * @return The first named element found */
private Node namedItem( Element topLevel, String name ) { Node node; Node result; synchronized ( topLevel ) { // Traverse all the childs of the current element in the order // they appear. node = topLevel.getFirstChild(); while ( node != null ) { // If a particular node is an element (could be HTML or XML), // do two things: if it's the one we're looking for, and the // name (id attribute) attribute is the one we're looking for, // return this element; otherwise, traverse it's children. if ( node instanceof Element ) { if ( collectionMatch( (Element) node, name ) ) return node; else if ( recurse() ) { result = namedItem( (Element) node, name ); if ( result != null ) return result; } } node = node.getNextSibling(); } return node; } }
Returns true if scanning methods should iterate through the collection. When looking for elements in the document, recursing is needed to traverse the full document tree. When looking inside a specific element (e.g. for a cell inside a row), recursing can lead to erroneous results.
Returns:True if methods should recurse to traverse entire tree
/** * Returns true if scanning methods should iterate through the collection. * When looking for elements in the document, recursing is needed to traverse * the full document tree. When looking inside a specific element (e.g. for a * cell inside a row), recursing can lead to erroneous results. * * @return True if methods should recurse to traverse entire tree */
protected boolean recurse() { return _lookingFor > 0; }
Determines if current element matches based on what we're looking for. The element is passed along with an optional identifier name. If the element is the one we're looking for, return true. If the name is also specified, the name must match the id attribute (match name first for anchors).
Params:
  • elem – The current element
  • name – The identifier name or null
Returns:The element matches what we're looking for
/** * Determines if current element matches based on what we're looking for. * The element is passed along with an optional identifier name. If the * element is the one we're looking for, return true. If the name is also * specified, the name must match the <code>id</code> attribute * (match <code>name</code> first for anchors). * * @param elem The current element * @param name The identifier name or null * @return The element matches what we're looking for */
protected boolean collectionMatch( Element elem, String name ) { boolean match; synchronized ( elem ) { // Begin with no matching. Depending on what we're looking for, // attempt to match based on the element type. This is the quickest // way to match involving only a cast. Do the expensive string // comparison later on. match = false; switch ( _lookingFor ) { case ANCHOR: // Anchor is an <A> element with a 'name' attribute. Otherwise, it's // just a link. match = ( elem instanceof HTMLAnchorElement ) && elem.getAttribute( "name" ).length() > 0; break; case FORM: // Any <FORM> element. match = ( elem instanceof HTMLFormElement ); break; case IMAGE: // Any <IMG> element. <OBJECT> elements with images are not returned. match = ( elem instanceof HTMLImageElement ); break; case APPLET: // Any <APPLET> element, and any <OBJECT> element which represents an // Applet. This is determined by 'codetype' attribute being // 'application/java' or 'classid' attribute starting with 'java:'. match = ( elem instanceof HTMLAppletElement ) || ( elem instanceof HTMLObjectElement && ( "application/java".equals( elem.getAttribute( "codetype" ) ) || elem.getAttribute( "classid" ).startsWith( "java:" ) ) ); break; case ELEMENT: // All form elements implement HTMLFormControl for easy identification. match = ( elem instanceof HTMLFormControl ); break; case LINK: // Any <A> element, and any <AREA> elements with an 'href' attribute. match = ( ( elem instanceof HTMLAnchorElement || elem instanceof HTMLAreaElement ) && elem.getAttribute( "href" ).length() > 0 ); break; case AREA: // Any <AREA> element. match = ( elem instanceof HTMLAreaElement ); break; case OPTION: // Any <OPTION> element. match = ( elem instanceof HTMLOptionElement ); break; case ROW: // Any <TR> element. match = ( elem instanceof HTMLTableRowElement ); break; case TBODY: // Any <TBODY> element (one of three table section types). match = ( elem instanceof HTMLTableSectionElement && elem.getTagName().equals( "TBODY" ) ); break; case CELL: // Any <TD> or <TH> element. match = ( elem instanceof HTMLTableCellElement ); break; } // If element type was matched and a name was specified, must also match // the name against either the 'id' or the 'name' attribute. The 'name' // attribute is relevant only for <A> elements for backward compatibility. if ( match && name != null ) { // If an anchor and 'name' attribute matches, return true. Otherwise, // try 'id' attribute. if ( elem instanceof HTMLAnchorElement && name.equals( elem.getAttribute( "name" ) ) ) return true; match = name.equals( elem.getAttribute( "id" ) ); } } return match; } }
item.item must traverse down the tree and decrement the index until it matches an element who's index is zero. Since integers are passed by value, this class servers to pass the index into each recursion by reference. It encompasses all the operations that need be performed on the index, although direct access is possible.
See Also:
  • item.item
@xerces.internal
/** * {@link CollectionImpl#item} must traverse down the tree and decrement the * index until it matches an element who's index is zero. Since integers are * passed by value, this class servers to pass the index into each recursion * by reference. It encompasses all the operations that need be performed on * the index, although direct access is possible. * * @xerces.internal * * @see CollectionImpl#item */
class CollectionIndex {
Returns the current index.
Returns:Current index
/** * Returns the current index. * * @return Current index */
int getIndex() { return _index; }
Decrements the index by one.
/** * Decrements the index by one. */
void decrement() { -- _index; }
Returns true if index is zero (or negative).
Returns:True if index is zero
/** * Returns true if index is zero (or negative). * * @return True if index is zero */
boolean isZero() { return _index <= 0; }
Constructs a new index with the specified initial value. The index will then be decremeneted until it reaches zero.
Params:
  • index – The initial value
/** * Constructs a new index with the specified initial value. The index will * then be decremeneted until it reaches zero. * * @param index The initial value */
CollectionIndex( int index ) { _index = index; }
Holds the actual value that is passed by reference using this class.
/** * Holds the actual value that is passed by reference using this class. */
private int _index; }