FileDocCategorySizeDatePackage
SymbolTable.javaAPI DocJava SE 6 API11294Tue Jun 10 00:22:52 BST 2008com.sun.org.apache.xerces.internal.util

SymbolTable.java

/*
 * The contents of this file are subject to the terms
 * of the Common Development and Distribution License
 * (the "License").  You may not use this file except
 * in compliance with the License.
 *
 * You can obtain a copy of the license at
 * https://jaxp.dev.java.net/CDDLv1.0.html.
 * See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL
 * HEADER in each file and include the License file at
 * https://jaxp.dev.java.net/CDDLv1.0.html
 * If applicable add the following below this CDDL HEADER
 * with the fields enclosed by brackets "[]" replaced with
 * your own identifying information: Portions Copyright
 * [year] [name of copyright owner]
 */

/*
 * $Id: XMLEntityReader.java,v 1.3 2005/11/03 17:02:21 jeffsuttor Exp $
 * @(#)SymbolTable.java	1.11 05/11/17
 *
 * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
 */

/*
 * Copyright 2005 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.xerces.internal.util;

/**
 * This class is a symbol table implementation that guarantees that
 * strings used as identifiers are unique references. Multiple calls
 * to <code>addSymbol</code> will always return the same string
 * reference.
 * <p>
 * The symbol table performs the same task as <code>String.intern()</code>
 * with the following differences:
 * <ul>
 *  <li>
 *   A new string object does not need to be created in order to
 *   retrieve a unique reference. Symbols can be added by using
 *   a series of characters in a character array.
 *  </li>
 *  <li>
 *   Users of the symbol table can provide their own symbol hashing
 *   implementation. For example, a simple string hashing algorithm
 *   may fail to produce a balanced set of hashcodes for symbols
 *   that are <em>mostly</em> unique. Strings with similar leading
 *   characters are especially prone to this poor hashing behavior.
 *  </li>
 * </ul>
 *
 * @see SymbolHash
 *
 * @author Andy Clark
 *
 * @version $Id: SymbolTable.java,v 1.3 2005/10/03 14:55:36 sunithareddy Exp $
 */
public class SymbolTable {
    
    //
    // Constants
    //
    
    /** Default table size. */
    protected static final int TABLE_SIZE = 173;
    
        
    /** Buckets. */
    protected Entry[] fBuckets = null;
    
    // actual table size
    protected int fTableSize;
    
    //
    // Constructors
    //
    
    /** Constructs a symbol table with a default number of buckets. */
    public SymbolTable() {
        this(TABLE_SIZE);
    }
    
    /** Constructs a symbol table with a specified number of buckets. */
    public SymbolTable(int tableSize) {
        fTableSize = tableSize;
        fBuckets = new Entry[fTableSize];
    }
    
    //
    // Public methods
    //
    
    /**
     * Adds the specified symbol to the symbol table and returns a
     * reference to the unique symbol. If the symbol already exists,
     * the previous symbol reference is returned instead, in order
     * guarantee that symbol references remain unique.
     *
     * @param symbol The new symbol.
     */
    public String addSymbol(String symbol) {
        
        // search for identical symbol
        final int hash = hash(symbol);
        final int bucket = hash % fTableSize;
        final int length = symbol.length();
        OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
            if (length == entry.characters.length && hash == entry.hashCode) {
                if(symbol.regionMatches(0,entry.symbol,0,length)){
                    return entry.symbol;                    
                }
                else{
                    continue OUTER;
                }
                /**
                for (int i = 0; i < length; i++) {
                    if (symbol.charAt(i) != entry.characters[i]) {
                        continue OUTER;
                    }
                }                 
                symbolAsArray = entry.characters;
                return entry.symbol;
                 */
            }
        }
        
        // create new entry
        Entry entry = new Entry(symbol, fBuckets[bucket]);
        entry.hashCode = hash;
        fBuckets[bucket] = entry;
        return entry.symbol;
        
    } // addSymbol(String):String
    
    /**
     * Adds the specified symbol to the symbol table and returns a
     * reference to the unique symbol. If the symbol already exists,
     * the previous symbol reference is returned instead, in order
     * guarantee that symbol references remain unique.
     *
     * @param buffer The buffer containing the new symbol.
     * @param offset The offset into the buffer of the new symbol.
     * @param length The length of the new symbol in the buffer.
     */
    public String addSymbol(char[] buffer, int offset, int length) {
        // search for identical symbol
        int hash = hash(buffer, offset, length);
        int bucket = hash % fTableSize;
        OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
            if (length == entry.characters.length && hash ==entry.hashCode) {
                for (int i = 0; i < length; i++) {
                    if (buffer[offset + i] != entry.characters[i]) {
                        continue OUTER;
                    }
                }
                return entry.symbol;
            }
        }
        
        // add new entry
        Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
        fBuckets[bucket] = entry;
        entry.hashCode = hash;
        return entry.symbol;
        
    } // addSymbol(char[],int,int):String
    
    /**
     * Returns a hashcode value for the specified symbol. The value
     * returned by this method must be identical to the value returned
     * by the <code>hash(char[],int,int)</code> method when called
     * with the character array that comprises the symbol string.
     *
     * @param symbol The symbol to hash.
     */
    public int hash(String symbol) {
        
        int code = 0;
        int length = symbol.length();
        for (int i = 0; i < length; i++) {
            code = code * 37 + symbol.charAt(i);
        }
        return code & 0x7FFFFFF;
        
    } // hash(String):int
    
    /**
     * Returns a hashcode value for the specified symbol information.
     * The value returned by this method must be identical to the value
     * returned by the <code>hash(String)</code> method when called
     * with the string object created from the symbol information.
     *
     * @param buffer The character buffer containing the symbol.
     * @param offset The offset into the character buffer of the start
     *               of the symbol.
     * @param length The length of the symbol.
     */
    public int hash(char[] buffer, int offset, int length) {
        
        int code = 0;
        for (int i = 0; i < length; i++) {
            code = code * 37 + buffer[offset + i];
        }
        return code & 0x7FFFFFF;
        
    } // hash(char[],int,int):int
    
    /**
     * Returns true if the symbol table already contains the specified
     * symbol.
     *
     * @param symbol The symbol to look for.
     */
    public boolean containsSymbol(String symbol) {
        
        // search for identical symbol
        int hash = hash(symbol);
        int bucket = hash % fTableSize;
        int length = symbol.length();
        OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
            if (length == entry.characters.length && hash == entry.hashCode) {
                if(symbol.regionMatches(0,entry.symbol,0,length)){
                    return true;
                }
                else {
                    continue OUTER;
                }
                /**
                for (int i = 0; i < length; i++) {
                    if (symbol.charAt(i) != entry.characters[i]) {
                        continue OUTER;
                    }
                }
                 return true;
                 */                                
            }
        }
        
        return false;
        
    } // containsSymbol(String):boolean
    
    /**
     * Returns true if the symbol table already contains the specified
     * symbol.
     *
     * @param buffer The buffer containing the symbol to look for.
     * @param offset The offset into the buffer.
     * @param length The length of the symbol in the buffer.
     */
    public boolean containsSymbol(char[] buffer, int offset, int length) {
        
        // search for identical symbol
        int hash = hash(buffer, offset, length) ;
        int bucket = hash % fTableSize;
        OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
            if (length == entry.characters.length && hash == entry.hashCode) {
                for (int i = 0; i < length; i++) {
                    if (buffer[offset + i] != entry.characters[i]) {
                        continue OUTER;
                    }
                }
                return true;
            }
        }
        
        return false;
        
    } // containsSymbol(char[],int,int):boolean
    
       
    //
    // Classes
    //
    
    /**
     * This class is a symbol table entry. Each entry acts as a node
     * in a linked list.
     */
    protected static final class Entry {
        
        //
        // Data
        //
        
        /** Symbol. */
        public String symbol;
        int hashCode = 0;
        
        /**
         * Symbol characters. This information is duplicated here for
         * comparison performance.
         */
        public char[] characters;
        
        /** The next entry. */
        public Entry next;
        
        //
        // Constructors
        //
        
        /**
         * Constructs a new entry from the specified symbol and next entry
         * reference.
         */
        public Entry(String symbol, Entry next) {
            this.symbol = symbol.intern();
            characters = new char[symbol.length()];
            symbol.getChars(0, characters.length, characters, 0);
            this.next = next;
        }
        
        /**
         * Constructs a new entry from the specified symbol information and
         * next entry reference.
         */
        public Entry(char[] ch, int offset, int length, Entry next) {
            characters = new char[length];
            System.arraycopy(ch, offset, characters, 0, length);                
            symbol = new String(characters).intern();
            this.next = next;
        }
        
    } // class Entry
    
} // class SymbolTable