File | Doc | Category | Size | Date | Package
REUtil.java | API Doc | Java SE 5 API | 14193 | Fri Aug 26 14:55:50 BST 2005 | com.sun.org.apache.xerces.internal.impl.xpath.regex

REUtil.java

/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999-2002 The Apache Software Foundation.  All rights 
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:  
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written 
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

package com.sun.org.apache.xerces.internal.impl.xpath.regex;

import java.text.CharacterIterator;

/**
 * Static helper routines used by the regular-expression classes of this
 * package: UTF-16 surrogate arithmetic, option-letter parsing, stripping of
 * extended-syntax comments, a small cache of {@link RegularExpression}
 * instances and meta-character quoting.
 *
 * <p>The class cannot be instantiated; every member is static.
 *
 * @version $Id: REUtil.java,v 1.7 2002/11/20 00:49:47 twl Exp $
 */
public final class REUtil {

    /** Usage line printed by {@link #main(String[])}. */
    private static final String USAGE =
        "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String";

    /** Option letters that {@link #main(String[])} accepts as <code>-x</code> switches. */
    private static final String COMMAND_LINE_FLAGS = "imsuwX";

    /** Characters that {@link #quoteMeta(String)} prefixes with a backslash. */
    private static final String META_CHARACTERS = ".*+?{[()|\\^$";

    /** Number of slots in {@link #regexCache}. */
    static final int CACHESIZE = 20;

    /**
     * Most-recently-used cache filled by {@link #createRegex(String, String)};
     * index 0 holds the newest entry. The array itself is the lock object.
     */
    static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];

    private REUtil() {
    }

    // ================================================================

    /**
     * Combines a surrogate pair into the code point it encodes.
     * No validation is done; the result is only meaningful when
     * <code>high</code> is in 0xD800-0xDBFF and <code>low</code> in 0xDC00-0xDFFF.
     *
     * @param high the high (leading) surrogate
     * @param low  the low (trailing) surrogate
     * @return <code>0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)</code>
     */
    static final int composeFromSurrogates(int high, int low) {
        return 0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00);
    }

    /**
     * Tells whether <code>ch</code> is a low (trailing) surrogate, 0xDC00-0xDFFF.
     *
     * @param ch a UTF-16 code unit or a code point
     * @return <code>true</code> only for values in 0xDC00-0xDFFF
     */
    static final boolean isLowSurrogate(int ch) {
        // Mask every bit above the low ten so that values beyond 0xFFFF
        // (e.g. 0x1DC00) are not mistaken for surrogates.
        return (ch & ~0x3ff) == 0xdc00;
    }

    /**
     * Tells whether <code>ch</code> is a high (leading) surrogate, 0xD800-0xDBFF.
     *
     * @param ch a UTF-16 code unit or a code point
     * @return <code>true</code> only for values in 0xD800-0xDBFF
     */
    static final boolean isHighSurrogate(int ch) {
        // See isLowSurrogate for why the full-width mask is used.
        return (ch & ~0x3ff) == 0xd800;
    }

    /**
     * Splits a code point into a two-character surrogate pair.
     * No range check is done; the result is only a valid pair for
     * <code>ch</code> in 0x10000-0x10FFFF.
     *
     * @param ch the code point to split
     * @return a string of length 2: high surrogate followed by low surrogate
     */
    static final String decomposeToSurrogates(int ch) {
        int offset = ch - 0x10000;
        char[] pair = {
            (char) ((offset >> 10) + 0xd800),
            (char) ((offset & 0x3ff) + 0xdc00)
        };
        return new String(pair);
    }

    /**
     * Copies the characters at indices <code>begin</code> (inclusive) to
     * <code>end</code> (exclusive) of <code>iterator</code> into a string.
     * The characters are read with <code>setIndex</code>, so the iterator's
     * current index is moved by this call.
     *
     * @param iterator the character source
     * @param begin    index of the first character to copy
     * @param end      index just past the last character to copy
     * @return the copied characters
     */
    static final String substring(CharacterIterator iterator, int begin, int end) {
        char[] copy = new char[end - begin];
        for (int i = 0; i < copy.length; i++) {
            copy[i] = iterator.setIndex(begin + i);
        }
        return new String(copy);
    }

    // ================================================================

    /**
     * Maps an option letter to the corresponding <code>RegularExpression</code>
     * option constant.
     *
     * @param ch the option letter
     * @return the constant for <code>ch</code>, or 0 when the letter is not recognized
     */
    static final int getOptionValue(int ch) {
        switch (ch) {
          case 'i':
            return RegularExpression.IGNORE_CASE;
          case 'm':
            return RegularExpression.MULTIPLE_LINES;
          case 's':
            return RegularExpression.SINGLE_LINE;
          case 'x':
            return RegularExpression.EXTENDED_COMMENT;
          case 'u':
            return RegularExpression.USE_UNICODE_CATEGORY;
          case 'w':
            return RegularExpression.UNICODE_WORD_BOUNDARY;
          case 'F':
            return RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
          case 'H':
            return RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
          case 'X':
            return RegularExpression.XMLSCHEMA_MODE;
          case ',':
            return RegularExpression.SPECIAL_COMMA;
          default:
            return 0;
        }
    }

    /**
     * Converts a string of option letters into the OR of their option constants.
     *
     * @param opts option letters as understood by {@link #getOptionValue(int)};
     *             <code>null</code> is treated as "no options"
     * @return the combined option bits, 0 for <code>null</code> or an empty string
     * @throws ParseException if a letter maps to 0 in {@link #getOptionValue(int)};
     *                        the message contains the rest of <code>opts</code>
     *                        starting at that letter
     */
    static final int parseOptions(String opts) throws ParseException {
        if (opts == null) {
            return 0;
        }
        int options = 0;
        for (int i = 0; i < opts.length(); i++) {
            int value = getOptionValue(opts.charAt(i));
            if (value == 0) {
                throw new ParseException("Unknown Option: " + opts.substring(i), -1);
            }
            options |= value;
        }
        return options;
    }

    /**
     * Builds the option-letter string for a set of option bits. Letters are
     * emitted in the fixed order <code>F H X i m s u w x ,</code>.
     *
     * @param options OR-ed <code>RegularExpression</code> option constants
     * @return the interned option string
     */
    static final String createOptionString(int options) {
        StringBuilder sb = new StringBuilder(10);
        if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0) {
            sb.append('F');
        }
        if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0) {
            sb.append('H');
        }
        if ((options & RegularExpression.XMLSCHEMA_MODE) != 0) {
            sb.append('X');
        }
        if ((options & RegularExpression.IGNORE_CASE) != 0) {
            sb.append('i');
        }
        if ((options & RegularExpression.MULTIPLE_LINES) != 0) {
            sb.append('m');
        }
        if ((options & RegularExpression.SINGLE_LINE) != 0) {
            sb.append('s');
        }
        if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0) {
            sb.append('u');
        }
        if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0) {
            sb.append('w');
        }
        if ((options & RegularExpression.EXTENDED_COMMENT) != 0) {
            sb.append('x');
        }
        if ((options & RegularExpression.SPECIAL_COMMA) != 0) {
            sb.append(',');
        }
        return sb.toString().intern();
    }

    // ================================================================

    /**
     * Removes white space and <code>#</code> comments from a pattern:
     * <ul>
     * <li>unescaped tab, line feed, form feed, carriage return and space are dropped;</li>
     * <li>an unescaped <code>#</code> and everything up to and including the
     *     next CR or LF (or the end of input) is dropped;</li>
     * <li>a backslash followed by <code>#</code> or one of those white-space
     *     characters yields just that character;</li>
     * <li>any other backslash sequence is copied unchanged, as is a
     *     backslash that ends the input.</li>
     * </ul>
     *
     * @param regex the pattern text; must not be <code>null</code>
     * @return the pattern with comments and white space stripped
     */
    static String stripExtendedComment(String regex) {
        int len = regex.length();
        StringBuilder buffer = new StringBuilder(len);
        int offset = 0;
        while (offset < len) {
            int ch = regex.charAt(offset++);

            // Skips a white space.
            if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ') {
                continue;
            }

            // Skips characters between '#' and a line end.
            if (ch == '#') {
                while (offset < len) {
                    ch = regex.charAt(offset++);
                    if (ch == '\r' || ch == '\n') {
                        break;
                    }
                }
                continue;
            }

            if (ch == '\\' && offset < len) {
                int next = regex.charAt(offset++);
                boolean escapedBlankOrHash = next == '#'
                    || next == '\t' || next == '\n' || next == '\f'
                    || next == '\r' || next == ' ';
                // An escaped white space or '#' loses its backslash;
                // every other escape sequence is kept as written.
                if (!escapedBlankOrHash) {
                    buffer.append('\\');
                }
                buffer.append((char) next);
            } else {
                // As is (including a backslash that ends the input).
                buffer.append((char) ch);
            }
        }
        return buffer.toString();
    }

    // ================================================================

    /**
     * Sample entry.
     * <div>Usage: <KBD>java com.sun.org.apache.xerces.internal.impl.xpath.regex.REUtil
     * [-i|-m|-s|-u|-w|-X] &lt;regex&gt; &lt;string&gt;</KBD></div>
     *
     * <p>Compiles the pattern with the given option switches, matches it
     * against the string and prints the range and text of every group.
     * When the pattern or the string is missing, the usage line is printed
     * instead of attempting a match.
     */
    public static void main(String[] argv) {
        if (argv.length == 0) {
            System.out.println(USAGE);
            System.exit(0);
        }

        String pattern = null;
        String target = null;
        StringBuilder options = new StringBuilder();
        for (int i = 0; i < argv.length; i++) {
            String arg = argv[i];
            if (arg.length() == 0 || arg.charAt(0) != '-') {
                // First two non-switch arguments are the pattern and the target.
                if (pattern == null) {
                    pattern = arg;
                } else if (target == null) {
                    target = arg;
                } else {
                    System.err.println("Unnecessary: " + arg);
                }
            } else if (arg.length() == 2 && COMMAND_LINE_FLAGS.indexOf(arg.charAt(1)) >= 0) {
                options.append(arg.charAt(1));
            } else {
                System.err.println("Unknown option: " + arg);
            }
        }

        // Without both operands there is nothing to match; say so instead of
        // passing null on to the regular-expression classes.
        if (pattern == null || target == null) {
            System.err.println(USAGE);
            return;
        }

        try {
            RegularExpression reg = new RegularExpression(pattern, options.toString());
            System.out.println("RegularExpression: " + reg);
            Match match = new Match();
            reg.matches(target, match);
            for (int i = 0; i < match.getNumberOfGroups(); i++) {
                if (i == 0) {
                    System.out.print("Matched range for the whole pattern: ");
                } else {
                    System.out.print("[" + i + "]: ");
                }
                if (match.getBeginning(i) < 0) {
                    System.out.println("-1");
                } else {
                    System.out.print(match.getBeginning(i) + ", " + match.getEnd(i) + ", ");
                    System.out.println("\"" + match.getCapturedText(i) + "\"");
                }
            }
        } catch (ParseException pe) {
            // Print the pattern with a caret under the location the exception reports.
            System.err.println(pe.getClass().getName() + ": " + pe.getMessage());
            String indent = "        ";
            System.err.println(indent + pattern);
            int loc = pe.getLocation();
            if (loc >= 0) {
                System.err.print(indent);
                for (int i = 0; i < loc; i++) {
                    System.err.print("-");
                }
                System.err.println("^");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a RegularExpression instance.
     * This method caches created instances: up to {@link #CACHESIZE} of them
     * are kept, the most recently requested first. A cache hit is moved to
     * the front; on a miss a new instance is created and put at the front,
     * pushing the oldest entry out once the cache is full.
     *
     * @param pattern the regular expression
     * @param options option letters, or <code>null</code> for none
     * @return a cached or newly created instance
     * @throws ParseException if <code>options</code> contains an unknown
     *                        letter, or propagated from the
     *                        <code>RegularExpression</code> constructor
     * @see RegularExpression#RegularExpression(java.lang.String, java.lang.String)
     */
    public static RegularExpression createRegex(String pattern, String options)
        throws ParseException {
        int intOptions = REUtil.parseOptions(options);
        synchronized (REUtil.regexCache) {
            int hit = -1;
            for (int i = 0; i < REUtil.CACHESIZE; i++) {
                RegularExpression cached = REUtil.regexCache[i];
                if (cached == null) {
                    // This method only inserts at index 0 and shifts the rest
                    // down, so the first empty slot ends the occupied part.
                    break;
                }
                if (cached.equals(pattern, intOptions)) {
                    hit = i;
                    break;
                }
            }

            RegularExpression re;
            if (hit >= 0) {
                re = REUtil.regexCache[hit];
                if (hit > 0) {
                    // Slide entries 0..hit-1 down one slot and move the hit to the front.
                    System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, hit);
                    REUtil.regexCache[0] = re;
                }
            } else {
                // Construct before touching the cache so that a failing
                // pattern leaves it unchanged.
                re = new RegularExpression(pattern, options);
                System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE - 1);
                REUtil.regexCache[0] = re;
            }
            return re;
        }
    }

    /**
     * Matches <code>target</code> against <code>regex</code> with no options,
     * obtaining the expression through {@link #createRegex(String, String)}.
     *
     * @param regex  the regular expression
     * @param target the text to match
     * @return the result of <code>RegularExpression.matches(target)</code>
     * @throws ParseException propagated from {@link #createRegex(String, String)}
     * @see RegularExpression#matches(java.lang.String)
     */
    public static boolean matches(String regex, String target) throws ParseException {
        return REUtil.createRegex(regex, null).matches(target);
    }

    /**
     * Matches <code>target</code> against <code>regex</code> compiled with
     * <code>options</code>, obtaining the expression through
     * {@link #createRegex(String, String)}.
     *
     * @param regex   the regular expression
     * @param options option letters, or <code>null</code> for none
     * @param target  the text to match
     * @return the result of <code>RegularExpression.matches(target)</code>
     * @throws ParseException propagated from {@link #createRegex(String, String)}
     * @see RegularExpression#matches(java.lang.String)
     */
    public static boolean matches(String regex, String options, String target) throws ParseException {
        return REUtil.createRegex(regex, options).matches(target);
    }

    // ================================================================

    /**
     * Puts a backslash in front of each of the characters
     * <code>. * + ? { [ ( ) | \ ^ $</code> in <code>literal</code>.
     *
     * @param literal the text to quote; must not be <code>null</code>
     * @return the quoted text, or <code>literal</code> itself (same instance)
     *         when it contains none of those characters
     */
    public static String quoteMeta(String literal) {
        int len = literal.length();
        StringBuilder buffer = null;      // allocated lazily at the first meta character
        for (int i = 0; i < len; i++) {
            char ch = literal.charAt(i);
            if (META_CHARACTERS.indexOf(ch) >= 0) {
                if (buffer == null) {
                    // Worst case: every remaining character needs a backslash.
                    buffer = new StringBuilder(i + (len - i) * 2);
                    buffer.append(literal.substring(0, i));
                }
                buffer.append('\\');
                buffer.append(ch);
            } else if (buffer != null) {
                buffer.append(ch);
            }
        }
        return buffer != null ? buffer.toString() : literal;
    }

    // ================================================================

    /**
     * Prints each character of <code>v</code> to standard output as a
     * hexadecimal number followed by a space, then ends the line.
     *
     * @param v the string to dump; must not be <code>null</code>
     */
    static void dumpString(String v) {
        for (int i = 0; i < v.length(); i++) {
            System.out.print(Integer.toHexString(v.charAt(i)));
            System.out.print(" ");
        }
        System.out.println();
    }
}