File | Doc | Category | Size | Date | Package
REProgram.java | API Doc | Java SE 6 API | 5753 | Tue Jun 10 00:22:24 BST 2008 | com.sun.org.apache.regexp.internal

REProgram

public class REProgram extends Object implements Serializable
A class that holds compiled regular expressions. This is exposed mainly for use by the recompile utility (which helps you produce precompiled REProgram objects). You should not otherwise need to work directly with this class.
see
RE
see
RECompiler
author
Jonathan Locke
version
$Id: REProgram.java,v 1.1.2.1 2005/08/01 00:02:54 jeffsuttor Exp $

Fields Summary
static final int
OPT_HASBACKREFS
char[]
instruction
int
lenInstruction
char[]
prefix
int
flags
int
maxParens
Constructors Summary
public REProgram(char[] instruction)
Constructs a program object from a character array

param
instruction Character array with RE opcode instructions in it


                           
      
    
        // Delegate to the (char[], int) constructor, declaring the whole
        // array as in use. A null array fails here on instruction.length.
        this(instruction, instruction.length);
    
public REProgram(int parens, char[] instruction)
Constructs a program object from a character array and a count of parens

param
parens Count of parens in the program
param
instruction Character array with RE opcode instructions in it

        // Delegate to the (char[], int) constructor with the whole array in
        // use; the this(...) call has to be the first statement.
        this(instruction, instruction.length);
        // Then record the paren count supplied by the caller.
        this.maxParens = parens;
    
public REProgram(char[] instruction, int lenInstruction)
Constructs a program object from a character array

param
instruction Character array with RE opcode instructions in it
param
lenInstruction Amount of instruction array in use

        // All initialization is funnelled through setInstructions, so the
        // constructor and a later re-set of the program behave the same way.
        // NOTE(review): setInstructions is public and non-final on a non-final
        // class, so a subclass override would be invoked from this constructor.
        setInstructions(instruction, lenInstruction);
    
Methods Summary
public char[] getInstructions()
Returns a copy of the current regular expression program in a character array that is exactly the right length to hold the program. If there is no program compiled yet, getInstructions() will return null.

return
A copy of the current compiled RE program

        // A length of zero means no program has been compiled yet
        if (lenInstruction == 0)
        {
            return null;
        }

        // Hand back a private copy trimmed to exactly the in-use length,
        // so callers cannot modify the program held by this object
        char[] copy = new char[lenInstruction];
        System.arraycopy(instruction, 0, copy, 0, lenInstruction);
        return copy;
    
public void setInstructions(char[] instruction, int lenInstruction)
Sets a new regular expression program to run. It is this method which performs any special compile-time search optimizations. Currently only two optimizations are in place: one which checks for backreferences (so that they can be lazily allocated) and another which attempts to find a prefix anchor string so that substantial amounts of input can potentially be skipped without running the actual program.

param
instruction Program instruction buffer
param
lenInstruction Length of instruction buffer in use

        // Save reference to instruction array. The array is stored as-is, not
        // copied, so later changes made by the caller are visible here.
        this.instruction = instruction;
        this.lenInstruction = lenInstruction;

        // Initialize other program-related variables; both are recomputed
        // below from the new program.
        flags = 0;
        prefix = null;

        // Try various compile-time optimizations if there's a program
        if (instruction != null && lenInstruction != 0)
        {
            // Prefix optimization.
            // If the first node is a branch
            if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
            {
                // to the end node
                // The value in the branch's "next" slot is used directly as an
                // index into the instruction array.
                // NOTE(review): no bounds check on next against lenInstruction -
                // assumes a well-formed program; confirm with the compiler output.
                int next = instruction[0 + RE.offsetNext];
                if (instruction[next + RE.offsetOpcode] == RE.OP_END)
                {
                    // and the branch starts with an atom
                    // (i.e. the second node, at index RE.nodeSize, is an OP_ATOM)
                    if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
                    {
                        // then get that atom as a prefix because there's no other choice
                        // The atom's length is read from its opdata slot and its
                        // characters are copied from right after the atom node.
                        int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
                        prefix = new char[lenAtom];
                        System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
                    }
                }
            }

            // Label lets the switch below leave the scan loop, not just the switch.
            BackrefScanLoop:

            // Check for backreferences
            // Walks the program one node (RE.nodeSize chars) at a time. Each
            // case below advances i further by an amount read from the node's
            // opdata slot, on top of the loop's own nodeSize step.
            for (int i = 0; i < lenInstruction; i += RE.nodeSize)
            {
                switch (instruction[i + RE.offsetOpcode])
                {
                    case RE.OP_ANYOF:
                        // Skip an extra 2 * opdata chars
                        // (presumably opdata counts two-char ranges - confirm in RE).
                        i += (instruction[i + RE.offsetOpdata] * 2);
                        break;

                    case RE.OP_ATOM:
                        // Skip an extra opdata chars; the prefix code above
                        // treats opdata as the atom's length.
                        i += instruction[i + RE.offsetOpdata];
                        break;

                    case RE.OP_BACKREF:
                        // One backreference is enough: set the flag and stop scanning.
                        flags |= OPT_HASBACKREFS;
                        break BackrefScanLoop;
                }
            }
        }