JavaCodeGenerator | public class JavaCodeGenerator extends CodeGenerator | Generate MyParser.java, MyLexer.java and MyParserTokenTypes.java |
Fields Summary |
protected int | syntacticPredLevel | protected boolean | genAST | protected boolean | saveText | String | labeledElementType | String | labeledElementASTType | String | labeledElementInit | String | commonExtraArgs | String | commonExtraParams | String | commonLocalVars | String | lt1Value | String | exceptionThrown | String | throwNoViable | RuleBlock | currentRuleTracks the rule being generated. Used for mapTreeId | String | currentASTResultTracks the rule or labeled subrule being generated. Used for
AST generation. | Hashtable | treeVariableMapMapping between the ids used in the current alt, and the
names of variables used to represent their AST values. | Hashtable | declaredASTVariablesUsed to keep track of which AST variables have been defined in a rule
(except for the #rule_name and #rule_name_in vars) | int | astVarNumber | protected static final String | NONUNIQUESpecial value used to mark duplicate in treeVariableMap | public static final int | caseSizeThreshold | private Vector | semPreds |
Constructors Summary |
public JavaCodeGenerator()Create a Java code-generator using the given Grammar.
The caller must still call setTool, setBehavior, and setAnalyzer
before generating code.
charFormatter = new JavaCharFormatter();
Methods Summary |
private void | GenRuleInvocation(persistence.antlr.RuleRefElement rr)
// dump rule name
_print(rr.targetRule + "(");
// lexers must tell rule if it should set _returnToken
if (grammar instanceof LexerGrammar) {
// if labeled, could access Token, so tell rule to create
if (rr.getLabel() != null) {
else {
if (commonExtraArgs.length() != 0 || rr.args != null) {
// Extra arguments common to all rules for this grammar
if (commonExtraArgs.length() != 0 && rr.args != null) {
// Process arguments to method, if any
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
if (rr.args != null) {
// When not guessing, execute user arg action
ActionTransInfo tInfo = new ActionTransInfo();
String args = processActionForSpecialSymbols(rr.args, 0, currentRule, tInfo);
if (tInfo.assignToRoot || tInfo.refRuleRoot != null) {
antlrTool.error("Arguments of rule reference '" + rr.targetRule + "' cannot set or ref #" +
currentRule.getRuleName(), grammar.getFilename(), rr.getLine(), rr.getColumn());
// Warn if the rule accepts no arguments
if (rs.block.argAction == null) {
antlrTool.warning("Rule '" + rr.targetRule + "' accepts no arguments", grammar.getFilename(), rr.getLine(), rr.getColumn());
else {
// For C++, no warning if rule has parameters, because there may be default
// values for all of the parameters
if (rs.block.argAction != null) {
antlrTool.warning("Missing parameters on reference to rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
// move down to the first child while parsing
if (grammar instanceof TreeWalkerGrammar) {
println("_t = _retTree;");
| protected int | addSemPred(java.lang.String predicate)Adds a semantic predicate string to the sem pred vector.
These strings will be used to build an array of sem pred names
when building a debugging parser. This method should only be
called when the debug option is specified.
return semPreds.size() - 1;
| public void | exitIfError()
if (antlrTool.hasError()) {
antlrTool.fatalError("Exiting due to errors.");
| public void | gen(persistence.antlr.LexerGrammar g)Generate the lexer Java file
// If debugging, create a new sempred vector for this grammar
if (g.debuggingOutput)
semPreds = new Vector();
if (!(grammar instanceof LexerGrammar)) {
antlrTool.panic("Internal error generating lexer");
// SAS: moved output creation to method so a subclass can change
// how the output is generated (for VAJ interface)
genAST = false; // no way to gen trees.
saveText = true; // save consumed characters.
tabs = 0;
// Generate header common to all Java output files
// Do not use printAction because we assume tabs==0
// Generate header specific to lexer Java file
// println("import;");
println("import persistence.antlr.TokenStreamException;");
println("import persistence.antlr.TokenStreamIOException;");
println("import persistence.antlr.TokenStreamRecognitionException;");
println("import persistence.antlr.CharStreamException;");
println("import persistence.antlr.CharStreamIOException;");
println("import persistence.antlr.ANTLRException;");
println("import java.util.Hashtable;");
println("import persistence.antlr." + grammar.getSuperClass() + ";");
println("import persistence.antlr.InputBuffer;");
println("import persistence.antlr.ByteBuffer;");
println("import persistence.antlr.CharBuffer;");
println("import persistence.antlr.Token;");
println("import persistence.antlr.CommonToken;");
println("import persistence.antlr.RecognitionException;");
println("import persistence.antlr.NoViableAltForCharException;");
println("import persistence.antlr.MismatchedCharException;");
println("import persistence.antlr.TokenStream;");
println("import persistence.antlr.ANTLRHashString;");
println("import persistence.antlr.LexerSharedInputState;");
println("import persistence.antlr.collections.impl.BitSet;");
println("import persistence.antlr.SemanticException;");
// Generate user-defined lexer file preamble
// Generate lexer class definition
String sup = null;
if (grammar.superClass != null) {
sup = grammar.superClass;
else {
sup = "persistence.antlr." + grammar.getSuperClass();
// print javadoc comment if any
if (grammar.comment != null) {
// get prefix (replaces "public" and lets user specify)
String prefix = "public";
Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
if (tprefix != null) {
String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
if (p != null) {
prefix = p;
print(prefix+" ");
print("class " + grammar.getClassName() + " extends " + sup);
println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix + ", TokenStream");
Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
if (tsuffix != null) {
String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
if (suffix != null) {
print(", " + suffix); // must be an interface name for Java
println(" {");
// Generate user-defined lexer class members
processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
// Generate the constructor from InputStream, which in turn
// calls the ByteBuffer constructor
println("public " + grammar.getClassName() + "(InputStream in) {");
println("this(new ByteBuffer(in));");
// Generate the constructor from Reader, which in turn
// calls the CharBuffer constructor
println("public " + grammar.getClassName() + "(Reader in) {");
println("this(new CharBuffer(in));");
println("public " + grammar.getClassName() + "(InputBuffer ib) {");
// if debugging, wrap the input buffer in a debugger
if (grammar.debuggingOutput)
println("this(new LexerSharedInputState(new persistence.antlr.debug.DebuggingInputBuffer(ib)));");
println("this(new LexerSharedInputState(ib));");
// Generate the constructor from LexerSharedInputState
println("public " + grammar.getClassName() + "(LexerSharedInputState state) {");
// if debugging, set up array variables and call user-overridable
// debugging setup method
if (grammar.debuggingOutput) {
println(" ruleNames = _ruleNames;");
println(" semPredNames = _semPredNames;");
println(" setupDebugging();");
// Generate the setting of various generated options.
// These must be emitted before the literals table, since ANTLRHashString
// depends on the case-sensitivity settings.
println("caseSensitiveLiterals = " + g.caseSensitiveLiterals + ";");
println("setCaseSensitive(" + g.caseSensitive + ");");
// Generate the initialization of a hashtable
// containing the string literals used in the lexer
// The literals variable itself is in CharScanner
println("literals = new Hashtable();");
Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
while (keys.hasMoreElements()) {
String key = (String)keys.nextElement();
if (key.charAt(0) != '"") {
TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
if (sym instanceof StringLiteralSymbol) {
StringLiteralSymbol s = (StringLiteralSymbol)sym;
println("literals.put(new ANTLRHashString(" + s.getId() + ", this), new Integer(" + s.getTokenType() + "));");
Enumeration ids;
// generate the rule name array for debugging
if (grammar.debuggingOutput) {
println("private static final String _ruleNames[] = {");
ids = grammar.rules.elements();
int ruleNum = 0;
while (ids.hasMoreElements()) {
GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
if (sym instanceof RuleSymbol)
println(" \"" + ((RuleSymbol)sym).getId() + "\",");
// Generate nextToken() rule.
// nextToken() is a synthetic lexer rule that is the implicit OR of all
// user-defined lexer rules.
// Generate code for each rule in the lexer
ids = grammar.rules.elements();
int ruleNum = 0;
while (ids.hasMoreElements()) {
RuleSymbol sym = (RuleSymbol)ids.nextElement();
// Don't generate the synthetic rules
if (!sym.getId().equals("mnextToken")) {
genRule(sym, false, ruleNum++);
// Generate the semantic predicate map for debugging
if (grammar.debuggingOutput)
// Generate the bitsets used throughout the lexer
genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size());
// Close the lexer output stream
currentOutput = null;
| public void | gen(persistence.antlr.OneOrMoreBlock blk)Generate code for the given grammar element.
if (DEBUG_CODE_GENERATOR) System.out.println("gen+(" + blk + ")");
String label;
String cnt;
if (blk.getLabel() != null) {
cnt = "_cnt_" + blk.getLabel();
else {
cnt = "_cnt" + blk.ID;
println("int " + cnt + "=0;");
if (blk.getLabel() != null) {
label = blk.getLabel();
else {
label = "_loop" + blk.ID;
println(label + ":");
println("do {");
// generate the init action for ()+ ()* inside the loop
// this allows us to do useful EOF checking...
// Tell AST generation to build subrule result
String saveCurrentASTResult = currentASTResult;
if (blk.getLabel() != null) {
currentASTResult = blk.getLabel();
boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
// generate exit test if greedy set to false
// and an alt is ambiguous with exit branch
// or when lookahead derived purely from end-of-file
// Lookahead analysis stops when end-of-file is hit,
// returning set {epsilon}. Since {epsilon} is not
// ambig with any real tokens, no error is reported
// by deterministic() routines and we have to check
// for the case where the lookahead depth didn't get
// set to NONDETERMINISTIC (this only happens when the
// FOLLOW contains real atoms + epsilon).
boolean generateNonGreedyExitPath = false;
int nonGreedyExitDepth = grammar.maxk;
if (!blk.greedy &&
blk.exitLookaheadDepth <= grammar.maxk &&
blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
generateNonGreedyExitPath = true;
nonGreedyExitDepth = blk.exitLookaheadDepth;
else if (!blk.greedy &&
blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
generateNonGreedyExitPath = true;
// generate exit test if greedy set to false
// and an alt is ambiguous with exit branch
if (generateNonGreedyExitPath) {
System.out.println("nongreedy (...)+ loop; exit depth is " +
String predictExit =
println("// nongreedy exit test");
println("if ( " + cnt + ">=1 && " + predictExit + ") break " + label + ";");
JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
"if ( " + cnt + ">=1 ) { break " + label + "; } else {" + throwNoViable + "}"
println(cnt + "++;");
println("} while (true);");
// Restore previous AST generation
currentASTResult = saveCurrentASTResult;
| public void | gen(persistence.antlr.ParserGrammar g)Generate the parser Java file
// if debugging, set up a new vector to keep track of sempred
// strings for this grammar
if (g.debuggingOutput)
semPreds = new Vector();
if (!(grammar instanceof ParserGrammar)) {
antlrTool.panic("Internal error generating parser");
// Open the output stream for the parser and set the currentOutput
// SAS: moved file setup so subclass could do it (for VAJ interface)
genAST = grammar.buildAST;
tabs = 0;
// Generate the header common to all output files.
// Do not use printAction because we assume tabs==0
// Generate header for the parser
println("import persistence.antlr.TokenBuffer;");
println("import persistence.antlr.TokenStreamException;");
println("import persistence.antlr.TokenStreamIOException;");
println("import persistence.antlr.ANTLRException;");
println("import persistence.antlr." + grammar.getSuperClass() + ";");
println("import persistence.antlr.Token;");
println("import persistence.antlr.TokenStream;");
println("import persistence.antlr.RecognitionException;");
println("import persistence.antlr.NoViableAltException;");
println("import persistence.antlr.MismatchedTokenException;");
println("import persistence.antlr.SemanticException;");
println("import persistence.antlr.ParserSharedInputState;");
println("import persistence.antlr.collections.impl.BitSet;");
if ( genAST ) {
println("import persistence.antlr.collections.AST;");
println("import java.util.Hashtable;");
println("import persistence.antlr.ASTFactory;");
println("import persistence.antlr.ASTPair;");
println("import persistence.antlr.collections.impl.ASTArray;");
// Output the user-defined parser preamble
// Generate parser class definition
String sup = null;
if (grammar.superClass != null)
sup = grammar.superClass;
sup = "persistence.antlr." + grammar.getSuperClass();
// print javadoc comment if any
if (grammar.comment != null) {
// get prefix (replaces "public" and lets user specify)
String prefix = "public";
Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
if (tprefix != null) {
String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
if (p != null) {
prefix = p;
print(prefix+" ");
print("class " + grammar.getClassName() + " extends " + sup);
println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
if (tsuffix != null) {
String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
if (suffix != null)
print(", " + suffix); // must be an interface name for Java
println(" {");
// set up an array of all the rule names so the debugger can
// keep track of them only by number -- less to store in tree...
if (grammar.debuggingOutput) {
println("private static final String _ruleNames[] = {");
Enumeration ids = grammar.rules.elements();
int ruleNum = 0;
while (ids.hasMoreElements()) {
GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
if (sym instanceof RuleSymbol)
println(" \"" + ((RuleSymbol)sym).getId() + "\",");
// Generate user-defined parser class members
processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
// Generate parser class constructor from TokenBuffer
println("protected " + grammar.getClassName() + "(TokenBuffer tokenBuf, int k) {");
println(" super(tokenBuf,k);");
println(" tokenNames = _tokenNames;");
// if debugging, set up arrays and call the user-overridable
// debugging setup method
if (grammar.debuggingOutput) {
println(" ruleNames = _ruleNames;");
println(" semPredNames = _semPredNames;");
println(" setupDebugging(tokenBuf);");
if ( grammar.buildAST ) {
println(" buildTokenTypeASTClassMap();");
println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
println("public " + grammar.getClassName() + "(TokenBuffer tokenBuf) {");
println(" this(tokenBuf," + grammar.maxk + ");");
// Generate parser class constructor from TokenStream
println("protected " + grammar.getClassName() + "(TokenStream lexer, int k) {");
println(" super(lexer,k);");
println(" tokenNames = _tokenNames;");
// if debugging, set up arrays and call the user-overridable
// debugging setup method
if (grammar.debuggingOutput) {
println(" ruleNames = _ruleNames;");
println(" semPredNames = _semPredNames;");
println(" setupDebugging(lexer);");
if ( grammar.buildAST ) {
println(" buildTokenTypeASTClassMap();");
println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
println("public " + grammar.getClassName() + "(TokenStream lexer) {");
println(" this(lexer," + grammar.maxk + ");");
println("public " + grammar.getClassName() + "(ParserSharedInputState state) {");
println(" super(state," + grammar.maxk + ");");
println(" tokenNames = _tokenNames;");
if ( grammar.buildAST ) {
println(" buildTokenTypeASTClassMap();");
println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
// Generate code for each rule in the grammar
Enumeration ids = grammar.rules.elements();
int ruleNum = 0;
while (ids.hasMoreElements()) {
GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
if (sym instanceof RuleSymbol) {
RuleSymbol rs = (RuleSymbol)sym;
genRule(rs, rs.references.size() == 0, ruleNum++);
// Generate the token names
if ( grammar.buildAST ) {
// Generate the bitsets used throughout the grammar
genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
// Generate the semantic predicate map for debugging
if (grammar.debuggingOutput)
// Close class definition
// Close the parser output stream
currentOutput = null;
| public void | gen(persistence.antlr.RuleRefElement rr)Generate code for the given grammar element.
if (DEBUG_CODE_GENERATOR) System.out.println("genRR(" + rr + ")");
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
if (rs == null || !rs.isDefined()) {
// Is this redundant???
antlrTool.error("Rule '" + rr.targetRule + "' is not defined", grammar.getFilename(), rr.getLine(), rr.getColumn());
if (!(rs instanceof RuleSymbol)) {
// Is this redundant???
antlrTool.error("'" + rr.targetRule + "' does not name a grammar rule", grammar.getFilename(), rr.getLine(), rr.getColumn());
// AST value for labeled rule refs in tree walker.
// This is not AST construction; it is just the input tree node value.
if (grammar instanceof TreeWalkerGrammar &&
rr.getLabel() != null &&
syntacticPredLevel == 0) {
println(rr.getLabel() + " = _t==ASTNULL ? null : " + lt1Value + ";");
// if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
// Process return value assignment if any
if (rr.idAssign != null) {
// Warn if the rule has no return type
if (rs.block.returnAction == null) {
antlrTool.warning("Rule '" + rr.targetRule + "' has no return type", grammar.getFilename(), rr.getLine(), rr.getColumn());
_print(rr.idAssign + "=");
else {
// Warn about return value if any, but not inside syntactic predicate
if (!(grammar instanceof LexerGrammar) && syntacticPredLevel == 0 && rs.block.returnAction != null) {
antlrTool.warning("Rule '" + rr.targetRule + "' returns a value", grammar.getFilename(), rr.getLine(), rr.getColumn());
// Call the rule
// if in lexer and ! on element or alt or rule, save buffer index to kill later
if (grammar instanceof LexerGrammar && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
// if not in a syntactic predicate
if (syntacticPredLevel == 0) {
boolean doNoGuessTest = (
grammar.hasSyntacticPredicate &&
grammar.buildAST && rr.getLabel() != null ||
(genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)
if (doNoGuessTest) {
// println("if (inputState.guessing==0) {");
// tabs++;
if (grammar.buildAST && rr.getLabel() != null) {
// always gen variable for rule return on labeled rules
println(rr.getLabel() + "_AST = (" + labeledElementASTType + ")returnAST;");
if (genAST) {
switch (rr.getAutoGenType()) {
case GrammarElement.AUTO_GEN_NONE:
// println("theASTFactory.addASTChild(currentAST, returnAST);");
println("astFactory.addASTChild(currentAST, returnAST);");
case GrammarElement.AUTO_GEN_CARET:
antlrTool.error("Internal: encountered ^ after rule reference");
// if a lexer and labeled, Token label defined at rule level, just set it here
if (grammar instanceof LexerGrammar && rr.getLabel() != null) {
println(rr.getLabel() + "=_returnToken;");
if (doNoGuessTest) {
// tabs--;
// println("}");
| public void | gen(persistence.antlr.StringLiteralElement atom)Generate code for the given grammar element.
if (DEBUG_CODE_GENERATOR) System.out.println("genString(" + atom + ")");
// Variable declarations for labeled elements
if (atom.getLabel() != null && syntacticPredLevel == 0) {
println(atom.getLabel() + " = " + lt1Value + ";");
// AST
// is there a bang on the literal?
boolean oldsaveText = saveText;
saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
// matching
saveText = oldsaveText;
// tack on tree cursor motion if doing a tree walker
if (grammar instanceof TreeWalkerGrammar) {
println("_t = _t.getNextSibling();");
| public void | gen(persistence.antlr.TokenRangeElement r)Generate code for the given grammar element.
if (r.getLabel() != null && syntacticPredLevel == 0) {
println(r.getLabel() + " = " + lt1Value + ";");
// AST
// match
println("matchRange(" + r.beginText + "," + r.endText + ");");
| public void | gen(persistence.antlr.TokenRefElement atom)Generate code for the given grammar element.
if (DEBUG_CODE_GENERATOR) System.out.println("genTokenRef(" + atom + ")");
if (grammar instanceof LexerGrammar) {
antlrTool.panic("Token reference found in lexer");
// Assign Token value to token label variable
if (atom.getLabel() != null && syntacticPredLevel == 0) {
println(atom.getLabel() + " = " + lt1Value + ";");
// AST
// matching
// tack on tree cursor motion if doing a tree walker
if (grammar instanceof TreeWalkerGrammar) {
println("_t = _t.getNextSibling();");
| public void | gen(persistence.antlr.TreeElement t)
// save AST cursor
println("AST __t" + t.ID + " = _t;");
// If there is a label on the root, then assign that to the variable
if (t.root.getLabel() != null) {
println(t.root.getLabel() + " = _t==ASTNULL ? null :(" + labeledElementASTType + ")_t;");
// check for invalid modifiers ! and ^ on tree element roots
if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG ) {
antlrTool.error("Suffixing a root node with '!' is not implemented",
grammar.getFilename(), t.getLine(), t.getColumn());
if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET ) {
antlrTool.warning("Suffixing a root node with '^' is redundant; already a root",
grammar.getFilename(), t.getLine(), t.getColumn());
// Generate AST variables
if (grammar.buildAST) {
// Save the AST construction state
println("ASTPair __currentAST" + t.ID + " = currentAST.copy();");
// Make the next item added a child of the TreeElement root
println("currentAST.root = currentAST.child;");
println("currentAST.child = null;");
// match root
if ( t.root instanceof WildcardElement ) {
println("if ( _t==null ) throw new MismatchedTokenException();");
else {
// move to list of children
println("_t = _t.getFirstChild();");
// walk list of children, generating code for each
for (int i = 0; i < t.getAlternatives().size(); i++) {
Alternative a = t.getAlternativeAt(i);
AlternativeElement e = a.head;
while (e != null) {
e =;
if (grammar.buildAST) {
// restore the AST construction state to that just after the
// tree root was added
println("currentAST = __currentAST" + t.ID + ";");
// restore AST cursor
println("_t = __t" + t.ID + ";");
// move cursor to sibling of tree just parsed
println("_t = _t.getNextSibling();");
| public void | gen(persistence.antlr.TreeWalkerGrammar g)Generate the tree-parser Java file
// SAS: debugging stuff removed for now...
if (!(grammar instanceof TreeWalkerGrammar)) {
antlrTool.panic("Internal error generating tree-walker");
// Open the output stream for the parser and set the currentOutput
// SAS: move file open to method so subclass can override it
// (mainly for VAJ interface)
genAST = grammar.buildAST;
tabs = 0;
// Generate the header common to all output files.
// Do not use printAction because we assume tabs==0
// Generate header for the parser
println("import persistence.antlr." + grammar.getSuperClass() + ";");
println("import persistence.antlr.Token;");
println("import persistence.antlr.collections.AST;");
println("import persistence.antlr.RecognitionException;");
println("import persistence.antlr.ANTLRException;");
println("import persistence.antlr.NoViableAltException;");
println("import persistence.antlr.MismatchedTokenException;");
println("import persistence.antlr.SemanticException;");
println("import persistence.antlr.collections.impl.BitSet;");
println("import persistence.antlr.ASTPair;");
println("import persistence.antlr.collections.impl.ASTArray;");
// Output the user-defined parser preamble
// Generate parser class definition
String sup = null;
if (grammar.superClass != null) {
sup = grammar.superClass;
else {
sup = "persistence.antlr." + grammar.getSuperClass();
// print javadoc comment if any
if (grammar.comment != null) {
// get prefix (replaces "public" and lets user specify)
String prefix = "public";
Token tprefix = (Token)grammar.options.get("classHeaderPrefix");
if (tprefix != null) {
String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
if (p != null) {
prefix = p;
print(prefix+" ");
print("class " + grammar.getClassName() + " extends " + sup);
println(" implements " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
if (tsuffix != null) {
String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
if (suffix != null) {
print(", " + suffix); // must be an interface name for Java
println(" {");
// Generate user-defined parser class members
processActionForSpecialSymbols(grammar.classMemberAction.getText(), grammar.classMemberAction.getLine(), currentRule, null)
// Generate default parser class constructor
println("public " + grammar.getClassName() + "() {");
println("tokenNames = _tokenNames;");
// Generate code for each rule in the grammar
Enumeration ids = grammar.rules.elements();
int ruleNum = 0;
String ruleNameInits = "";
while (ids.hasMoreElements()) {
GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
if (sym instanceof RuleSymbol) {
RuleSymbol rs = (RuleSymbol)sym;
genRule(rs, rs.references.size() == 0, ruleNum++);
// Generate the token names
// Generate the bitsets used throughout the grammar
genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
// Close class definition
// Close the parser output stream
currentOutput = null;
| public void | gen(persistence.antlr.WildcardElement wc)Generate code for the given grammar element.
// Variable assignment for labeled elements
if (wc.getLabel() != null && syntacticPredLevel == 0) {
println(wc.getLabel() + " = " + lt1Value + ";");
// AST
// Match anything but EOF
if (grammar instanceof TreeWalkerGrammar) {
println("if ( _t==null ) throw new MismatchedTokenException();");
else if (grammar instanceof LexerGrammar) {
if (grammar instanceof LexerGrammar &&
(!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
if (grammar instanceof LexerGrammar &&
(!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
println("text.setLength(_saveIndex);"); // kill text atom put in buffer
else {
println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
// tack on tree cursor motion if doing a tree walker
if (grammar instanceof TreeWalkerGrammar) {
println("_t = _t.getNextSibling();");
| public void | gen(persistence.antlr.ZeroOrMoreBlock blk)Generate code for the given grammar element.
if (DEBUG_CODE_GENERATOR) System.out.println("gen*(" + blk + ")");
String label;
if (blk.getLabel() != null) {
label = blk.getLabel();
else {
label = "_loop" + blk.ID;
println(label + ":");
println("do {");
// generate the init action for ()* inside the loop
// this allows us to do useful EOF checking...
// Tell AST generation to build subrule result
String saveCurrentASTResult = currentASTResult;
if (blk.getLabel() != null) {
currentASTResult = blk.getLabel();
boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
// generate exit test if greedy set to false
// and an alt is ambiguous with exit branch
// or when lookahead derived purely from end-of-file
// Lookahead analysis stops when end-of-file is hit,
// returning set {epsilon}. Since {epsilon} is not
// ambig with any real tokens, no error is reported
// by deterministic() routines and we have to check
// for the case where the lookahead depth didn't get
// set to NONDETERMINISTIC (this only happens when the
// FOLLOW contains real atoms + epsilon).
boolean generateNonGreedyExitPath = false;
int nonGreedyExitDepth = grammar.maxk;
if (!blk.greedy &&
blk.exitLookaheadDepth <= grammar.maxk &&
blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
generateNonGreedyExitPath = true;
nonGreedyExitDepth = blk.exitLookaheadDepth;
else if (!blk.greedy &&
blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
generateNonGreedyExitPath = true;
if (generateNonGreedyExitPath) {
System.out.println("nongreedy (...)* loop; exit depth is " +
String predictExit =
println("// nongreedy exit test");
println("if (" + predictExit + ") break " + label + ";");
JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
genBlockFinish(howToFinish, "break " + label + ";");
println("} while (true);");
// Restore previous AST generation
currentASTResult = saveCurrentASTResult;
| public void | gen()Generate the parser, lexer, treeparser, and token types in Java
// Do the code generation
try {
// Loop over all grammars
Enumeration grammarIter = behavior.grammars.elements();
while (grammarIter.hasMoreElements()) {
Grammar g = (Grammar)grammarIter.nextElement();
// Connect all the components to each other
// To get the right overloading behavior across heterogeneous grammars
// print out the grammar with lookahead sets (and FOLLOWs)
// System.out.print(g.toString());
// Loop over all token managers (some of which are lexers)
Enumeration tmIter = behavior.tokenManagers.elements();
while (tmIter.hasMoreElements()) {
TokenManager tm = (TokenManager)tmIter.nextElement();
if (!tm.isReadOnly()) {
// Write the token manager tokens as Java
// this must appear before genTokenInterchange so that
// labels are set on string literals
// Write the token manager tokens as plain text
catch (IOException e) {
antlrTool.reportException(e, null);
| public void | gen(persistence.antlr.ActionElement action)Generate code for the given grammar element.
if (DEBUG_CODE_GENERATOR) System.out.println("genAction(" + action + ")");
if (action.isSemPred) {
genSemPred(action.actionText, action.line);
else {
if (grammar.hasSyntacticPredicate) {
println("if ( inputState.guessing==0 ) {");
// get the name of the followSet for the current rule so that we
// can replace $FOLLOW in the .g file.
ActionTransInfo tInfo = new ActionTransInfo();
String actionStr = processActionForSpecialSymbols(action.actionText,
if (tInfo.refRuleRoot != null) {
// Somebody referenced "#rule"; make sure the translated var is valid.
// An assignment to #rule is also left as a ref, meaning that assignments
// with no other refs, like "#rule = foo();", still force this code to be
// generated (unnecessarily).
println(tInfo.refRuleRoot + " = (" + labeledElementASTType + ")currentAST.root;");
// dump the translated action
if (tInfo.assignToRoot) {
// Somebody did a "#rule=", reset internal currentAST.root
println("currentAST.root = " + tInfo.refRuleRoot + ";");
// reset the child pointer too to be last sibling in sibling list
println("currentAST.child = " + tInfo.refRuleRoot + "!=null &&" + tInfo.refRuleRoot + ".getFirstChild()!=null ?");
println(tInfo.refRuleRoot + ".getFirstChild() : " + tInfo.refRuleRoot + ";");
if (grammar.hasSyntacticPredicate) {
| public void | gen(persistence.antlr.AlternativeBlock blk)Generate code for the given grammar element.
// Generates a plain subrule block: analyzes it, emits its alternatives via
// genCommonBlock, then closes it via genBlockFinish. currentASTResult is
// saved on entry and restored on exit.
if (DEBUG_CODE_GENERATOR) System.out.println("gen(" + blk + ")");
// Tell AST generation to build subrule result
String saveCurrentASTResult = currentASTResult;
// A labeled block becomes the current AST result name for the duration of this call.
if (blk.getLabel() != null) {
currentASTResult = blk.getLabel();
// Ask the grammar's LLk analyzer whether the block is deterministic.
// The returned flag is not read in the lines shown here.
boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
// Second argument is noTestForSingle = true.
JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
// throwNoViable is passed as the action to perform when no alternative was taken.
genBlockFinish(howToFinish, throwNoViable);
// Restore previous AST generation
currentASTResult = saveCurrentASTResult;
| public void | gen(persistence.antlr.BlockEndElement end)Generate code for the given grammar element.
// The only statement visible for a block end is a debug trace: when
// DEBUG_CODE_GENERATOR is set, the element is logged to stdout.
// No target code is printed in the lines shown.
if (DEBUG_CODE_GENERATOR) System.out.println("genRuleEnd(" + end + ")");
| public void | gen(persistence.antlr.CharLiteralElement atom)Generate code for the given grammar element.
if (DEBUG_CODE_GENERATOR) System.out.println("genChar(" + atom + ")");
// For a labeled atom, emit "<label> = <lt1Value>;" into the generated code.
if (atom.getLabel() != null) {
println(atom.getLabel() + " = " + lt1Value + ";");
// Narrow saveText for this atom only: it stays true only when it was already
// true and the atom's auto-gen type is AUTO_GEN_NONE. The previous value is
// put back afterwards.
// NOTE(review): the statement(s) that should run between the narrowing and the
// restore (presumably the match generation) are not present in this listing —
// confirm against the full source.
boolean oldsaveText = saveText;
saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
saveText = oldsaveText;
| public void | gen(persistence.antlr.CharRangeElement r)Generate code for the given grammar element.
if (r.getLabel() != null && syntacticPredLevel == 0) {
println(r.getLabel() + " = " + lt1Value + ";");
boolean flag = ( grammar instanceof LexerGrammar &&
( !saveText ||
r.getAutoGenType() ==
GrammarElement.AUTO_GEN_BANG ) );
if (flag) {
println("matchRange(" + r.beginText + "," + r.endText + ");");
if (flag) {
| protected void | genASTDeclaration(persistence.antlr.AlternativeElement el)
// Convenience overload: delegates to the two-argument form, passing
// labeledElementASTType as the node type.
genASTDeclaration(el, labeledElementASTType);
| protected void | genASTDeclaration(persistence.antlr.AlternativeElement el, java.lang.String node_type)
// Convenience overload: delegates to the three-argument form, using the
// element's own label (el.getLabel()) as the variable name.
genASTDeclaration(el, el.getLabel(), node_type);
| protected void | genASTDeclaration(persistence.antlr.AlternativeElement el, java.lang.String var_name, java.lang.String node_type)
// Emits the declaration "<node_type> <var_name>_AST = null;" for the element,
// first consulting declaredASTVariables to see whether el was already handled.
// NOTE(review): the statement guarded by the `if` below and the statement that
// should follow "mark as declared" are missing from this listing — presumably an
// early return and an insertion of el into declaredASTVariables; confirm against
// the full source.
// already declared?
if (declaredASTVariables.contains(el))
// emit code
println(node_type + " " + var_name + "_AST = null;");
// mark as declared
| protected void | genAlt(persistence.antlr.Alternative alt, persistence.antlr.AlternativeBlock blk)Generate an alternative.
// Save the AST generation state, and set it to that of the alt
boolean savegenAST = genAST;
genAST = genAST && alt.getAutoGen();
boolean oldsaveTest = saveText;
saveText = saveText && alt.getAutoGen();
// Reset the variable name map for the alternative
Hashtable saveMap = treeVariableMap;
treeVariableMap = new Hashtable();
// Generate try block around the alt for error handling
if (alt.exceptionSpec != null) {
println("try { // for error handling");
AlternativeElement elem = alt.head;
while (!(elem instanceof BlockEndElement)) {
elem.generate(); // alt can begin with anything. Ask target to gen.
elem =;
if (genAST) {
if (blk instanceof RuleBlock) {
// Set the AST return value for the rule
RuleBlock rblk = (RuleBlock)blk;
if (grammar.hasSyntacticPredicate) {
// println("if ( inputState.guessing==0 ) {");
// tabs++;
println(rblk.getRuleName() + "_AST = (" + labeledElementASTType + ")currentAST.root;");
if (grammar.hasSyntacticPredicate) {
// --tabs;
// println("}");
else if (blk.getLabel() != null) {
// ### future: also set AST value for labeled subrules.
// println(blk.getLabel() + "_AST = ("+labeledElementASTType+")currentAST.root;");
antlrTool.warning("Labeled subrules not yet supported", grammar.getFilename(), blk.getLine(), blk.getColumn());
if (alt.exceptionSpec != null) {
// close try block
genAST = savegenAST;
saveText = oldsaveTest;
treeVariableMap = saveMap;
| private void | genBitSet(persistence.antlr.collections.impl.BitSet p, int id)Do something simple like:
private static final long[] mk_tokenSet_0() {
long[] data = { -2305839160922996736L, 63L, 16777216L, 0L, 0L, 0L };
return data;
public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
Or, for large bitsets, optimize init so ranges are collapsed into loops.
This is most useful for lexers using unicode.
// initialization data
"private static final long[] mk" + getBitsetName(id) + "() {"
int n = p.lengthInLongWords();
println("\tlong[] data = { " + p.toStringOfWords() + "};");
else {
// will init manually, allocate space then set values
println("\tlong[] data = new long["+n+"];");
long[] elems = p.toPackedArray();
for (int i = 0; i < elems.length;) {
if ( elems[i]==0 ) {
// done automatically by Java, don't waste time/code
if ( (i+1)==elems.length || elems[i]!=elems[i+1] ) {
// last number or no run of numbers, just dump assignment
else {
// scan to find end of run
int j;
for (j = i + 1;
j < elems.length && elems[j]==elems[i];
// j-1 is last member of run
println("\tfor (int i = "+i+"; i<="+(j-1)+"; i++) { data[i]="+
elems[i]+"L; }");
i = j;
println("\treturn data;");
// BitSet object
"public static final BitSet " + getBitsetName(id) + " = new BitSet(" +
"mk" + getBitsetName(id) + "()" +
| protected void | genBitsets(persistence.antlr.collections.impl.Vector bitsetList, int maxVocabulary)Generate all the bitsets to be used in the parser or lexer
Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
Note that most languages do not support object initialization inside a
class definition, so other code-generators may have to separate the
bitset declarations from the initializations (e.g., put the initializations
in the generated constructor instead).
for (int i = 0; i < bitsetList.size(); i++) {
BitSet p = (BitSet)bitsetList.elementAt(i);
// Ensure that generated BitSet is large enough for vocabulary
genBitSet(p, i);
| private void | genBlockFinish(persistence.antlr.JavaBlockFinishingInfo howToFinish, java.lang.String noViableAction)Generate the finish of a block, using a combination of the info
returned from genCommonBlock() and the action to perform when
no alts were taken
if (howToFinish.needAnErrorClause &&
(howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
if (howToFinish.generatedAnIf) {
println("else {");
else {
if (howToFinish.postscript != null) {
| protected void | genBlockInitAction(persistence.antlr.AlternativeBlock blk)Generate the init action for a block, which may be a RuleBlock or a
plain AlternativeBlock.
// dump out init action
if (blk.initAction != null) {
printAction(processActionForSpecialSymbols(blk.initAction, blk.getLine(), currentRule, null));
| protected void | genBlockPreamble(persistence.antlr.AlternativeBlock blk)Generate the header for a block, which may be a RuleBlock or a
plain AlternativeBlock. This generates any variable declarations
and syntactic-predicate-testing variables.
// define labels for rule blocks.
if (blk instanceof RuleBlock) {
RuleBlock rblk = (RuleBlock)blk;
if (rblk.labeledElements != null) {
for (int i = 0; i < rblk.labeledElements.size(); i++) {
AlternativeElement a = (AlternativeElement)rblk.labeledElements.elementAt(i);
// System.out.println("looking at labeled element: "+a);
// Variables for labeled rule refs and
// subrules are different than variables for
// grammar atoms. This test is a little tricky
// because we want to get all rule refs and ebnf,
// but not rule blocks or syntactic predicates
if (
a instanceof RuleRefElement ||
a instanceof AlternativeBlock &&
!(a instanceof RuleBlock) &&
!(a instanceof SynPredBlock)
) {
if (
!(a instanceof RuleRefElement) &&
((AlternativeBlock)a).not &&
analyzer.subruleCanBeInverted(((AlternativeBlock)a), grammar instanceof LexerGrammar)
) {
// Special case for inverted subrules that
// will be inlined. Treat these like
// token or char literal references
println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
if (grammar.buildAST) {
else {
if (grammar.buildAST) {
// Always gen AST variables for
// labeled elements, even if the
// element itself is marked with !
if (grammar instanceof LexerGrammar) {
println("Token " + a.getLabel() + "=null;");
if (grammar instanceof TreeWalkerGrammar) {
// always generate rule-ref variables
// for tree walker
println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
else {
// It is a token or literal reference. Generate the
// correct variable type for this grammar
println(labeledElementType + " " + a.getLabel() + " = " + labeledElementInit + ";");
// In addition, generate *_AST variables if
// building ASTs
if (grammar.buildAST) {
if (a instanceof GrammarAtom &&
((GrammarAtom)a).getASTNodeType() != null) {
GrammarAtom ga = (GrammarAtom)a;
genASTDeclaration(a, ga.getASTNodeType());
else {
| protected void | genCases(persistence.antlr.collections.impl.BitSet p)Generate a series of case statements that implement a BitSet test.
if (DEBUG_CODE_GENERATOR) System.out.println("genCases(" + p + ")");
int[] elems;
elems = p.toArray();
// Wrap cases four-per-line for lexer, one-per-line for parser
int wrap = (grammar instanceof LexerGrammar) ? 4 : 1;
int j = 1;
boolean startOfLine = true;
for (int i = 0; i < elems.length; i++) {
if (j == 1) {
else {
_print(" ");
_print("case " + getValueString(elems[i]) + ":");
if (j == wrap) {
startOfLine = true;
j = 1;
else {
startOfLine = false;
if (!startOfLine) {
| public persistence.antlr.JavaBlockFinishingInfo | genCommonBlock(persistence.antlr.AlternativeBlock blk, boolean noTestForSingle)Generate common code for a block of alternatives; return a
postscript that needs to be generated at the end of the
block. Other routines may append else-clauses and such for
error checking before the postfix is generated. If the
grammar is a lexer, then generate alternatives in an order
where alternatives requiring deeper lookahead are generated
first, and EOF in the lookahead set reduces the depth of
the lookahead. @param blk The block to generate @param
noTestForSingle If true, then it does not generate a test
for a single alternative.
int nIF = 0;
boolean createdLL1Switch = false;
int closingBracesOfIFSequence = 0;
JavaBlockFinishingInfo finishingInfo = new JavaBlockFinishingInfo();
if (DEBUG_CODE_GENERATOR) System.out.println("genCommonBlock(" + blk + ")");
// Save the AST generation state, and set it to that of the block
boolean savegenAST = genAST;
genAST = genAST && blk.getAutoGen();
boolean oldsaveTest = saveText;
saveText = saveText && blk.getAutoGen();
// Is this block inverted? If so, generate special-case code
if (
blk.not &&
analyzer.subruleCanBeInverted(blk, grammar instanceof LexerGrammar)
) {
if (DEBUG_CODE_GENERATOR) System.out.println("special case: ~(subrule)");
Lookahead p = analyzer.look(1, blk);
// Variable assignment for labeled elements
if (blk.getLabel() != null && syntacticPredLevel == 0) {
println(blk.getLabel() + " = " + lt1Value + ";");
// AST
String astArgs = "";
if (grammar instanceof TreeWalkerGrammar) {
astArgs = "_t,";
// match the bitset for the alternative
println("match(" + astArgs + getBitsetName(markBitsetForGen(p.fset)) + ");");
// tack on tree cursor motion if doing a tree walker
if (grammar instanceof TreeWalkerGrammar) {
println("_t = _t.getNextSibling();");
return finishingInfo;
// Special handling for single alt
if (blk.getAlternatives().size() == 1) {
Alternative alt = blk.getAlternativeAt(0);
// Generate a warning if there is a synPred for single alt.
if (alt.synPred != null) {
"Syntactic predicate superfluous for single alternative",
if (noTestForSingle) {
if (alt.semPred != null) {
// Generate validating predicate
genSemPred(alt.semPred, blk.line);
genAlt(alt, blk);
return finishingInfo;
// count number of simple LL(1) cases; only do switch for
// many LL(1) cases (no preds, no end of token refs)
// We don't care about exit paths for (...)*, (...)+
// because we don't explicitly have a test for them
// as an alt in the loop.
// Also, we now count how many unicode lookahead sets
// there are--they must be moved to DEFAULT or ELSE
// clause.
int nLL1 = 0;
for (int i = 0; i < blk.getAlternatives().size(); i++) {
Alternative a = blk.getAlternativeAt(i);
if (suitableForCaseExpression(a)) {
// do LL(1) cases
if (nLL1 >= makeSwitchThreshold) {
// Determine the name of the item to be compared
String testExpr = lookaheadString(1);
createdLL1Switch = true;
// when parsing trees, convert null to valid tree node with NULL lookahead
if (grammar instanceof TreeWalkerGrammar) {
println("if (_t==null) _t=ASTNULL;");
println("switch ( " + testExpr + ") {");
for (int i = 0; i < blk.alternatives.size(); i++) {
Alternative alt = blk.getAlternativeAt(i);
// ignore any non-LL(1) alts, predicated alts,
// or end-of-token alts for case expressions
if (!suitableForCaseExpression(alt)) {
Lookahead p = alt.cache[1];
if ( == 0 && !p.containsEpsilon()) {
antlrTool.warning("Alternate omitted due to empty prediction set",
alt.head.getLine(), alt.head.getColumn());
else {
genAlt(alt, blk);
// do non-LL(1) and nondeterministic cases. This is tricky in
// the lexer, because of cases like: STAR : '*' ; ASSIGN_STAR
// : "*="; Since nextToken is generated without a loop, then
// the STAR will have end-of-token as its lookahead set for
// LA(2). So, we must generate the alternatives containing
// trailing end-of-token in their lookahead sets *after* the
// alternatives without end-of-token. This implements the
// usual lexer convention that longer matches come before
// shorter ones, e.g. "*=" matches ASSIGN_STAR not STAR.
// For non-lexer grammars, this does not sort the alternates
// by depth. Note that alts whose lookahead is purely
// end-of-token at k=1 end up as default or else clauses.
int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk : 0;
for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
if (DEBUG_CODE_GENERATOR) System.out.println("checking depth " + altDepth);
for (int i = 0; i < blk.alternatives.size(); i++) {
Alternative alt = blk.getAlternativeAt(i);
if (DEBUG_CODE_GENERATOR) System.out.println("genAlt: " + i);
// if we made a switch above, ignore what we already took care
// of. Specifically, LL(1) alts with no preds
// that do not have end-of-token in their prediction set
// and that are not giant unicode sets.
if (createdLL1Switch && suitableForCaseExpression(alt)) {
if (DEBUG_CODE_GENERATOR) System.out.println("ignoring alt because it was in the switch");
String e;
boolean unpredicted = false;
if (grammar instanceof LexerGrammar) {
// Calculate the "effective depth" of the alt,
// which is the max depth at which
// cache[depth]!=end-of-token
int effectiveDepth = alt.lookaheadDepth;
if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC) {
// use maximum lookahead
effectiveDepth = grammar.maxk;
while (effectiveDepth >= 1 &&
alt.cache[effectiveDepth].containsEpsilon()) {
// Ignore alts whose effective depth is other than
// the ones we are generating for this iteration.
if (effectiveDepth != altDepth) {
System.out.println("ignoring alt because effectiveDepth!=altDepth;" + effectiveDepth + "!=" + altDepth);
unpredicted = lookaheadIsEmpty(alt, effectiveDepth);
e = getLookaheadTestExpression(alt, effectiveDepth);
else {
unpredicted = lookaheadIsEmpty(alt, grammar.maxk);
e = getLookaheadTestExpression(alt, grammar.maxk);
// Was it a big unicode range that forced unsuitability
// for a case expression?
if (alt.cache[1] > caseSizeThreshold &&
suitableForCaseExpression(alt)) {
if (nIF == 0) {
println("if " + e + " {");
else {
println("else if " + e + " {");
else if (unpredicted &&
alt.semPred == null &&
alt.synPred == null) {
// The alt has empty prediction set and no
// predicate to help out. if we have not
// generated a previous if, just put {...} around
// the end-of-token clause
if (nIF == 0) {
else {
println("else {");
finishingInfo.needAnErrorClause = false;
else { // check for sem and syn preds
// Add any semantic predicate expression to the
// lookahead test
if (alt.semPred != null) {
// if debugging, wrap the evaluation of the
// predicate in a method translate $ and #
// references
ActionTransInfo tInfo = new ActionTransInfo();
String actionStr =
// ignore translation info...we don't need to
// do anything with it. call that will inform
// SemanticPredicateListeners of the result
if (((grammar instanceof ParserGrammar) ||
(grammar instanceof LexerGrammar)) &&
grammar.debuggingOutput) {
e = "(" + e + "&& fireSemanticPredicateEvaluated(persistence.antlr.debug.SemanticPredicateEvent.PREDICTING," +
addSemPred(charFormatter.escapeString(actionStr)) + "," + actionStr + "))";
else {
e = "(" + e + "&&(" + actionStr + "))";
// Generate any syntactic predicates
if (nIF > 0) {
if (alt.synPred != null) {
println("else {");
genSynPred(alt.synPred, e);
else {
println("else if " + e + " {");
else {
if (alt.synPred != null) {
genSynPred(alt.synPred, e);
else {
// when parsing trees, convert null to
// valid tree node with NULL lookahead.
if (grammar instanceof TreeWalkerGrammar) {
println("if (_t==null) _t=ASTNULL;");
println("if " + e + " {");
genAlt(alt, blk);
String ps = "";
for (int i = 1; i <= closingBracesOfIFSequence; i++) {
ps += "}";
// Restore the AST generation state
genAST = savegenAST;
// restore save text state
saveText = oldsaveTest;
// Return the finishing info.
if (createdLL1Switch) {
finishingInfo.postscript = ps + "}";
finishingInfo.generatedSwitch = true;
finishingInfo.generatedAnIf = nIF > 0;
//return new JavaBlockFinishingInfo(ps+"}",true,nIF>0); // close up switch statement
else {
finishingInfo.postscript = ps;
finishingInfo.generatedSwitch = false;
finishingInfo.generatedAnIf = nIF > 0;
// return new JavaBlockFinishingInfo(ps, false,nIF>0);
return finishingInfo;
| private void | genElementAST(persistence.antlr.AlternativeElement el)Generate code to link an element reference into the AST
// handle case where you're not building trees, but are in tree walker.
// Just need to get labels set up.
if (grammar instanceof TreeWalkerGrammar && !grammar.buildAST) {
String elementRef;
String astName;
// Generate names and declarations of the AST variable(s)
if (el.getLabel() == null) {
elementRef = lt1Value;
// Generate AST variables for unlabeled stuff
astName = "tmp" + astVarNumber + "_AST";
// Map the generated AST variable in the alternate
mapTreeVariable(el, astName);
// Generate an "input" AST variable also
println(labeledElementASTType + " " + astName + "_in = " + elementRef + ";");
if (grammar.buildAST && syntacticPredLevel == 0) {
boolean needASTDecl =
(genAST &&
(el.getLabel() != null ||
el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG
// RK: if we have a grammar element always generate the decl
// since some guy can access it from an action and we can't
// peek ahead (well not without making a mess).
// I'd prefer taking this out.
if (el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG &&
(el instanceof TokenRefElement))
needASTDecl = true;
boolean doNoGuessTest =
(grammar.hasSyntacticPredicate && needASTDecl);
String elementRef;
String astNameBase;
// Generate names and declarations of the AST variable(s)
if (el.getLabel() != null) {
elementRef = el.getLabel();
astNameBase = el.getLabel();
else {
elementRef = lt1Value;
// Generate AST variables for unlabeled stuff
astNameBase = "tmp" + astVarNumber;
// Generate the declaration if required.
if (needASTDecl) {
// Generate the declaration
if (el instanceof GrammarAtom) {
GrammarAtom ga = (GrammarAtom)el;
if (ga.getASTNodeType() != null) {
genASTDeclaration(el, astNameBase, ga.getASTNodeType());
// println(ga.getASTNodeType()+" " + astName+" = null;");
else {
genASTDeclaration(el, astNameBase, labeledElementASTType);
// println(labeledElementASTType+" " + astName + " = null;");
else {
genASTDeclaration(el, astNameBase, labeledElementASTType);
// println(labeledElementASTType+" " + astName + " = null;");
// for convenience..
String astName = astNameBase + "_AST";
// Map the generated AST variable in the alternate
mapTreeVariable(el, astName);
if (grammar instanceof TreeWalkerGrammar) {
// Generate an "input" AST variable also
println(labeledElementASTType + " " + astName + "_in = null;");
// Enclose actions with !guessing
if (doNoGuessTest) {
// println("if (inputState.guessing==0) {");
// tabs++;
// if something has a label assume it will be used
// so we must initialize the RefAST
if (el.getLabel() != null) {
if (el instanceof GrammarAtom) {
println(astName + " = " + getASTCreateString((GrammarAtom)el, elementRef) + ";");
else {
println(astName + " = " + getASTCreateString(elementRef) + ";");
// if it has no label but a declaration exists initialize it.
if (el.getLabel() == null && needASTDecl) {
elementRef = lt1Value;
if (el instanceof GrammarAtom) {
println(astName + " = " + getASTCreateString((GrammarAtom)el, elementRef) + ";");
else {
println(astName + " = " + getASTCreateString(elementRef) + ";");
// Map the generated AST variable in the alternate
if (grammar instanceof TreeWalkerGrammar) {
// set "input" AST variable also
println(astName + "_in = " + elementRef + ";");
if (genAST) {
switch (el.getAutoGenType()) {
case GrammarElement.AUTO_GEN_NONE:
println("astFactory.addASTChild(currentAST, " + astName + ");");
case GrammarElement.AUTO_GEN_CARET:
println("astFactory.makeASTRoot(currentAST, " + astName + ");");
if (doNoGuessTest) {
// tabs--;
// println("}");
| private void | genErrorCatchForElement(persistence.antlr.AlternativeElement el)Close the try block and generate catch phrases
if the element has a labeled handler in the rule
// Handlers are looked up by label, so an unlabeled element has nothing to close.
if (el.getLabel() == null) return;
String r = el.enclosingRuleName;
// For lexer grammars the rule name is passed through encodeLexerRuleName
// before the symbol lookup.
if (grammar instanceof LexerGrammar) {
r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(r);
if (rs == null) {
antlrTool.panic("Enclosing rule not found!");
// Look for an exception spec registered under this element's label in the
// enclosing rule's block.
ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
// NOTE(review): the body of this branch is missing from this listing — per the
// method description it should close the try block and emit the catch phrases;
// confirm against the full source.
if (ex != null) {
| private void | genErrorHandler(persistence.antlr.ExceptionSpec ex)Generate the catch phrases for a user-specified error handler
// Each ExceptionHandler in the ExceptionSpec is a separate catch
for (int i = 0; i < ex.handlers.size(); i++) {
ExceptionHandler handler = (ExceptionHandler)ex.handlers.elementAt(i);
// Generate catch phrase
println("catch (" + handler.exceptionTypeAndName.getText() + ") {");
if (grammar.hasSyntacticPredicate) {
println("if (inputState.guessing==0) {");
// When not guessing, execute user handler action
ActionTransInfo tInfo = new ActionTransInfo();
currentRule, tInfo)
if (grammar.hasSyntacticPredicate) {
println("} else {");
// When guessing, rethrow exception
"throw " +
extractIdOfAction(handler.exceptionTypeAndName) +
// Close catch phrase
| private void | genErrorTryForElement(persistence.antlr.AlternativeElement el)Generate a try { opening if the element has a labeled handler in the rule
// Handlers are looked up by label, so an unlabeled element never opens a try.
if (el.getLabel() == null) return;
String r = el.enclosingRuleName;
// For lexer grammars the rule name is passed through encodeLexerRuleName
// before the symbol lookup.
if (grammar instanceof LexerGrammar) {
r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(r);
if (rs == null) {
antlrTool.panic("Enclosing rule not found!");
// Look for an exception spec registered under this element's label in the
// enclosing rule's block; only when one exists is the try opening emitted.
ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
if (ex != null) {
println("try { // for error handling");
| protected void | genHeader()Generate a header that is common to all Java files
// Prints one line of the form
//   // $ANTLR <Tool.version>: "<grammar file>" -> "<class name>.java"$
// The grammar file name is passed through antlrTool.fileMinusPath first
// (presumably dropping the directory part — confirm against that helper).
println("// $ANTLR " + Tool.version + ": " +
"\"" + antlrTool.fileMinusPath(antlrTool.grammarFile) + "\"" +
" -> " +
"\"" + grammar.getClassName() + ".java\"$");
| private void | genLiteralsTest()
// Emits a statement into the generated code that reassigns _ttype to the
// result of testLiteralsTable(_ttype).
println("_ttype = testLiteralsTable(_ttype);");
| private void | genLiteralsTestForPartialToken()
// Emits the two-argument literals test: the generated statement builds a String
// from text.getBuffer(), starting at _begin and spanning text.length()-_begin
// characters, and passes it with _ttype to testLiteralsTable.
println("_ttype = testLiteralsTable(new String(text.getBuffer(),_begin,text.length()-_begin),_ttype);");
| protected void | genMatch(persistence.antlr.collections.impl.BitSet b)
| protected void | genMatch(persistence.antlr.GrammarAtom atom)
if (atom instanceof StringLiteralElement) {
if (grammar instanceof LexerGrammar) {
else {
else if (atom instanceof CharLiteralElement) {
if (grammar instanceof LexerGrammar) {
else {
antlrTool.error("cannot ref character literals in grammar: " + atom);
else if (atom instanceof TokenRefElement) {
else if (atom instanceof WildcardElement) {
| protected void | genMatchUsingAtomText(persistence.antlr.GrammarAtom atom)
// match() for trees needs the _t cursor
String astArgs = "";
if (grammar instanceof TreeWalkerGrammar) {
astArgs = "_t,";
// if in lexer and ! on element, save buffer index to kill later
if (grammar instanceof LexerGrammar && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
print(atom.not ? "matchNot(" : "match(");
// print out what to match
if (atom.atomText.equals("EOF")) {
// horrible hack to handle EOF case
else {
if (grammar instanceof LexerGrammar && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
println("text.setLength(_saveIndex);"); // kill text atom put in buffer
| protected void | genMatchUsingAtomTokenType(persistence.antlr.GrammarAtom atom)
// Emits a match(...) or matchNot(...) call whose argument is the atom's token
// type rendered by getValueString, prefixed with the tree cursor for tree walkers.
// match() for trees needs the _t cursor
String astArgs = "";
if (grammar instanceof TreeWalkerGrammar) {
astArgs = "_t,";
// If the literal can be mangled, generate the symbolic constant instead
// NOTE(review): mangledName is never read in the lines shown here.
String mangledName = null;
String s = astArgs + getValueString(atom.getType());
// matching
// A negated atom (atom.not) produces matchNot(...) instead of match(...).
println((atom.not ? "matchNot(" : "match(") + s + ");");
| public void | genNextToken()Generate the nextToken() rule. nextToken() is a synthetic
lexer rule that is the implicit OR of all user-defined
lexer rules.
// Are there any public rules? If not, then just generate a
// fake nextToken().
boolean hasPublicRules = false;
for (int i = 0; i < grammar.rules.size(); i++) {
RuleSymbol rs = (RuleSymbol)grammar.rules.elementAt(i);
if (rs.isDefined() && rs.access.equals("public")) {
hasPublicRules = true;
if (!hasPublicRules) {
println("public Token nextToken() throws TokenStreamException {");
println("\ttry {uponEOF();}");
println("\tcatch(CharStreamIOException csioe) {");
println("\t\tthrow new TokenStreamIOException(;");
println("\tcatch(CharStreamException cse) {");
println("\t\tthrow new TokenStreamException(cse.getMessage());");
println("\treturn new CommonToken(Token.EOF_TYPE, \"\");");
// Create the synthesized nextToken() rule
RuleBlock nextTokenBlk = MakeGrammar.createNextTokenRule(grammar, grammar.rules, "nextToken");
// Define the nextToken rule symbol
RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
nextTokenRs.access = "private";
// Analyze the nextToken rule
boolean ok = grammar.theLLkAnalyzer.deterministic(nextTokenBlk);
// Generate the next token rule
String filterRule = null;
if (((LexerGrammar)grammar).filterMode) {
filterRule = ((LexerGrammar)grammar).filterRule;
println("public Token nextToken() throws TokenStreamException {");
println("Token theRetToken=null;");
println("for (;;) {");
println("Token _token = null;");
println("int _ttype = Token.INVALID_TYPE;");
if (((LexerGrammar)grammar).filterMode) {
if (filterRule != null) {
// Here's a good place to ensure that the filter rule actually exists
if (!grammar.isDefined(CodeGenerator.encodeLexerRuleName(filterRule))) {
grammar.antlrTool.error("Filter rule " + filterRule + " does not exist in this lexer");
else {
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(CodeGenerator.encodeLexerRuleName(filterRule));
if (!rs.isDefined()) {
grammar.antlrTool.error("Filter rule " + filterRule + " does not exist in this lexer");
else if (rs.access.equals("public")) {
grammar.antlrTool.error("Filter rule " + filterRule + " must be protected");
println("int _m;");
println("_m = mark();");
println("try { // for char stream error handling");
// Generate try around whole thing to trap scanner errors
println("try { // for lexical error handling");
// Test for public lexical rules with empty paths
for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++) {
Alternative a = nextTokenBlk.getAlternativeAt(i);
if (a.cache[1].containsEpsilon()) {
//String r = a.head.toString();
RuleRefElement rr = (RuleRefElement)a.head;
String r = CodeGenerator.decodeLexerRuleName(rr.targetRule);
antlrTool.warning("public lexical rule "+r+" is optional (can match \"nothing\")");
// Generate the block
String newline = System.getProperty("line.separator");
JavaBlockFinishingInfo howToFinish = genCommonBlock(nextTokenBlk, false);
String errFinish = "if (LA(1)==EOF_CHAR) {uponEOF(); _returnToken = makeToken(Token.EOF_TYPE);}";
errFinish += newline + "\t\t\t\t";
if (((LexerGrammar)grammar).filterMode) {
if (filterRule == null) {
errFinish += "else {consume(); continue tryAgain;}";
else {
errFinish += "else {" + newline +
"\t\t\t\t\tcommit();" + newline +
"\t\t\t\t\ttry {m" + filterRule + "(false);}" + newline +
"\t\t\t\t\tcatch(RecognitionException e) {" + newline +
"\t\t\t\t\t // catastrophic failure" + newline +
"\t\t\t\t\t reportError(e);" + newline +
"\t\t\t\t\t consume();" + newline +
"\t\t\t\t\t}" + newline +
"\t\t\t\t\tcontinue tryAgain;" + newline +
else {
errFinish += "else {" + throwNoViable + "}";
genBlockFinish(howToFinish, errFinish);
// at this point a valid token has been matched, undo "mark" that was done
if (((LexerGrammar)grammar).filterMode && filterRule != null) {
// Generate literals test if desired
// make sure _ttype is set first; note _returnToken must be
// non-null as the rule was required to create it.
println("if ( _returnToken==null ) continue tryAgain; // found SKIP token");
println("_ttype = _returnToken.getType();");
if (((LexerGrammar)grammar).getTestLiterals()) {
// return token created by rule reference in switch
println("return _returnToken;");
// Close try block
println("catch (RecognitionException e) {");
if (((LexerGrammar)grammar).filterMode) {
if (filterRule == null) {
println("if ( !getCommitToPath() ) {consume(); continue tryAgain;}");
else {
println("if ( !getCommitToPath() ) {");
println("try {m" + filterRule + "(false);}");
println("catch(RecognitionException ee) {");
println(" // horrendous failure: error in filter rule");
println(" reportError(ee);");
println(" consume();");
println("continue tryAgain;");
if (nextTokenBlk.getDefaultErrorHandler()) {
else {
// pass on to invoking routine
println("throw new TokenStreamRecognitionException(e);");
// close CharStreamException try
println("catch (CharStreamException cse) {");
println(" if ( cse instanceof CharStreamIOException ) {");
println(" throw new TokenStreamIOException(((CharStreamIOException)cse).io);");
println(" }");
println(" else {");
println(" throw new TokenStreamException(cse.getMessage());");
println(" }");
// close for-loop
// close method nextToken
| public void | genRule(persistence.antlr.RuleSymbol s, boolean startSymbol, int ruleNum)Gen a named rule block.
ASTs are generated for each element of an alternative unless
the rule or the alternative have a '!' modifier.
If an alternative defeats the default tree construction, it
must set _AST to the root of the returned AST.
Each alternative that does automatic tree construction, builds
up root and child list pointers in an ASTPair structure.
A rule finishes by setting the returnAST variable from the ASTPair.
tabs = 1;
if (DEBUG_CODE_GENERATOR) System.out.println("genRule(" + s.getId() + ")");
if (!s.isDefined()) {
antlrTool.error("undefined rule: " + s.getId());
// Generate rule return type, name, arguments
RuleBlock rblk = s.getBlock();
currentRule = rblk;
currentASTResult = s.getId();
// clear list of declared ast variables..
// Save the AST generation state, and set it to that of the rule
boolean savegenAST = genAST;
genAST = genAST && rblk.getAutoGen();
// boolean oldsaveTest = saveText;
saveText = rblk.getAutoGen();
// print javadoc comment if any
if (s.comment != null) {
// Gen method access and final qualifier
print(s.access + " final ");
// Gen method return type (note lexer return action set at rule creation)
if (rblk.returnAction != null) {
// Has specified return value
_print(extractTypeOfAction(rblk.returnAction, rblk.getLine(), rblk.getColumn()) + " ");
else {
// No specified return value
_print("void ");
// Gen method name
_print(s.getId() + "(");
// Additional rule parameters common to all rules for this grammar
if (commonExtraParams.length() != 0 && rblk.argAction != null) {
// Gen arguments
if (rblk.argAction != null) {
// Has specified arguments
else {
// No specified arguments
// Gen throws clause and open curly
_print(" throws " + exceptionThrown);
if (grammar instanceof ParserGrammar) {
_print(", TokenStreamException");
else if (grammar instanceof LexerGrammar) {
_print(", CharStreamException, TokenStreamException");
// Add user-defined exceptions unless lexer (for now)
if (rblk.throwsSpec != null) {
if (grammar instanceof LexerGrammar) {
antlrTool.error("user-defined throws spec not allowed (yet) for lexer rule " + rblk.ruleName);
else {
_print(", " + rblk.throwsSpec);
_println(" {");
// Convert return action to variable declaration
if (rblk.returnAction != null)
println(rblk.returnAction + ";");
// print out definitions needed by rules for various grammar types
if (grammar.traceRules) {
if (grammar instanceof TreeWalkerGrammar) {
println("traceIn(\"" + s.getId() + "\",_t);");
else {
println("traceIn(\"" + s.getId() + "\");");
if (grammar instanceof LexerGrammar) {
// lexer rule default return value is the rule's token name
// This is a horrible hack to support the built-in EOF lexer rule.
if (s.getId().equals("mEOF"))
println("_ttype = Token.EOF_TYPE;");
println("_ttype = " + s.getId().substring(1) + ";");
println("int _saveIndex;"); // used for element! (so we can kill text matched for element)
println("boolean old_saveConsumedInput=saveConsumedInput;");
if ( !rblk.getAutoGen() ) { // turn off "save input" if ! on rule
// if debugging, write code to mark entry to the rule
if (grammar.debuggingOutput)
if (grammar instanceof ParserGrammar)
println("fireEnterRule(" + ruleNum + ",0);");
else if (grammar instanceof LexerGrammar)
println("fireEnterRule(" + ruleNum + ",_ttype);");
// Generate trace code if desired
if (grammar.debuggingOutput || grammar.traceRules) {
println("try { // debugging");
// Initialize AST variables
if (grammar instanceof TreeWalkerGrammar) {
// "Input" value for rule
println(labeledElementASTType + " " + s.getId() + "_AST_in = (_t == ASTNULL) ? null : (" + labeledElementASTType + ")_t;");
if (grammar.buildAST) {
// Parser member used to pass AST returns from rule invocations
println("returnAST = null;");
// Tracks AST construction
// println("ASTPair currentAST = (inputState.guessing==0) ? new ASTPair() : null;");
println("ASTPair currentAST = new ASTPair();");
// User-settable return value for rule.
println(labeledElementASTType + " " + s.getId() + "_AST = null;");
// Search for an unlabeled exception specification attached to the rule
ExceptionSpec unlabeledUserSpec = rblk.findExceptionSpec("");
// Generate try block around the entire rule for error handling
if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
println("try { // for error handling");
// Generate the alternatives
if (rblk.alternatives.size() == 1) {
// One alternative -- use simple form
Alternative alt = rblk.getAlternativeAt(0);
String pred = alt.semPred;
if (pred != null)
genSemPred(pred, currentRule.line);
if (alt.synPred != null) {
"Syntactic predicate ignored for single alternative",
genAlt(alt, rblk);
else {
// Multiple alternatives -- generate complex form
boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);
JavaBlockFinishingInfo howToFinish = genCommonBlock(rblk, false);
genBlockFinish(howToFinish, throwNoViable);
// Generate catch phrase for error handling
if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
// Close the try block
// Generate user-defined or default catch phrases
if (unlabeledUserSpec != null) {
else if (rblk.getDefaultErrorHandler()) {
// Generate default catch phrase
println("catch (" + exceptionThrown + " ex) {");
// Generate code to handle error if not guessing
if (grammar.hasSyntacticPredicate) {
println("if (inputState.guessing==0) {");
if (!(grammar instanceof TreeWalkerGrammar)) {
// Generate code to consume until token in k==1 follow set
Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1, rblk.endNode);
String followSetName = getBitsetName(markBitsetForGen(follow.fset));
println("consumeUntil(" + followSetName + ");");
else {
// Just consume one token
println("if (_t!=null) {_t = _t.getNextSibling();}");
if (grammar.hasSyntacticPredicate) {
// When guessing, rethrow exception
println("} else {");
println(" throw ex;");
// Close catch phrase
// Squirrel away the AST "return" value
if (grammar.buildAST) {
println("returnAST = " + s.getId() + "_AST;");
// Set return tree value for tree walkers
if (grammar instanceof TreeWalkerGrammar) {
println("_retTree = _t;");
// Generate literals test for lexer rules so marked
if (rblk.getTestLiterals()) {
if (s.access.equals("protected")) {
else {
// if doing a lexer rule, dump code to create token if necessary
if (grammar instanceof LexerGrammar) {
println("if ( _createToken && _token==null && _ttype!=Token.SKIP ) {");
println(" _token = makeToken(_ttype);");
println(" _token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));");
println("_returnToken = _token;");
// Gen the return statement if there is one (lexer has hard-wired return action)
if (rblk.returnAction != null) {
println("return " + extractIdOfAction(rblk.returnAction, rblk.getLine(), rblk.getColumn()) + ";");
if (grammar.debuggingOutput || grammar.traceRules) {
println("} finally { // debugging");
// If debugging, generate calls to mark exit of rule
if (grammar.debuggingOutput)
if (grammar instanceof ParserGrammar)
println("fireExitRule(" + ruleNum + ",0);");
else if (grammar instanceof LexerGrammar)
println("fireExitRule(" + ruleNum + ",_ttype);");
if (grammar.traceRules) {
if (grammar instanceof TreeWalkerGrammar) {
println("traceOut(\"" + s.getId() + "\",_t);");
else {
println("traceOut(\"" + s.getId() + "\");");
// Restore the AST generation state
genAST = savegenAST;
// restore char save state
// saveText = oldsaveTest;
| protected void | genSemPred(java.lang.String pred, int line)
// Emits the generated-code check for a validating semantic predicate.
// pred is the predicate text from the grammar; line is forwarded to
// processActionForSpecialSymbols together with currentRule.
// The emitted code is an if-not test on the translated predicate that
// throws a SemanticException whose message is the escaped predicate text.
// translate $ and # references
ActionTransInfo tInfo = new ActionTransInfo();
pred = processActionForSpecialSymbols(pred, line, currentRule, tInfo);
// ignore translation info...we don't need to do anything with it.
// The escaped text is captured before any debugging wrapper is applied, so
// the exception message and addSemPred both see the unwrapped predicate.
String escapedPred = charFormatter.escapeString(pred);
// if debugging, wrap the semantic predicate evaluation in a method
// that can tell SemanticPredicateListeners the result
if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
pred = "fireSemanticPredicateEvaluated(persistence.antlr.debug.SemanticPredicateEvent.VALIDATING,"
+ addSemPred(escapedPred) + "," + pred + ")";
println("if (!(" + pred + "))");
println(" throw new SemanticException(\"" + escapedPred + "\");");
| protected void | genSemPredMap()Write an array of Strings which are the semantic predicate
expressions. The debugger will reference them by number only
// Iterates the semPreds vector and prints each element as a quoted,
// comma-terminated entry of a private String _semPredNames[] initializer.
// NOTE(review): the statement that closes the emitted initializer is not
// visible in this listing.
Enumeration e = semPreds.elements();
println("private String _semPredNames[] = {");
while (e.hasMoreElements())
println("\"" + e.nextElement() + "\",");
| protected void | genSynPred(persistence.antlr.SynPredBlock blk, java.lang.String lookaheadExpr)
// Emits the generated code that evaluates a syntactic predicate.
// blk is the predicate block (its ID suffixes every generated variable);
// lookaheadExpr is the boolean expression text used as the outer guard.
// The emitted code declares synPredMatched<ID>, saves the input position,
// runs the predicate block inside a try, clears the flag in the catch,
// restores the input position, and finally opens an if on the flag.
if (DEBUG_CODE_GENERATOR) System.out.println("gen=>(" + blk + ")");
// Dump synpred result variable
println("boolean synPredMatched" + blk.ID + " = false;");
// Gen normal lookahead test
println("if (" + lookaheadExpr + ") {");
// Save input state
// Tree walkers remember the current node _t; other grammars call mark().
if (grammar instanceof TreeWalkerGrammar) {
println("AST __t" + blk.ID + " = _t;");
else {
println("int _m" + blk.ID + " = mark();");
// Once inside the try, assume synpred works unless exception caught
println("synPredMatched" + blk.ID + " = true;");
// if debugging, tell listeners that a synpred has started
// NOTE(review): the statement guarded by this condition is not visible in
// this listing.
if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) ||
(grammar instanceof LexerGrammar))) {
println("try {");
gen((AlternativeBlock)blk); // gen code to test predicate
//println("System.out.println(\"pred "+blk+" succeeded\");");
println("catch (" + exceptionThrown + " pe) {");
println("synPredMatched" + blk.ID + " = false;");
//println("System.out.println(\"pred "+blk+" failed\");");
// Restore input state
// Mirrors the save step above: reassign _t for tree walkers, rewind() otherwise.
if (grammar instanceof TreeWalkerGrammar) {
println("_t = __t" + blk.ID + ";");
else {
println("rewind(_m" + blk.ID + ");");
// if debugging, tell listeners how the synpred turned out
if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) ||
(grammar instanceof LexerGrammar))) {
println("if (synPredMatched" + blk.ID + ")");
println(" fireSyntacticPredicateSucceeded();");
// NOTE(review): presumably an emitted else separates the succeeded and
// failed calls; it is not visible in this listing -- confirm.
println(" fireSyntacticPredicateFailed();");
// Close lookahead test
// Test synpred result
println("if ( synPredMatched" + blk.ID + " ) {");
| protected void | genTokenASTNodeMap()Create and set Integer token type objects that map
to Java Class objects (which AST node to create).
// Emits a buildTokenTypeASTClassMap() method into the generated class.
// For every vocabulary entry whose TokenSymbol has a non-null AST node type,
// a tokenTypeToASTClassMap.put(new Integer(type), ...) call is printed; the
// Hashtable creation is printed once, just before the first such put.
println("protected void buildTokenTypeASTClassMap() {");
// Generate a map.put("T","TNode") for each token
// if heterogeneous node known for that token T.
boolean generatedNewHashtable = false;
// NOTE(review): n is tested against 0 below, but no statement that changes
// it is visible in this listing.
int n = 0;
// Walk the token vocabulary and generate puts.
Vector v = grammar.tokenManager.getVocabulary();
for (int i = 0; i < v.size(); i++) {
String s = (String)v.elementAt(i);
if (s != null) {
TokenSymbol ts = grammar.tokenManager.getTokenSymbol(s);
if (ts != null && ts.getASTNodeType() != null) {
if ( !generatedNewHashtable ) {
// only generate if we are going to add a mapping
println("tokenTypeToASTClassMap = new Hashtable();");
generatedNewHashtable = true;
// NOTE(review): the second argument of the emitted put call and the body
// of the n==0 test are cut off in this listing.
println("tokenTypeToASTClassMap.put(new Integer("+ts.getTokenType()+"), "+
if ( n==0 ) {
| public void | genTokenStrings()Generate a static array containing the names of the tokens,
indexed by the token type values. This static array is used
to format error messages so that the token identifiers or literal
strings are displayed instead of the token numbers.
If a lexical rule has a paraphrase, use it rather than the
token label.
// Generate a string for each token. This creates a static
// array of Strings indexed by token type.
// Body of genTokenStrings(): prints the opening of the generated
// public static final String[] _tokenNames initializer, then visits every
// vocabulary slot in index order.
println("public static final String[] _tokenNames = {");
// Walk the token vocabulary and generate a Vector of strings
// from the tokens.
Vector v = grammar.tokenManager.getVocabulary();
for (int i = 0; i < v.size(); i++) {
String s = (String)v.elementAt(i);
// Empty slots get a placeholder built from the index in angle brackets.
if (s == null) {
s = "<" + String.valueOf(i) + ">";
// Names that are neither string literals nor angle-bracket placeholders
// are replaced by the symbol paraphrase (outer quotes stripped) when one exists.
if (!s.startsWith("\"") && !s.startsWith("<")) {
TokenSymbol ts = (TokenSymbol)grammar.tokenManager.getTokenSymbol(s);
if (ts != null && ts.getParaphrase() != null) {
s = StringUtils.stripFrontBack(ts.getParaphrase(), "\"", "\"");
// NOTE(review): the statements that print each entry and its separator are
// not visible in this listing; only the last-element test remains.
if (i != v.size() - 1) {
// Close the string array initializer
| protected void | genTokenTypes(persistence.antlr.TokenManager tm)Generate the token types Java file
// Writes a Java interface named tm.getName() + TokenTypesFileSuffix that
// declares one int constant per token type in the vocabulary of tm.
// Side effects visible here: calls setupOutput, resets tabs to 0, may assign
// sl.label on string-literal symbols, and sets currentOutput to null at the end.
// Open the token output Java file and set the currentOutput stream
// SAS: file open was moved to a method so a subclass can override
// This was mainly for the VAJ interface
setupOutput(tm.getName() + TokenTypesFileSuffix);
tabs = 0;
// Generate the header common to all Java files
// Do not use printAction because we assume tabs==0
// Encapsulate the definitions in an interface. This can be done
// because they are all constants.
println("public interface " + tm.getName() + TokenTypesFileSuffix + " {");
// Generate a definition for each token type
Vector v = tm.getVocabulary();
// Do special tokens manually
println("int EOF = " + Token.EOF_TYPE + ";");
println("int NULL_TREE_LOOKAHEAD = " + Token.NULL_TREE_LOOKAHEAD + ";");
// Entries below Token.MIN_USER_TYPE are skipped by the loop start index.
for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
String s = (String)v.elementAt(i);
if (s != null) {
if (s.startsWith("\"")) {
// a string literal
// Naming priority for literals: explicit label, then mangled name,
// otherwise only a comment line is emitted for the literal.
StringLiteralSymbol sl = (StringLiteralSymbol)tm.getTokenSymbol(s);
if (sl == null) {
antlrTool.panic("String literal " + s + " not in symbol table");
else if (sl.label != null) {
println("int " + sl.label + " = " + i + ";");
else {
String mangledName = mangleLiteral(s);
if (mangledName != null) {
// We were able to create a meaningful mangled token name
println("int " + mangledName + " = " + i + ";");
// if no label specified, make the label equal to the mangled name
sl.label = mangledName;
else {
println("// " + s + " = " + i);
// Ordinary token names become constants; angle-bracket placeholder names
// are skipped.
else if (!s.startsWith("<")) {
println("int " + s + " = " + i + ";");
// Close the interface
// Close the tokens output file
// NOTE(review): the statements that print the closing brace and close the
// stream are not visible in this listing.
currentOutput = null;
| public java.lang.String | getASTCreateString(persistence.antlr.collections.impl.Vector v)Get a string for an expression to generate creation of an AST subtree.
// v holds the expression strings for the subtree elements, appended in
// index order as chained .add(...) calls.
// Returns the empty string when v has no elements; otherwise returns the
// text of an astFactory.make call over a new ASTArray sized v.size(), cast
// to labeledElementASTType.
if (v.size() == 0) {
return "";
StringBuffer buf = new StringBuffer();
// NOTE(review): the end of this append expression and any text appended
// after the loop are cut off in this listing.
buf.append("(" + labeledElementASTType +
")astFactory.make( (new ASTArray(" + v.size() +
for (int i = 0; i < v.size(); i++) {
buf.append(".add(" + v.elementAt(i) + ")");
return buf.toString();
| public java.lang.String | getASTCreateString(persistence.antlr.GrammarAtom atom, java.lang.String astCtorArgs)Get a string for an expression to generate creating of an AST node
// atom may be null; astCtorArgs is the constructor-argument text.
// When atom carries an AST node type the result starts with a cast to that
// type; otherwise the call is delegated to getASTCreateString(String).
if (atom != null && atom.getASTNodeType() != null) {
// they specified a type either on the reference or in tokens{} section
// NOTE(review): the remainder of this return expression is cut off in this
// listing.
return "("+atom.getASTNodeType()+")"+
else {
// must be an action or something since not referencing an atom
return getASTCreateString(astCtorArgs);
| public java.lang.String | getASTCreateString(java.lang.String astCtorArgs)Get a string for an expression to generate creating of an AST node.
Parse the first (possibly only) argument looking for the token type.
If the token type is a valid token symbol, ask for its AST node type
and add to the end if only 2 arguments. The forms are #[T], #[T,"t"],
and as of 2.7.2 #[T,"t",ASTclassname].
//System.out.println("AST CTOR: "+astCtorArgs);
// Body of getASTCreateString(String astCtorArgs).
// A null argument is treated as the empty string. The commas in the
// argument text are counted; with fewer than two commas the text before the
// first comma (or the whole text) is looked up as a token name so that a
// token-specific AST node type can be used as the cast. With no comma an
// empty-text second argument is prepared in emptyText.
// If no token-specific type applies, the result is an astFactory.create
// call, cast to labeledElementASTType unless that type is exactly AST.
// The final return handles the remaining case and always adds the cast.
// NOTE(review): the comma character literals below are damaged in this
// listing, the counter increment is missing, and two return expressions end
// on a dangling plus sign -- compare against the real source file.
if ( astCtorArgs==null ) {
astCtorArgs = "";
int nCommas = 0;
for (int i=0; i<astCtorArgs.length(); i++) {
if ( astCtorArgs.charAt(i)=='," ) {
//System.out.println("num commas="+nCommas);
if ( nCommas<2 ) { // if 1 or 2 args
int firstComma = astCtorArgs.indexOf(',");
int lastComma = astCtorArgs.lastIndexOf(',");
String tokenName = astCtorArgs;
if ( nCommas>0 ) {
tokenName = astCtorArgs.substring(0,firstComma);
//System.out.println("Checking for ast node type of "+tokenName);
TokenSymbol ts = grammar.tokenManager.getTokenSymbol(tokenName);
if ( ts!=null ) {
String astNodeType = ts.getASTNodeType();
//System.out.println("node type of "+tokenName+" is "+astNodeType);
String emptyText = "";
if ( nCommas==0 ) {
// need to add 2nd arg of blank text for token text
emptyText = ",\"\"";
if ( astNodeType!=null ) {
return "("+astNodeType+")"+
// fall through and just do a regular create with cast on front
// if necessary (it differs from default "AST").
if ( labeledElementASTType.equals("AST") ) {
return "astFactory.create("+astCtorArgs+")";
return "("+labeledElementASTType+")"+
// create default type or (since 2.7.2) 3rd arg is classname
return "(" + labeledElementASTType + ")astFactory.create(" + astCtorArgs + ")";
| protected java.lang.String | getLookaheadTestExpression(persistence.antlr.Lookahead[] look, int k)
// Builds the text of a lookahead test covering depths 1 through k.
// look is indexed by depth starting at 1 (index 0 is not read here).
// The per-depth terms are joined with a closing parenthesis, the and
// operator, and an opening parenthesis; the caller supplies the outermost
// parentheses.
StringBuffer e = new StringBuffer(100);
boolean first = true;
for (int i = 1; i <= k; i++) {
BitSet p = look[i].fset;
if (!first) {
e.append(") && (");
first = false;
// Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
// There is no way to predict what that token would be. Just
// allow anything instead.
// NOTE(review): the statement appended for the epsilon case is not visible
// in this listing.
if (look[i].containsEpsilon()) {
else {
e.append(getLookaheadTestTerm(i, p));
return e.toString();
| protected java.lang.String | getLookaheadTestExpression(persistence.antlr.Alternative alt, int maxDepth)Generate a lookahead test expression for an alternate. This
will be a series of tests joined by '&&' and enclosed by '()',
the number of such tests being determined by the depth of the lookahead.
// alt supplies lookaheadDepth and the cached Lookahead array (alt.cache).
// In the lines visible here maxDepth is only compared against zero; the
// depth handed to the array overload is the alternative depth, or
// grammar.maxk for a nondeterministic alternative.
int depth = alt.lookaheadDepth;
if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
// if the decision is nondeterministic, do the best we can: LL(k)
// any predicates that are around will be generated later.
depth = grammar.maxk;
if (maxDepth == 0) {
// empty lookahead can result from alt with sem pred
// that can see end of token. E.g., A : {pred}? ('a')? ;
return "( true )";
return "(" + getLookaheadTestExpression(alt.cache, depth) + ")";
| protected java.lang.String | getLookaheadTestTerm(int k, persistence.antlr.collections.impl.BitSet p)Generate a depth==1 lookahead test expression given the BitSet.
This may be one of:
1) a series of 'x==X||' tests
2) a range test using >= && <= where possible,
3) a bitset membership test for complex comparisons
// k is the lookahead depth (passed to lookaheadString and
// getRangeExpression); p is the set of values to test for.
// Strategy order in the code: range test first, then the constant true for
// an empty set, then a bitset member() call once degree reaches
// bitsetTestThreshold, and finally a chain of comparisons joined by ||.
// Determine the name of the item to be compared
String ts = lookaheadString(k);
// Generate a range expression if possible
int[] elems = p.toArray();
if (elementsAreRange(elems)) {
return getRangeExpression(k, elems);
// Generate a bitset membership test if possible
StringBuffer e;
// NOTE(review): the right-hand side of this initializer is missing in this
// listing; presumably it is the element count of p -- confirm.
int degree =;
if (degree == 0) {
return "true";
if (degree >= bitsetTestThreshold) {
int bitsetIdx = markBitsetForGen(p);
return getBitsetName(bitsetIdx) + ".member(" + ts + ")";
// Otherwise, generate the long-winded series of "x==X||" tests
e = new StringBuffer();
for (int i = 0; i < elems.length; i++) {
// Get the compared-to item (token or character value)
String cs = getValueString(elems[i]);
// Generate the element comparison
// NOTE(review): only the separator append is visible; the append of the
// comparison built from ts and cs is missing from this listing.
if (i > 0) e.append("||");
return e.toString();
| public java.lang.String | getRangeExpression(int k, int[] elems)Return an expression for testing a contiguous range of elements
if (!elementsAreRange(elems)) {
// Body of getRangeExpression(int k, int[] elems).
// Calling this with a non-range array is reported through antlrTool.panic.
antlrTool.panic("getRangeExpression called with non-range");
// The first and last array entries are used as the inclusive bounds.
int begin = elems[0];
int end = elems[elems.length - 1];
// Result text: a parenthesised pair of >= and <= comparisons on
// lookaheadString(k), with bounds rendered by getValueString.
// NOTE(review): the return keyword that should introduce this expression is
// not visible in this listing.
"(" + lookaheadString(k) + " >= " + getValueString(begin) + " && " +
lookaheadString(k) + " <= " + getValueString(end) + ")";
| private java.lang.String | getValueString(int value)getValueString: get a string representation of a token or char value
// For lexer grammars the value is rendered with charFormatter.literalChar.
// For other grammars the token symbol at that type is looked up and the
// result is, in order of preference: the bare number when no symbol exists,
// the label of a string literal, its mangled name, the number again when
// mangling fails, or the symbol id for non-literal tokens.
String cs;
if (grammar instanceof LexerGrammar) {
cs = charFormatter.literalChar(value);
else {
TokenSymbol ts = grammar.tokenManager.getTokenSymbolAt(value);
if (ts == null) {
return "" + value; // return token type as string
// tool.panic("vocabulary for token type " + value + " is null");
String tId = ts.getId();
if (ts instanceof StringLiteralSymbol) {
// if string literal, use predefined label if any
// if no predefined, try to mangle into LITERAL_xxx.
// if can't mangle, use int value as last resort
StringLiteralSymbol sl = (StringLiteralSymbol)ts;
String label = sl.getLabel();
if (label != null) {
cs = label;
else {
cs = mangleLiteral(tId);
if (cs == null) {
cs = String.valueOf(value);
else {
cs = tId;
return cs;
| protected boolean | lookaheadIsEmpty(persistence.antlr.Alternative alt, int maxDepth)Is the lookahead for this alt empty?
// Checks depths 1 up to the smaller of the alternative depth and maxDepth
// (grammar.maxk replaces a NONDETERMINISTIC depth). Returns false as soon
// as one depth passes the test below, true if none does.
int depth = alt.lookaheadDepth;
if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
depth = grammar.maxk;
for (int i = 1; i <= depth && i <= maxDepth; i++) {
BitSet p = alt.cache[i].fset;
// NOTE(review): the left operand of this comparison is missing in this
// listing; presumably it measures the size of p -- confirm.
if ( != 0) {
return false;
return true;
| private java.lang.String | lookaheadString(int k)
// Returns the generated-code expression that reads the lookahead item.
// Tree walkers always use the type of the current node _t and ignore k;
// every other grammar type uses LA(k).
if (grammar instanceof TreeWalkerGrammar) {
return "_t.getType()";
return "LA(" + k + ")";
| private java.lang.String | mangleLiteral(java.lang.String s)Mangle a string literal into a meaningful token name. This is
only possible for literals that are all characters. The resulting
mangled literal name is literalsPrefix with the text of the literal
// s is read from index 1 to length-2, i.e. its first and last characters
// (the surrounding quotes, judging by how callers pass literals) are skipped.
// Returns null as soon as a character fails the letter-or-underscore test
// in the loop; otherwise returns antlrTool.literalsPrefix followed by the
// inner characters, upper-cased when antlrTool.upperCaseMangledLiterals is set.
// NOTE(review): the underscore character literal in the loop condition is
// damaged in this listing.
String mangled = antlrTool.literalsPrefix;
for (int i = 1; i < s.length() - 1; i++) {
if (!Character.isLetter(s.charAt(i)) &&
s.charAt(i) != '_") {
return null;
mangled += s.charAt(i);
if (antlrTool.upperCaseMangledLiterals) {
mangled = mangled.toUpperCase();
return mangled;
| public java.lang.String | mapTreeId(java.lang.String idParam, persistence.antlr.ActionTransInfo transInfo)Map an identifier to its corresponding tree-node variable.
This is context-sensitive, depending on the rule and alternative
being generated
// if not in an action of a rule, nothing to map.
if (currentRule == null) return idParam;
// Body of mapTreeId(idParam, transInfo).
// Lookup order: labels of the current rule, then treeVariableMap for the
// current alternative, then the name of the current rule itself; an id
// that matches none of these is returned unchanged.
// in_var selects the input-side variable name instead of the output-side one.
// Returns null after reporting an ambiguous reference through antlrTool.error.
boolean in_var = false;
String id = idParam;
if (grammar instanceof TreeWalkerGrammar) {
// A tree walker that does not build an AST always maps to input variables.
if (!grammar.buildAST) {
in_var = true;
// If the id ends with "_in", then map it to the input variable
else if (id.length() > 3 && id.lastIndexOf("_in") == id.length() - 3) {
// Strip off the "_in"
id = id.substring(0, id.length() - 3);
in_var = true;
// Check the rule labels. If id is a label, then the output
// variable is label_AST, and the input variable is plain label.
for (int i = 0; i < currentRule.labeledElements.size(); i++) {
AlternativeElement elt = (AlternativeElement)currentRule.labeledElements.elementAt(i);
if (elt.getLabel().equals(id)) {
return in_var ? id : id + "_AST";
// Failing that, check the id-to-variable map for the alternative.
// If the id is in the map, then output variable is the name in the
// map, and input variable is name_in
String s = (String)treeVariableMap.get(id);
if (s != null) {
// NONUNIQUE is a marker object, so it is compared by reference here.
if (s == NONUNIQUE) {
// There is more than one element with this id
antlrTool.error("Ambiguous reference to AST element "+id+
" in rule "+currentRule.getRuleName());
return null;
else if (s.equals(currentRule.getRuleName())) {
// a recursive call to the enclosing rule is
// ambiguous with the rule itself.
antlrTool.error("Ambiguous reference to AST element "+id+
" in rule "+currentRule.getRuleName());
return null;
else {
return in_var ? s + "_in" : s;
// Failing that, check the rule name itself. Output variable
// is rule_AST; input variable is rule_AST_in (treeparsers).
if (id.equals(currentRule.getRuleName())) {
String r = in_var ? id + "_AST_in" : id + "_AST";
// Record the rule-root reference for the caller, but only for the
// output-side variable and only when a transInfo object was supplied.
if (transInfo != null) {
if (!in_var) {
transInfo.refRuleRoot = r;
return r;
else {
// id does not map to anything -- return itself.
return id;
| private void | mapTreeVariable(persistence.antlr.AlternativeElement e, java.lang.String name)Given an element and the name of an associated AST variable,
create a mapping between the element "name" and the variable name.
// e is the grammar element; name is the AST variable name to associate.
// Only unlabeled token references and rule references produce a map key
// (the token text or the target rule name). A key that is already present
// is overwritten with the NONUNIQUE marker instead of the new name.
// For tree elements, defer to the root
if (e instanceof TreeElement) {
mapTreeVariable(((TreeElement)e).root, name);
// NOTE(review): whether control returns right after the recursive call
// cannot be determined from this listing.
// Determine the name of the element, if any, for mapping purposes
String elName = null;
// Don't map labeled items
if (e.getLabel() == null) {
if (e instanceof TokenRefElement) {
// use the token id
elName = ((TokenRefElement)e).atomText;
else if (e instanceof RuleRefElement) {
// use the rule name
elName = ((RuleRefElement)e).targetRule;
// Add the element to the tree variable map if it has a name
if (elName != null) {
if (treeVariableMap.get(elName) != null) {
// Name is already in the map -- mark it as duplicate
treeVariableMap.put(elName, NONUNIQUE);
else {
treeVariableMap.put(elName, name);
| protected java.lang.String | processActionForSpecialSymbols(java.lang.String actionStr, int line, persistence.antlr.RuleBlock currentRule, persistence.antlr.ActionTransInfo tInfo)Lexically process $var and tree-specifiers in the action.
This will replace #id and #(...) with the appropriate
function calls and/or variables etc...
// Returns null for a null or empty action, and returns the action text
// unchanged when no grammar is set or when none of the translation
// triggers in the condition below applies. Otherwise the text produced by
// the action lexer replaces actionStr.
// Every catch branch visible here also returns actionStr.
if (actionStr == null || actionStr.length() == 0) return null;
// The action trans info tells us (at the moment) whether an
// assignment was done to the rule's tree root.
if (grammar == null)
return actionStr;
// see if we have anything to do...
if ((grammar.buildAST && actionStr.indexOf('#") != -1) ||
grammar instanceof TreeWalkerGrammar ||
((grammar instanceof LexerGrammar ||
grammar instanceof ParserGrammar)
&& actionStr.indexOf('$") != -1)) {
// NOTE(review): the two character literals in the condition above are
// damaged in this listing, and the lexer construction below has been fused
// into the comment line; the statements that configure and run the lexer
// are not visible.
// Create a lexer to read an action and return the translated version lexer =
try {
actionStr = lexer.getTokenObject().getText();
// System.out.println("action translated: "+actionStr);
// System.out.println("trans info is "+tInfo);
catch (RecognitionException ex) {
return actionStr;
catch (TokenStreamException tex) {
antlrTool.panic("Error reading action:" + actionStr);
return actionStr;
catch (CharStreamException io) {
antlrTool.panic("Error reading action:" + actionStr);
return actionStr;
return actionStr;
| private void | setupGrammarParameters(persistence.antlr.Grammar g)
// Initialises the per-grammar code generation strings (element types,
// initial values, extra rule arguments and parameters, local variables,
// the LT(1) expression, the thrown exception type and the no-viable-alt
// throw statement) according to whether g is a parser, lexer or tree
// walker grammar. Any other grammar type is reported via antlrTool.panic.
if (g instanceof ParserGrammar) {
labeledElementASTType = "AST";
// An ASTLabelType option, with surrounding quotes stripped, overrides the
// default AST type name.
if (g.hasOption("ASTLabelType")) {
Token tsuffix = g.getOption("ASTLabelType");
if (tsuffix != null) {
String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
if (suffix != null) {
labeledElementASTType = suffix;
labeledElementType = "Token ";
labeledElementInit = "null";
commonExtraArgs = "";
commonExtraParams = "";
commonLocalVars = "";
lt1Value = "LT(1)";
exceptionThrown = "RecognitionException";
throwNoViable = "throw new NoViableAltException(LT(1), getFilename());";
// The lexer branch visible here does not assign labeledElementASTType.
else if (g instanceof LexerGrammar) {
labeledElementType = "char ";
labeledElementInit = "'\\0'";
commonExtraArgs = "";
commonExtraParams = "boolean _createToken";
commonLocalVars = "int _ttype; Token _token=null; int _begin=text.length();";
lt1Value = "LA(1)";
exceptionThrown = "RecognitionException";
throwNoViable = "throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn());";
else if (g instanceof TreeWalkerGrammar) {
labeledElementASTType = "AST";
labeledElementType = "AST";
// For tree walkers the option overrides both the AST type and the label type.
if (g.hasOption("ASTLabelType")) {
Token tsuffix = g.getOption("ASTLabelType");
if (tsuffix != null) {
String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
if (suffix != null) {
labeledElementASTType = suffix;
labeledElementType = suffix;
// When the option is absent it is set on the grammar with the default
// value, so the option is always present afterwards.
if (!g.hasOption("ASTLabelType")) {
g.setOption("ASTLabelType", new Token(ANTLRTokenTypes.STRING_LITERAL, "AST"));
labeledElementInit = "null";
commonExtraArgs = "_t";
commonExtraParams = "AST _t";
commonLocalVars = "";
lt1Value = "(" + labeledElementASTType + ")_t";
exceptionThrown = "RecognitionException";
throwNoViable = "throw new NoViableAltException(_t);";
else {
antlrTool.panic("Unknown grammar type");
| public void | setupOutput(java.lang.String className)This method exists so a subclass, namely VAJCodeGenerator,
can open the file in its own evil way. JavaCodeGenerator
simply opens a text file...
// className is the base name of the output; the .java extension is
// appended here and the stream returned by antlrTool.openOutputFile is
// stored in currentOutput.
currentOutput = antlrTool.openOutputFile(className + ".java");
| private static boolean | suitableForCaseExpression(persistence.antlr.Alternative a)
// Conjunction of four conditions on alternative a: lookahead depth exactly
// 1, no semantic predicate, no epsilon in the depth-1 lookahead, and a
// size limit checked against caseSizeThreshold.
// NOTE(review): the return keyword is missing from this listing and the
// last operand compares a.cache[1] itself with an int; presumably the real
// source compares the size of its set -- confirm.
a.lookaheadDepth == 1 &&
a.semPred == null &&
!a.cache[1].containsEpsilon() &&
a.cache[1] <= caseSizeThreshold;