File Doc Category Size Date Package
LLkAnalyzer.java API Doc Glassfish v2 API 47177 Wed Aug 30 15:34:08 BST 2006 persistence.antlr

LLkAnalyzer

java.lang.Object

public class LLkAnalyzer extends Object implements LLkGrammarAnalyzer

A linear-approximate LL(k) grammar analzyer. All lookahead elements are sets of token types.

author: Terence Parr, John Lilley
see: persistence.antlr.Grammar
see: persistence.antlr.Lookahead

Fields Summary
public boolean
DEBUG_ANALYZER
private AlternativeBlock
currentBlock
protected Tool
tool
protected Grammar
grammar
protected boolean
lexicalAnalysis
CharFormatter
charFormatter
Constructors Summary
public LLkAnalyzer(Tool tool_)
Create an LLk analyzer
tool = tool_;
Methods Summary
public persistence.antlr.Lookahead FOLLOW(int k, persistence.antlr.RuleEndElement end)
Compute the lookahead set of whatever follows references to the rule associated witht the FOLLOW block.
// what rule are we trying to compute FOLLOW of? RuleBlock rb = (RuleBlock)end.block; // rule name is different in lexer String rule; if (lexicalAnalysis) { rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName()); } else { rule = rb.getRuleName(); } if (DEBUG_ANALYZER) System.out.println("FOLLOW(" + k + "," + rule + ")"); // are we in the midst of computing this FOLLOW already? if (end.lock[k]) { if (DEBUG_ANALYZER) System.out.println("FOLLOW cycle to " + rule); return new Lookahead(rule); } // Check to see if there is cached value if (end.cache[k] != null) { if (DEBUG_ANALYZER) { System.out.println("cache entry FOLLOW(" + k + ") for " + rule + ": " + end.cache[k].toString(",", charFormatter, grammar)); } // if the cache is a complete computation then simply return entry if (end.cache[k].cycle == null) { return (Lookahead)end.cache[k].clone(); } // A cache entry exists, but it is a reference to a cyclic computation. RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle); RuleEndElement re = rs.getBlock().endNode; // The other entry may not exist because it is still being // computed when this cycle cache entry was found here. if (re.cache[k] == null) { // return the cycle...that's all we can do at the moment. return (Lookahead)end.cache[k].clone(); } else { if (DEBUG_ANALYZER) { System.out.println("combining FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar) + " with FOLLOW for "+((RuleBlock)re.block).getRuleName()+": "+re.cache[k].toString(",", charFormatter, grammar)); } // combine results from other rule's FOLLOW if ( re.cache[k].cycle==null ) { // current rule depends on another rule's FOLLOW and // it is complete with no cycle; just kill our cycle and // combine full result from other rule's FOLLOW end.cache[k].combineWith(re.cache[k]); end.cache[k].cycle = null; // kill cycle as we're complete } else { // the FOLLOW cache for other rule has a cycle also. // Here is where we bubble up a cycle. We better recursively // wipe out cycles (partial computations). I'm a little nervous // that we might leave a cycle here, however. Lookahead refFOLLOW = FOLLOW(k, re); end.cache[k].combineWith( refFOLLOW ); // all cycles should be gone, but if not, record ref to cycle end.cache[k].cycle = refFOLLOW.cycle; } if (DEBUG_ANALYZER) { System.out.println("saving FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar)); } // Return the updated cache entry associated // with the cycle reference. return (Lookahead)end.cache[k].clone(); } } end.lock[k] = true; // prevent FOLLOW computation cycles Lookahead p = new Lookahead(); RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule); // Walk list of references to this rule to compute FOLLOW for (int i = 0; i < rs.numReferences(); i++) { RuleRefElement rr = rs.getReference(i); if (DEBUG_ANALYZER) System.out.println("next[" + rule + "] is " + rr.next.toString()); Lookahead q = rr.next.look(k); if (DEBUG_ANALYZER) System.out.println("FIRST of next[" + rule + "] ptr is " + q.toString()); /* If there is a cycle then if the cycle is to the rule for * this end block, you have a cycle to yourself. Remove the * cycle indication--the lookahead is complete. */ if (q.cycle != null && q.cycle.equals(rule)) { q.cycle = null; // don't want cycle to yourself! } // add the lookahead into the current FOLLOW computation set p.combineWith(q); if (DEBUG_ANALYZER) System.out.println("combined FOLLOW[" + rule + "] is " + p.toString()); } end.lock[k] = false; // we're not doing FOLLOW anymore // if no rules follow this, it can be a start symbol or called by a start sym. // set the follow to be end of file. if (p.fset.nil() && p.cycle == null) { if (grammar instanceof TreeWalkerGrammar) { // Tree grammars don't see EOF, they see end of sibling list or // "NULL TREE LOOKAHEAD". p.fset.add(Token.NULL_TREE_LOOKAHEAD); } else if (grammar instanceof LexerGrammar) { // Lexical grammars use Epsilon to indicate that the end of rule has been hit // EOF would be misleading; any character can follow a token rule not just EOF // as in a grammar (where a start symbol is followed by EOF). There is no // sequence info in a lexer between tokens to indicate what is the last token // to be seen. // p.fset.add(EPSILON_TYPE); p.setEpsilon(); } else { p.fset.add(Token.EOF_TYPE); } } // Cache the result of the FOLLOW computation if (DEBUG_ANALYZER) { System.out.println("saving FOLLOW(" + k + ") for " + rule + ": " + p.toString(",", charFormatter, grammar)); } end.cache[k] = (Lookahead)p.clone(); return p;
protected boolean altUsesWildcardDefault(persistence.antlr.Alternative alt)
Return true if someone used the '.' wildcard default idiom. Either #(. children) or '.' as an alt by itself.
AlternativeElement head = alt.head; // if element is #(. blah) then check to see if el is root if (head instanceof TreeElement && ((TreeElement)head).root instanceof WildcardElement) { return true; } if (head instanceof WildcardElement && head.next instanceof BlockEndElement) { return true; } return false;
public boolean deterministic(persistence.antlr.AlternativeBlock blk)
Is this block of alternatives LL(k)? Fill in alternative cache for this block.
return
true if the block is deterministic
/** The lookahead depth for this decision */ int k = 1; // start at k=1 if (DEBUG_ANALYZER) System.out.println("deterministic(" + blk + ")"); boolean det = true; int nalts = blk.alternatives.size(); AlternativeBlock saveCurrentBlock = currentBlock; Alternative wildcardAlt = null; currentBlock = blk; /* don't allow nongreedy (...) blocks */ if (blk.greedy == false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock)) { tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine(), blk.getColumn()); } // SPECIAL CASE: only one alternative. We don't need to check the // determinism, but other code expects the lookahead cache to be // set for the single alt. if (nalts == 1) { AlternativeElement e = blk.getAlternativeAt(0).head; currentBlock.alti = 0; blk.getAlternativeAt(0).cache[1] = e.look(1); blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1) currentBlock = saveCurrentBlock; return true; // always deterministic for one alt } outer: for (int i = 0; i < nalts - 1; i++) { currentBlock.alti = i; currentBlock.analysisAlt = i; // which alt are we analyzing? currentBlock.altj = i + 1; // reset this alt. Haven't computed yet, // but we need the alt number. inner: // compare against other alternatives with lookahead depth k for (int j = i + 1; j < nalts; j++) { currentBlock.altj = j; if (DEBUG_ANALYZER) System.out.println("comparing " + i + " against alt " + j); currentBlock.analysisAlt = j; // which alt are we analyzing? k = 1; // always attempt minimum lookahead possible. // check to see if there is a lookahead depth that distinguishes // between alternatives i and j. Lookahead[] r = new Lookahead[grammar.maxk + 1]; boolean haveAmbiguity; do { haveAmbiguity = false; if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk); Lookahead p,q; p = getAltLookahead(blk, i, k); q = getAltLookahead(blk, j, k); // compare LOOK(alt i) with LOOK(alt j). Is there an intersection? // Lookahead must be disjoint. if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar)); if (DEBUG_ANALYZER) System.out.println("q is " + q.toString(",", charFormatter, grammar)); // r[i] = p.fset.and(q.fset); r[k] = p.intersection(q); if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k].toString()); if (!r[k].nil()) { haveAmbiguity = true; k++; } // go until no more lookahead to use or no intersection } while (haveAmbiguity && k <= grammar.maxk); Alternative ai = blk.getAlternativeAt(i); Alternative aj = blk.getAlternativeAt(j); if (haveAmbiguity) { det = false; ai.lookaheadDepth = NONDETERMINISTIC; aj.lookaheadDepth = NONDETERMINISTIC; /* if ith alt starts with a syntactic predicate, computing the * lookahead is still done for code generation, but messages * should not be generated when comparing against alt j. * Alternatives with syn preds that are unnecessary do * not result in syn pred try-blocks. */ if (ai.synPred != null) { if (DEBUG_ANALYZER) { System.out.println("alt " + i + " has a syn pred"); } // The alt with the (...)=> block is nondeterministic for sure. // If the (...)=> conflicts with alt j, j is nondeterministic. // This prevents alt j from being in any switch statements. // move on to next alternative=>no possible ambiguity! // continue inner; } /* if ith alt starts with a semantic predicate, computing the * lookahead is still done for code generation, but messages * should not be generated when comparing against alt j. */ else if (ai.semPred != null) { if (DEBUG_ANALYZER) { System.out.println("alt " + i + " has a sem pred"); } } /* if jth alt is exactly the wildcard or wildcard root of tree, * then remove elements from alt i lookahead from alt j's lookahead. * Don't do an ambiguity warning. */ else if (altUsesWildcardDefault(aj)) { // System.out.println("removing pred sets"); // removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk); wildcardAlt = aj; } /* If the user specified warnWhenFollowAmbig=false, then we * can turn off this warning IFF one of the alts is empty; * that is, it points immediately at the end block. */ else if (!blk.warnWhenFollowAmbig && (ai.head instanceof BlockEndElement || aj.head instanceof BlockEndElement)) { // System.out.println("ai.head pts to "+ai.head.getClass()); // System.out.println("aj.head pts to "+aj.head.getClass()); } /* If they have the generateAmbigWarnings option off for the block * then don't generate a warning. */ else if (!blk.generateAmbigWarnings) { } /* If greedy=true and *one* empty alt shut off warning. */ else if (blk.greedySet && blk.greedy && ((ai.head instanceof BlockEndElement && !(aj.head instanceof BlockEndElement)) || (aj.head instanceof BlockEndElement && !(ai.head instanceof BlockEndElement)))) { // System.out.println("greedy set to true; one alt empty"); } /* We have no choice, but to report a nondetermism */ else { tool.errorHandler.warnAltAmbiguity( grammar, blk, // the block lexicalAnalysis, // true if lexical grammar.maxk, // depth of ambiguity r, // set of linear ambiguities i, // first ambiguous alternative j // second ambiguous alternative ); } } else { // a lookahead depth, k, was found where i and j do not conflict ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k); aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k); } } } // finished with block. // If had wildcard default clause idiom, remove competing lookahead /* if ( wildcardAlt!=null ) { removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk); } */ currentBlock = saveCurrentBlock; return det;
public boolean deterministic(persistence.antlr.OneOrMoreBlock blk)
Is (...)+ block LL(1)? Fill in alternative cache for this block.
return
true if the block is deterministic
if (DEBUG_ANALYZER) System.out.println("deterministic(...)+(" + blk + ")"); AlternativeBlock saveCurrentBlock = currentBlock; currentBlock = blk; boolean blkOk = deterministic((AlternativeBlock)blk); // block has been checked, now check that what follows does not conflict // with the lookahead of the (...)+ block. boolean det = deterministicImpliedPath(blk); currentBlock = saveCurrentBlock; return det && blkOk;
public boolean deterministic(persistence.antlr.ZeroOrMoreBlock blk)
Is (...)* block LL(1)? Fill in alternative cache for this block.
return
true if the block is deterministic
if (DEBUG_ANALYZER) System.out.println("deterministic(...)*(" + blk + ")"); AlternativeBlock saveCurrentBlock = currentBlock; currentBlock = blk; boolean blkOk = deterministic((AlternativeBlock)blk); // block has been checked, now check that what follows does not conflict // with the lookahead of the (...)* block. boolean det = deterministicImpliedPath(blk); currentBlock = saveCurrentBlock; return det && blkOk;
public boolean deterministicImpliedPath(persistence.antlr.BlockWithImpliedExitPath blk)
Is this (...)* or (...)+ block LL(k)?
return
true if the block is deterministic
/** The lookahead depth for this decision considering implied exit path */ int k; boolean det = true; Vector alts = blk.getAlternatives(); int nalts = alts.size(); currentBlock.altj = -1; // comparing against implicit optional/exit alt if (DEBUG_ANALYZER) System.out.println("deterministicImpliedPath"); for (int i = 0; i < nalts; i++) { // check follow against all alts Alternative alt = blk.getAlternativeAt(i); if (alt.head instanceof BlockEndElement) { tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine(), blk.getColumn()); } k = 1; // assume eac alt is LL(1) with exit branch // check to see if there is a lookahead depth that distinguishes // between alternative i and the exit branch. Lookahead[] r = new Lookahead[grammar.maxk + 1]; boolean haveAmbiguity; do { haveAmbiguity = false; if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk); Lookahead p; Lookahead follow = blk.next.look(k); blk.exitCache[k] = follow; currentBlock.alti = i; p = getAltLookahead(blk, i, k); if (DEBUG_ANALYZER) System.out.println("follow is " + follow.toString(",", charFormatter, grammar)); if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar)); //r[k] = follow.fset.and(p.fset); r[k] = follow.intersection(p); if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k]); if (!r[k].nil()) { haveAmbiguity = true; k++; } // go until no more lookahead to use or no intersection } while (haveAmbiguity && k <= grammar.maxk); if (haveAmbiguity) { det = false; alt.lookaheadDepth = NONDETERMINISTIC; blk.exitLookaheadDepth = NONDETERMINISTIC; Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti); /* If the user specified warnWhenFollowAmbig=false, then we * can turn off this warning. */ if (!blk.warnWhenFollowAmbig) { } /* If they have the generateAmbigWarnings option off for the block * then don't generate a warning. */ else if (!blk.generateAmbigWarnings) { } /* If greedy=true and alt not empty, shut off warning */ else if (blk.greedy == true && blk.greedySet && !(ambigAlt.head instanceof BlockEndElement)) { if (DEBUG_ANALYZER) System.out.println("greedy loop"); } /* If greedy=false then shut off warning...will have * to add "if FOLLOW break" * block during code gen to compensate for removal of warning. */ else if (blk.greedy == false && !(ambigAlt.head instanceof BlockEndElement)) { if (DEBUG_ANALYZER) System.out.println("nongreedy loop"); // if FOLLOW not single k-string (|set[k]| can // be > 1 actually) then must warn them that // loop may terminate incorrectly. // For example, ('a'..'d')+ ("ad"|"cb") if (!lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk)) { tool.warning(new String[]{ "nongreedy block may exit incorrectly due", "\tto limitations of linear approximate lookahead (first k-1 sets", "\tin lookahead not singleton)."}, grammar.getFilename(), blk.getLine(), blk.getColumn()); } } // no choice but to generate a warning else { tool.errorHandler.warnAltExitAmbiguity( grammar, blk, // the block lexicalAnalysis, // true if lexical grammar.maxk, // depth of ambiguity r, // set of linear ambiguities i // ambiguous alternative ); } } else { alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k); blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth, k); } } return det;
private persistence.antlr.Lookahead getAltLookahead(persistence.antlr.AlternativeBlock blk, int alt, int k)
Lookahead p; Alternative a = blk.getAlternativeAt(alt); AlternativeElement e = a.head; //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length); if (a.cache[k] == null) { p = e.look(k); a.cache[k] = p; } else { p = a.cache[k]; } return p;
public persistence.antlr.Lookahead look(int k, persistence.antlr.AlternativeBlock blk)
Combine the lookahead computed for each alternative
if (DEBUG_ANALYZER) System.out.println("lookAltBlk(" + k + "," + blk + ")"); AlternativeBlock saveCurrentBlock = currentBlock; currentBlock = blk; Lookahead p = new Lookahead(); for (int i = 0; i < blk.alternatives.size(); i++) { if (DEBUG_ANALYZER) System.out.println("alt " + i + " of " + blk); // must set analysis alt currentBlock.analysisAlt = i; Alternative alt = blk.getAlternativeAt(i); AlternativeElement elem = alt.head; if (DEBUG_ANALYZER) { if (alt.head == alt.tail) { System.out.println("alt " + i + " is empty"); } } Lookahead q = elem.look(k); p.combineWith(q); } if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) { // Invert the lookahead set if (lexicalAnalysis) { BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone(); int[] elems = p.fset.toArray(); for (int j = 0; j < elems.length; j++) { b.remove(elems[j]); } p.fset = b; } else { p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType()); } } currentBlock = saveCurrentBlock; return p;
public persistence.antlr.Lookahead look(int k, persistence.antlr.BlockEndElement end)
Compute what follows this place-holder node and possibly what begins the associated loop unless the node is locked.
if we hit the end of a loop, we have to include what tokens can begin the loop as well. If the start node is locked, then we simply found an empty path through this subrule while analyzing it. If the start node is not locked, then this node was hit during a FOLLOW operation and the FIRST of this block must be included in that lookahead computation.
if (DEBUG_ANALYZER) System.out.println("lookBlockEnd(" + k + ", " + end.block + "); lock is " + end.lock[k]); if (end.lock[k]) { // computation in progress => the tokens we would have // computed (had we not been locked) will be included // in the set by that computation with the lock on this // node. return new Lookahead(); } Lookahead p; /* Hitting the end of a loop means you can see what begins the loop */ if (end.block instanceof ZeroOrMoreBlock || end.block instanceof OneOrMoreBlock) { // compute what can start the block, // but lock end node so we don't do it twice in same // computation. end.lock[k] = true; p = look(k, end.block); end.lock[k] = false; } else { p = new Lookahead(); } /* Tree blocks do not have any follow because they are children * of what surrounds them. For example, A #(B C) D results in * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which * indicates that nothing can follow the last node of tree #(B C) */ if (end.block instanceof TreeElement) { p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD)); } /* Syntactic predicates such as ( (A)? )=> have no follow per se. * We cannot accurately say what would be matched following a * syntactic predicate (you MIGHT be ok if you said it was whatever * followed the alternative predicted by the predicate). Hence, * (like end-of-token) we return Epsilon to indicate "unknown * lookahead." */ else if (end.block instanceof SynPredBlock) { p.setEpsilon(); } // compute what can follow the block else { Lookahead q = end.block.next.look(k); p.combineWith(q); } return p;
public persistence.antlr.Lookahead look(int k, persistence.antlr.CharLiteralElement atom)
Return this char as the lookahead if k=1.
### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
If the atom has the not flag on, then create the set complement of the tokenType which is the set of all characters referenced in the grammar with this char turned off. Also remove characters from the set that are currently allocated for predicting previous alternatives. This avoids ambiguity messages and is more properly what is meant. ( 'a' | ~'a' ) implies that the ~'a' is the "else" clause.
NOTE: we do NOT include exit path in the exclusion set. E.g., ( 'a' | ~'a' )* 'b' should exit upon seeing a 'b' during the loop.
if (DEBUG_ANALYZER) System.out.println("lookCharLiteral(" + k + "," + atom + ")"); // Skip until analysis hits k==1 if (k > 1) { return atom.next.look(k - 1); } if (lexicalAnalysis) { if (atom.not) { BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone(); if (DEBUG_ANALYZER) System.out.println("charVocab is " + b.toString()); // remove stuff predicted by preceding alts and follow of block removeCompetingPredictionSets(b, atom); if (DEBUG_ANALYZER) System.out.println("charVocab after removal of prior alt lookahead " + b.toString()); // now remove element that is stated not to be in the set b.clear(atom.getType()); return new Lookahead(b); } else { return Lookahead.of(atom.getType()); } } else { // Should have been avoided by MakeGrammar tool.panic("Character literal reference found in parser"); // ... so we make the compiler happy return Lookahead.of(atom.getType()); }
public persistence.antlr.Lookahead look(int k, persistence.antlr.CharRangeElement r)
if (DEBUG_ANALYZER) System.out.println("lookCharRange(" + k + "," + r + ")"); // Skip until analysis hits k==1 if (k > 1) { return r.next.look(k - 1); } BitSet p = BitSet.of(r.begin); for (int i = r.begin + 1; i <= r.end; i++) { p.add(i); } return new Lookahead(p);
public persistence.antlr.Lookahead look(int k, persistence.antlr.GrammarAtom atom)
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + atom + "[" + atom.getType() + "])"); if (lexicalAnalysis) { // MakeGrammar should have created a rule reference instead tool.panic("token reference found in lexer"); } // Skip until analysis hits k==1 if (k > 1) { return atom.next.look(k - 1); } Lookahead l = Lookahead.of(atom.getType()); if (atom.not) { // Invert the lookahead set against the token vocabulary int maxToken = grammar.tokenManager.maxTokenType(); l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken); // remove stuff predicted by preceding alts and follow of block removeCompetingPredictionSets(l.fset, atom); } return l;
public persistence.antlr.Lookahead look(int k, persistence.antlr.OneOrMoreBlock blk)
The lookahead of a (...)+ block is the combined lookahead of all alternatives and, if an empty path is found, the lookahead of what follows the block.
if (DEBUG_ANALYZER) System.out.println("look+" + k + "," + blk + ")"); Lookahead p = look(k, (AlternativeBlock)blk); return p;
public persistence.antlr.Lookahead look(int k, persistence.antlr.RuleBlock blk)
Combine the lookahead computed for each alternative. Lock the node so that no other computation may come back on itself--infinite loop. This also implies infinite left-recursion in the grammar (or an error in this algorithm ;)).
if (DEBUG_ANALYZER) System.out.println("lookRuleBlk(" + k + "," + blk + ")"); Lookahead p = look(k, (AlternativeBlock)blk); return p;
public persistence.antlr.Lookahead look(int k, persistence.antlr.RuleEndElement end)
If not locked or noFOLLOW set, compute FOLLOW of a rule.
TJP says 8/12/99: not true anymore: Lexical rules never compute follow. They set epsilon and the code generator gens code to check for any character. The code generator must remove the tokens used to predict any previous alts in the same block.
When the last node of a rule is reached and noFOLLOW, it implies that a "local" FOLLOW will be computed after this call. I.e.,
a : b A; b : B | ; c : b C;
Here, when computing the look of rule b from rule a, we want only {B,EPSILON_TYPE} so that look(b A) will be {B,A} not {B,A,C}.
if the end block is not locked and the FOLLOW is wanted, the algorithm must compute the lookahead of what follows references to this rule. If end block is locked, FOLLOW will return an empty set with a cycle to the rule associated with this end block.
if (DEBUG_ANALYZER) System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW=" + end.noFOLLOW + "; lock is " + end.lock[k]); if (/*lexicalAnalysis ||*/ end.noFOLLOW) { Lookahead p = new Lookahead(); p.setEpsilon(); p.epsilonDepth = BitSet.of(k); return p; } Lookahead p = FOLLOW(k, end); return p;
public persistence.antlr.Lookahead look(int k, persistence.antlr.RuleRefElement rr)
Compute the lookahead contributed by a rule reference.
When computing ruleref lookahead, we don't want the FOLLOW computation done if an empty path exists for the rule. The FOLLOW is too loose of a set...we want only to include the "local" FOLLOW or what can follow this particular ref to the node. In other words, we use context information to reduce the complexity of the analysis and strengthen the parser. The noFOLLOW flag is used as a means of restricting the FOLLOW to a "local" FOLLOW. This variable is orthogonal to the lock variable that prevents infinite recursion. noFOLLOW does not care about what k is.
if (DEBUG_ANALYZER) System.out.println("lookRuleRef(" + k + "," + rr + ")"); RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule); if (rs == null || !rs.defined) { tool.error("no definition of rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn()); return new Lookahead(); } RuleBlock rb = rs.getBlock(); RuleEndElement end = rb.endNode; boolean saveEnd = end.noFOLLOW; end.noFOLLOW = true; // go off to the rule and get the lookahead (w/o FOLLOW) Lookahead p = look(k, rr.targetRule); if (DEBUG_ANALYZER) System.out.println("back from rule ref to " + rr.targetRule); // restore state of end block end.noFOLLOW = saveEnd; // check for infinite recursion. If a cycle is returned: trouble! if (p.cycle != null) { tool.error("infinite recursion to rule " + p.cycle + " from rule " + rr.enclosingRuleName, grammar.getFilename(), rr.getLine(), rr.getColumn()); } // is the local FOLLOW required? if (p.containsEpsilon()) { if (DEBUG_ANALYZER) System.out.println("rule ref to " + rr.targetRule + " has eps, depth: " + p.epsilonDepth); // remove epsilon p.resetEpsilon(); // fset.clear(EPSILON_TYPE); // for each lookahead depth that saw epsilon int[] depths = p.epsilonDepth.toArray(); p.epsilonDepth = null; // clear all epsilon stuff for (int i = 0; i < depths.length; i++) { int rk = k - (k - depths[i]); Lookahead q = rr.next.look(rk); // see comments in Lookahead p.combineWith(q); } // note: any of these look() computations for local follow can // set EPSILON in the set again if the end of this rule is found. } return p;
public persistence.antlr.Lookahead look(int k, persistence.antlr.StringLiteralElement atom)
if (DEBUG_ANALYZER) System.out.println("lookStringLiteral(" + k + "," + atom + ")"); if (lexicalAnalysis) { // need more lookahead than string can provide? if (k > atom.processedAtomText.length()) { return atom.next.look(k - atom.processedAtomText.length()); } else { // get char at lookahead depth k, from the processed literal text return Lookahead.of(atom.processedAtomText.charAt(k - 1)); } } else { // Skip until analysis hits k==1 if (k > 1) { return atom.next.look(k - 1); } Lookahead l = Lookahead.of(atom.getType()); if (atom.not) { // Invert the lookahead set against the token vocabulary int maxToken = grammar.tokenManager.maxTokenType(); l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken); } return l; }
public persistence.antlr.Lookahead look(int k, persistence.antlr.SynPredBlock blk)
The lookahead of a (...)=> block is the lookahead of what follows the block. By definition, the syntactic predicate block defies static analysis (you want to try it out at run-time). The LOOK of (a)=>A B is A for LL(1) ### is this even called?
if (DEBUG_ANALYZER) System.out.println("look=>(" + k + "," + blk + ")"); return blk.next.look(k);
public persistence.antlr.Lookahead look(int k, persistence.antlr.TokenRangeElement r)
if (DEBUG_ANALYZER) System.out.println("lookTokenRange(" + k + "," + r + ")"); // Skip until analysis hits k==1 if (k > 1) { return r.next.look(k - 1); } BitSet p = BitSet.of(r.begin); for (int i = r.begin + 1; i <= r.end; i++) { p.add(i); } return new Lookahead(p);
public persistence.antlr.Lookahead look(int k, persistence.antlr.TreeElement t)
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])"); if (k > 1) { return t.next.look(k - 1); } Lookahead l = null; if (t.root instanceof WildcardElement) { l = t.root.look(1); // compute FIRST set minus previous rows } else { l = Lookahead.of(t.root.getType()); if (t.root.not) { // Invert the lookahead set against the token vocabulary int maxToken = grammar.tokenManager.maxTokenType(); l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken); } } return l;
public persistence.antlr.Lookahead look(int k, persistence.antlr.WildcardElement wc)
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ")"); // Skip until analysis hits k==1 if (k > 1) { return wc.next.look(k - 1); } BitSet b; if (lexicalAnalysis) { // Copy the character vocabulary b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone(); } else { b = new BitSet(1); // Invert the lookahead set against the token vocabulary int maxToken = grammar.tokenManager.maxTokenType(); b.notInPlace(Token.MIN_USER_TYPE, maxToken); if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ") after not: " + b); } // Remove prediction sets from competing alternatives // removeCompetingPredictionSets(b, wc); return new Lookahead(b);
public persistence.antlr.Lookahead look(int k, persistence.antlr.ZeroOrMoreBlock blk)
The (...)* element is the combined lookahead of the alternatives and what can follow the loop.
if (DEBUG_ANALYZER) System.out.println("look*(" + k + "," + blk + ")"); Lookahead p = look(k, (AlternativeBlock)blk); Lookahead q = blk.next.look(k); p.combineWith(q); return p;
public persistence.antlr.Lookahead look(int k, java.lang.String rule)
Compute the combined lookahead for all productions of a rule. If the lookahead returns with epsilon, at least one epsilon path exists (one that consumes no tokens). The noFOLLOW flag being set for this endruleblk, indicates that the a rule ref invoked this rule. Currently only look(RuleRef) calls this. There is no need for the code generator to call this.
if (DEBUG_ANALYZER) System.out.println("lookRuleName(" + k + "," + rule + ")"); RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule); RuleBlock rb = rs.getBlock(); if (rb.lock[k]) { if (DEBUG_ANALYZER) System.out.println("infinite recursion to rule " + rb.getRuleName()); return new Lookahead(rule); } // have we computed it before? if (rb.cache[k] != null) { if (DEBUG_ANALYZER) { System.out.println("found depth " + k + " result in FIRST " + rule + " cache: " + rb.cache[k].toString(",", charFormatter, grammar)); } return (Lookahead)rb.cache[k].clone(); } rb.lock[k] = true; Lookahead p = look(k, (RuleBlock)rb); rb.lock[k] = false; // cache results rb.cache[k] = (Lookahead)p.clone(); if (DEBUG_ANALYZER) { System.out.println("saving depth " + k + " result in FIRST " + rule + " cache: " + rb.cache[k].toString(",", charFormatter, grammar)); } return p;
public persistence.antlr.Lookahead look(int k, persistence.antlr.ActionElement action)
Actions are ignored
if (DEBUG_ANALYZER) System.out.println("lookAction(" + k + "," + action + ")"); return action.next.look(k);
public static boolean lookaheadEquivForApproxAndFullAnalysis(persistence.antlr.Lookahead[] bset, int k)
If the first k-1 sets are singleton sets, the appoximate lookahead analysis is equivalent to full lookahead analysis.
// first k-1 sets degree 1? for (int i = 1; i <= k - 1; i++) { BitSet look = bset[i].fset; if (look.degree() > 1) { return false; } } return true;
private void removeCompetingPredictionSets(persistence.antlr.collections.impl.BitSet b, persistence.antlr.AlternativeElement el)
Remove the prediction sets from preceding alternatives and follow set, but *only* if this element is the first element of the alternative. The class members currenBlock and currentBlock.analysisAlt must be set correctly.
param
b The prediction bitset to be modified
el
The element of interest
// Only do this if the element is the first element of the alt, // because we are making an implicit assumption that k==1. GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head; // if element is #(. blah) then check to see if el is root if (head instanceof TreeElement) { if (((TreeElement)head).root != el) { return; } } else if (el != head) { return; } for (int i = 0; i < currentBlock.analysisAlt; i++) { AlternativeElement e = currentBlock.getAlternativeAt(i).head; b.subtractInPlace(e.look(1).fset); }
private void removeCompetingPredictionSetsFromWildcard(persistence.antlr.Lookahead[] look, persistence.antlr.AlternativeElement el, int k)
Remove the prediction sets from preceding alternatives The class members currenBlock must be set correctly. Remove prediction sets from 1..k.
param
look The prediction lookahead to be modified
el
The element of interest
k
How deep into lookahead to modify
for (int d = 1; d <= k; d++) { for (int i = 0; i < currentBlock.analysisAlt; i++) { AlternativeElement e = currentBlock.getAlternativeAt(i).head; look[d].fset.subtractInPlace(e.look(d).fset); } }
private void reset()
reset the analyzer so it looks like a new one
grammar = null; DEBUG_ANALYZER = false; currentBlock = null; lexicalAnalysis = false;
public void setGrammar(persistence.antlr.Grammar g)
Set the grammar for the analyzer
if (grammar != null) { reset(); } grammar = g; // Is this lexical? lexicalAnalysis = (grammar instanceof LexerGrammar); DEBUG_ANALYZER = grammar.analyzerDebug;
public boolean subruleCanBeInverted(persistence.antlr.AlternativeBlock blk, boolean forLexer)
if ( blk instanceof ZeroOrMoreBlock || blk instanceof OneOrMoreBlock || blk instanceof SynPredBlock ) { return false; } // Cannot invert an empty subrule if (blk.alternatives.size() == 0) { return false; } // The block must only contain alternatives with a single element, // where each element is a char, token, char range, or token range. for (int i = 0; i < blk.alternatives.size(); i++) { Alternative alt = blk.getAlternativeAt(i); // Cannot have anything interesting in the alternative ... if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) { return false; } // ... and there must be one simple element AlternativeElement elt = alt.head; if ( !( elt instanceof CharLiteralElement || elt instanceof TokenRefElement || elt instanceof CharRangeElement || elt instanceof TokenRangeElement || (elt instanceof StringLiteralElement && !forLexer) ) || !(elt.next instanceof BlockEndElement) || elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE ) { return false; } } return true;