LLkAnalyzerpublic class LLkAnalyzer extends Object implements LLkGrammarAnalyzerA linear-approximate LL(k) grammar analzyer.
All lookahead elements are sets of token types. |
Fields Summary |
---|
public boolean | DEBUG_ANALYZER | private AlternativeBlock | currentBlock | protected Tool | tool | protected Grammar | grammar | protected boolean | lexicalAnalysis | CharFormatter | charFormatter |
Constructors Summary |
---|
public LLkAnalyzer(Tool tool_)Create an LLk analyzer
tool = tool_;
|
Methods Summary |
---|
public persistence.antlr.Lookahead | FOLLOW(int k, persistence.antlr.RuleEndElement end)Compute the lookahead set of whatever follows references to
the rule associated witht the FOLLOW block.
// what rule are we trying to compute FOLLOW of?
RuleBlock rb = (RuleBlock)end.block;
// rule name is different in lexer
String rule;
if (lexicalAnalysis) {
rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
}
else {
rule = rb.getRuleName();
}
if (DEBUG_ANALYZER) System.out.println("FOLLOW(" + k + "," + rule + ")");
// are we in the midst of computing this FOLLOW already?
if (end.lock[k]) {
if (DEBUG_ANALYZER) System.out.println("FOLLOW cycle to " + rule);
return new Lookahead(rule);
}
// Check to see if there is cached value
if (end.cache[k] != null) {
if (DEBUG_ANALYZER) {
System.out.println("cache entry FOLLOW(" + k + ") for " + rule + ": " + end.cache[k].toString(",", charFormatter, grammar));
}
// if the cache is a complete computation then simply return entry
if (end.cache[k].cycle == null) {
return (Lookahead)end.cache[k].clone();
}
// A cache entry exists, but it is a reference to a cyclic computation.
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
RuleEndElement re = rs.getBlock().endNode;
// The other entry may not exist because it is still being
// computed when this cycle cache entry was found here.
if (re.cache[k] == null) {
// return the cycle...that's all we can do at the moment.
return (Lookahead)end.cache[k].clone();
}
else {
if (DEBUG_ANALYZER) {
System.out.println("combining FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar) + " with FOLLOW for "+((RuleBlock)re.block).getRuleName()+": "+re.cache[k].toString(",", charFormatter, grammar));
}
// combine results from other rule's FOLLOW
if ( re.cache[k].cycle==null ) {
// current rule depends on another rule's FOLLOW and
// it is complete with no cycle; just kill our cycle and
// combine full result from other rule's FOLLOW
end.cache[k].combineWith(re.cache[k]);
end.cache[k].cycle = null; // kill cycle as we're complete
}
else {
// the FOLLOW cache for other rule has a cycle also.
// Here is where we bubble up a cycle. We better recursively
// wipe out cycles (partial computations). I'm a little nervous
// that we might leave a cycle here, however.
Lookahead refFOLLOW = FOLLOW(k, re);
end.cache[k].combineWith( refFOLLOW );
// all cycles should be gone, but if not, record ref to cycle
end.cache[k].cycle = refFOLLOW.cycle;
}
if (DEBUG_ANALYZER) {
System.out.println("saving FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar));
}
// Return the updated cache entry associated
// with the cycle reference.
return (Lookahead)end.cache[k].clone();
}
}
end.lock[k] = true; // prevent FOLLOW computation cycles
Lookahead p = new Lookahead();
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
// Walk list of references to this rule to compute FOLLOW
for (int i = 0; i < rs.numReferences(); i++) {
RuleRefElement rr = rs.getReference(i);
if (DEBUG_ANALYZER) System.out.println("next[" + rule + "] is " + rr.next.toString());
Lookahead q = rr.next.look(k);
if (DEBUG_ANALYZER) System.out.println("FIRST of next[" + rule + "] ptr is " + q.toString());
/* If there is a cycle then if the cycle is to the rule for
* this end block, you have a cycle to yourself. Remove the
* cycle indication--the lookahead is complete.
*/
if (q.cycle != null && q.cycle.equals(rule)) {
q.cycle = null; // don't want cycle to yourself!
}
// add the lookahead into the current FOLLOW computation set
p.combineWith(q);
if (DEBUG_ANALYZER) System.out.println("combined FOLLOW[" + rule + "] is " + p.toString());
}
end.lock[k] = false; // we're not doing FOLLOW anymore
// if no rules follow this, it can be a start symbol or called by a start sym.
// set the follow to be end of file.
if (p.fset.nil() && p.cycle == null) {
if (grammar instanceof TreeWalkerGrammar) {
// Tree grammars don't see EOF, they see end of sibling list or
// "NULL TREE LOOKAHEAD".
p.fset.add(Token.NULL_TREE_LOOKAHEAD);
}
else if (grammar instanceof LexerGrammar) {
// Lexical grammars use Epsilon to indicate that the end of rule has been hit
// EOF would be misleading; any character can follow a token rule not just EOF
// as in a grammar (where a start symbol is followed by EOF). There is no
// sequence info in a lexer between tokens to indicate what is the last token
// to be seen.
// p.fset.add(EPSILON_TYPE);
p.setEpsilon();
}
else {
p.fset.add(Token.EOF_TYPE);
}
}
// Cache the result of the FOLLOW computation
if (DEBUG_ANALYZER) {
System.out.println("saving FOLLOW(" + k + ") for " + rule + ": " + p.toString(",", charFormatter, grammar));
}
end.cache[k] = (Lookahead)p.clone();
return p;
| protected boolean | altUsesWildcardDefault(persistence.antlr.Alternative alt)Return true if someone used the '.' wildcard default idiom.
Either #(. children) or '.' as an alt by itself.
AlternativeElement head = alt.head;
// if element is #(. blah) then check to see if el is root
if (head instanceof TreeElement &&
((TreeElement)head).root instanceof WildcardElement) {
return true;
}
if (head instanceof WildcardElement && head.next instanceof BlockEndElement) {
return true;
}
return false;
| public boolean | deterministic(persistence.antlr.AlternativeBlock blk)Is this block of alternatives LL(k)? Fill in alternative cache for this block.
/** The lookahead depth for this decision */
int k = 1; // start at k=1
if (DEBUG_ANALYZER) System.out.println("deterministic(" + blk + ")");
boolean det = true;
int nalts = blk.alternatives.size();
AlternativeBlock saveCurrentBlock = currentBlock;
Alternative wildcardAlt = null;
currentBlock = blk;
/* don't allow nongreedy (...) blocks */
if (blk.greedy == false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock)) {
tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine(), blk.getColumn());
}
// SPECIAL CASE: only one alternative. We don't need to check the
// determinism, but other code expects the lookahead cache to be
// set for the single alt.
if (nalts == 1) {
AlternativeElement e = blk.getAlternativeAt(0).head;
currentBlock.alti = 0;
blk.getAlternativeAt(0).cache[1] = e.look(1);
blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
currentBlock = saveCurrentBlock;
return true; // always deterministic for one alt
}
outer:
for (int i = 0; i < nalts - 1; i++) {
currentBlock.alti = i;
currentBlock.analysisAlt = i; // which alt are we analyzing?
currentBlock.altj = i + 1; // reset this alt. Haven't computed yet,
// but we need the alt number.
inner:
// compare against other alternatives with lookahead depth k
for (int j = i + 1; j < nalts; j++) {
currentBlock.altj = j;
if (DEBUG_ANALYZER) System.out.println("comparing " + i + " against alt " + j);
currentBlock.analysisAlt = j; // which alt are we analyzing?
k = 1; // always attempt minimum lookahead possible.
// check to see if there is a lookahead depth that distinguishes
// between alternatives i and j.
Lookahead[] r = new Lookahead[grammar.maxk + 1];
boolean haveAmbiguity;
do {
haveAmbiguity = false;
if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
Lookahead p,q;
p = getAltLookahead(blk, i, k);
q = getAltLookahead(blk, j, k);
// compare LOOK(alt i) with LOOK(alt j). Is there an intersection?
// Lookahead must be disjoint.
if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
if (DEBUG_ANALYZER) System.out.println("q is " + q.toString(",", charFormatter, grammar));
// r[i] = p.fset.and(q.fset);
r[k] = p.intersection(q);
if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k].toString());
if (!r[k].nil()) {
haveAmbiguity = true;
k++;
}
// go until no more lookahead to use or no intersection
} while (haveAmbiguity && k <= grammar.maxk);
Alternative ai = blk.getAlternativeAt(i);
Alternative aj = blk.getAlternativeAt(j);
if (haveAmbiguity) {
det = false;
ai.lookaheadDepth = NONDETERMINISTIC;
aj.lookaheadDepth = NONDETERMINISTIC;
/* if ith alt starts with a syntactic predicate, computing the
* lookahead is still done for code generation, but messages
* should not be generated when comparing against alt j.
* Alternatives with syn preds that are unnecessary do
* not result in syn pred try-blocks.
*/
if (ai.synPred != null) {
if (DEBUG_ANALYZER) {
System.out.println("alt " + i + " has a syn pred");
}
// The alt with the (...)=> block is nondeterministic for sure.
// If the (...)=> conflicts with alt j, j is nondeterministic.
// This prevents alt j from being in any switch statements.
// move on to next alternative=>no possible ambiguity!
// continue inner;
}
/* if ith alt starts with a semantic predicate, computing the
* lookahead is still done for code generation, but messages
* should not be generated when comparing against alt j.
*/
else if (ai.semPred != null) {
if (DEBUG_ANALYZER) {
System.out.println("alt " + i + " has a sem pred");
}
}
/* if jth alt is exactly the wildcard or wildcard root of tree,
* then remove elements from alt i lookahead from alt j's lookahead.
* Don't do an ambiguity warning.
*/
else if (altUsesWildcardDefault(aj)) {
// System.out.println("removing pred sets");
// removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
wildcardAlt = aj;
}
/* If the user specified warnWhenFollowAmbig=false, then we
* can turn off this warning IFF one of the alts is empty;
* that is, it points immediately at the end block.
*/
else if (!blk.warnWhenFollowAmbig &&
(ai.head instanceof BlockEndElement ||
aj.head instanceof BlockEndElement)) {
// System.out.println("ai.head pts to "+ai.head.getClass());
// System.out.println("aj.head pts to "+aj.head.getClass());
}
/* If they have the generateAmbigWarnings option off for the block
* then don't generate a warning.
*/
else if (!blk.generateAmbigWarnings) {
}
/* If greedy=true and *one* empty alt shut off warning. */
else if (blk.greedySet && blk.greedy &&
((ai.head instanceof BlockEndElement &&
!(aj.head instanceof BlockEndElement)) ||
(aj.head instanceof BlockEndElement &&
!(ai.head instanceof BlockEndElement)))) {
// System.out.println("greedy set to true; one alt empty");
}
/* We have no choice, but to report a nondetermism */
else {
tool.errorHandler.warnAltAmbiguity(
grammar,
blk, // the block
lexicalAnalysis, // true if lexical
grammar.maxk, // depth of ambiguity
r, // set of linear ambiguities
i, // first ambiguous alternative
j // second ambiguous alternative
);
}
}
else {
// a lookahead depth, k, was found where i and j do not conflict
ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
}
}
}
// finished with block.
// If had wildcard default clause idiom, remove competing lookahead
/*
if ( wildcardAlt!=null ) {
removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
}
*/
currentBlock = saveCurrentBlock;
return det;
| public boolean | deterministic(persistence.antlr.OneOrMoreBlock blk)Is (...)+ block LL(1)? Fill in alternative cache for this block.
if (DEBUG_ANALYZER) System.out.println("deterministic(...)+(" + blk + ")");
AlternativeBlock saveCurrentBlock = currentBlock;
currentBlock = blk;
boolean blkOk = deterministic((AlternativeBlock)blk);
// block has been checked, now check that what follows does not conflict
// with the lookahead of the (...)+ block.
boolean det = deterministicImpliedPath(blk);
currentBlock = saveCurrentBlock;
return det && blkOk;
| public boolean | deterministic(persistence.antlr.ZeroOrMoreBlock blk)Is (...)* block LL(1)? Fill in alternative cache for this block.
if (DEBUG_ANALYZER) System.out.println("deterministic(...)*(" + blk + ")");
AlternativeBlock saveCurrentBlock = currentBlock;
currentBlock = blk;
boolean blkOk = deterministic((AlternativeBlock)blk);
// block has been checked, now check that what follows does not conflict
// with the lookahead of the (...)* block.
boolean det = deterministicImpliedPath(blk);
currentBlock = saveCurrentBlock;
return det && blkOk;
| public boolean | deterministicImpliedPath(persistence.antlr.BlockWithImpliedExitPath blk)Is this (...)* or (...)+ block LL(k)?
/** The lookahead depth for this decision considering implied exit path */
int k;
boolean det = true;
Vector alts = blk.getAlternatives();
int nalts = alts.size();
currentBlock.altj = -1; // comparing against implicit optional/exit alt
if (DEBUG_ANALYZER) System.out.println("deterministicImpliedPath");
for (int i = 0; i < nalts; i++) { // check follow against all alts
Alternative alt = blk.getAlternativeAt(i);
if (alt.head instanceof BlockEndElement) {
tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine(), blk.getColumn());
}
k = 1; // assume eac alt is LL(1) with exit branch
// check to see if there is a lookahead depth that distinguishes
// between alternative i and the exit branch.
Lookahead[] r = new Lookahead[grammar.maxk + 1];
boolean haveAmbiguity;
do {
haveAmbiguity = false;
if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
Lookahead p;
Lookahead follow = blk.next.look(k);
blk.exitCache[k] = follow;
currentBlock.alti = i;
p = getAltLookahead(blk, i, k);
if (DEBUG_ANALYZER) System.out.println("follow is " + follow.toString(",", charFormatter, grammar));
if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
//r[k] = follow.fset.and(p.fset);
r[k] = follow.intersection(p);
if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k]);
if (!r[k].nil()) {
haveAmbiguity = true;
k++;
}
// go until no more lookahead to use or no intersection
} while (haveAmbiguity && k <= grammar.maxk);
if (haveAmbiguity) {
det = false;
alt.lookaheadDepth = NONDETERMINISTIC;
blk.exitLookaheadDepth = NONDETERMINISTIC;
Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti);
/* If the user specified warnWhenFollowAmbig=false, then we
* can turn off this warning.
*/
if (!blk.warnWhenFollowAmbig) {
}
/* If they have the generateAmbigWarnings option off for the block
* then don't generate a warning.
*/
else if (!blk.generateAmbigWarnings) {
}
/* If greedy=true and alt not empty, shut off warning */
else if (blk.greedy == true && blk.greedySet &&
!(ambigAlt.head instanceof BlockEndElement)) {
if (DEBUG_ANALYZER) System.out.println("greedy loop");
}
/* If greedy=false then shut off warning...will have
* to add "if FOLLOW break"
* block during code gen to compensate for removal of warning.
*/
else if (blk.greedy == false &&
!(ambigAlt.head instanceof BlockEndElement)) {
if (DEBUG_ANALYZER) System.out.println("nongreedy loop");
// if FOLLOW not single k-string (|set[k]| can
// be > 1 actually) then must warn them that
// loop may terminate incorrectly.
// For example, ('a'..'d')+ ("ad"|"cb")
if (!lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk)) {
tool.warning(new String[]{
"nongreedy block may exit incorrectly due",
"\tto limitations of linear approximate lookahead (first k-1 sets",
"\tin lookahead not singleton)."},
grammar.getFilename(), blk.getLine(), blk.getColumn());
}
}
// no choice but to generate a warning
else {
tool.errorHandler.warnAltExitAmbiguity(
grammar,
blk, // the block
lexicalAnalysis, // true if lexical
grammar.maxk, // depth of ambiguity
r, // set of linear ambiguities
i // ambiguous alternative
);
}
}
else {
alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth, k);
}
}
return det;
| private persistence.antlr.Lookahead | getAltLookahead(persistence.antlr.AlternativeBlock blk, int alt, int k)
Lookahead p;
Alternative a = blk.getAlternativeAt(alt);
AlternativeElement e = a.head;
//System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
if (a.cache[k] == null) {
p = e.look(k);
a.cache[k] = p;
}
else {
p = a.cache[k];
}
return p;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.AlternativeBlock blk)Combine the lookahead computed for each alternative
if (DEBUG_ANALYZER) System.out.println("lookAltBlk(" + k + "," + blk + ")");
AlternativeBlock saveCurrentBlock = currentBlock;
currentBlock = blk;
Lookahead p = new Lookahead();
for (int i = 0; i < blk.alternatives.size(); i++) {
if (DEBUG_ANALYZER) System.out.println("alt " + i + " of " + blk);
// must set analysis alt
currentBlock.analysisAlt = i;
Alternative alt = blk.getAlternativeAt(i);
AlternativeElement elem = alt.head;
if (DEBUG_ANALYZER) {
if (alt.head == alt.tail) {
System.out.println("alt " + i + " is empty");
}
}
Lookahead q = elem.look(k);
p.combineWith(q);
}
if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
// Invert the lookahead set
if (lexicalAnalysis) {
BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
int[] elems = p.fset.toArray();
for (int j = 0; j < elems.length; j++) {
b.remove(elems[j]);
}
p.fset = b;
}
else {
p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
}
}
currentBlock = saveCurrentBlock;
return p;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.BlockEndElement end)Compute what follows this place-holder node and possibly
what begins the associated loop unless the
node is locked.
if we hit the end of a loop, we have to include
what tokens can begin the loop as well. If the start
node is locked, then we simply found an empty path
through this subrule while analyzing it. If the
start node is not locked, then this node was hit
during a FOLLOW operation and the FIRST of this
block must be included in that lookahead computation.
if (DEBUG_ANALYZER) System.out.println("lookBlockEnd(" + k + ", " + end.block + "); lock is " + end.lock[k]);
if (end.lock[k]) {
// computation in progress => the tokens we would have
// computed (had we not been locked) will be included
// in the set by that computation with the lock on this
// node.
return new Lookahead();
}
Lookahead p;
/* Hitting the end of a loop means you can see what begins the loop */
if (end.block instanceof ZeroOrMoreBlock ||
end.block instanceof OneOrMoreBlock) {
// compute what can start the block,
// but lock end node so we don't do it twice in same
// computation.
end.lock[k] = true;
p = look(k, end.block);
end.lock[k] = false;
}
else {
p = new Lookahead();
}
/* Tree blocks do not have any follow because they are children
* of what surrounds them. For example, A #(B C) D results in
* a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
* indicates that nothing can follow the last node of tree #(B C)
*/
if (end.block instanceof TreeElement) {
p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
}
/* Syntactic predicates such as ( (A)? )=> have no follow per se.
* We cannot accurately say what would be matched following a
* syntactic predicate (you MIGHT be ok if you said it was whatever
* followed the alternative predicted by the predicate). Hence,
* (like end-of-token) we return Epsilon to indicate "unknown
* lookahead."
*/
else if (end.block instanceof SynPredBlock) {
p.setEpsilon();
}
// compute what can follow the block
else {
Lookahead q = end.block.next.look(k);
p.combineWith(q);
}
return p;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.CharLiteralElement atom)Return this char as the lookahead if k=1.
### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
If the atom has the not flag on, then
create the set complement of the tokenType
which is the set of all characters referenced
in the grammar with this char turned off.
Also remove characters from the set that
are currently allocated for predicting
previous alternatives. This avoids ambiguity
messages and is more properly what is meant.
( 'a' | ~'a' ) implies that the ~'a' is the
"else" clause.
NOTE: we do NOT include exit path in
the exclusion set. E.g.,
( 'a' | ~'a' )* 'b'
should exit upon seeing a 'b' during the loop.
if (DEBUG_ANALYZER) System.out.println("lookCharLiteral(" + k + "," + atom + ")");
// Skip until analysis hits k==1
if (k > 1) {
return atom.next.look(k - 1);
}
if (lexicalAnalysis) {
if (atom.not) {
BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
if (DEBUG_ANALYZER) System.out.println("charVocab is " + b.toString());
// remove stuff predicted by preceding alts and follow of block
removeCompetingPredictionSets(b, atom);
if (DEBUG_ANALYZER) System.out.println("charVocab after removal of prior alt lookahead " + b.toString());
// now remove element that is stated not to be in the set
b.clear(atom.getType());
return new Lookahead(b);
}
else {
return Lookahead.of(atom.getType());
}
}
else {
// Should have been avoided by MakeGrammar
tool.panic("Character literal reference found in parser");
// ... so we make the compiler happy
return Lookahead.of(atom.getType());
}
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.CharRangeElement r)
if (DEBUG_ANALYZER) System.out.println("lookCharRange(" + k + "," + r + ")");
// Skip until analysis hits k==1
if (k > 1) {
return r.next.look(k - 1);
}
BitSet p = BitSet.of(r.begin);
for (int i = r.begin + 1; i <= r.end; i++) {
p.add(i);
}
return new Lookahead(p);
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.GrammarAtom atom)
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + atom + "[" + atom.getType() + "])");
if (lexicalAnalysis) {
// MakeGrammar should have created a rule reference instead
tool.panic("token reference found in lexer");
}
// Skip until analysis hits k==1
if (k > 1) {
return atom.next.look(k - 1);
}
Lookahead l = Lookahead.of(atom.getType());
if (atom.not) {
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
// remove stuff predicted by preceding alts and follow of block
removeCompetingPredictionSets(l.fset, atom);
}
return l;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.OneOrMoreBlock blk)The lookahead of a (...)+ block is the combined lookahead of
all alternatives and, if an empty path is found, the lookahead
of what follows the block.
if (DEBUG_ANALYZER) System.out.println("look+" + k + "," + blk + ")");
Lookahead p = look(k, (AlternativeBlock)blk);
return p;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.RuleBlock blk)Combine the lookahead computed for each alternative.
Lock the node so that no other computation may come back
on itself--infinite loop. This also implies infinite left-recursion
in the grammar (or an error in this algorithm ;)).
if (DEBUG_ANALYZER) System.out.println("lookRuleBlk(" + k + "," + blk + ")");
Lookahead p = look(k, (AlternativeBlock)blk);
return p;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.RuleEndElement end)If not locked or noFOLLOW set, compute FOLLOW of a rule.
TJP says 8/12/99: not true anymore:
Lexical rules never compute follow. They set epsilon and
the code generator gens code to check for any character.
The code generator must remove the tokens used to predict
any previous alts in the same block.
When the last node of a rule is reached and noFOLLOW,
it implies that a "local" FOLLOW will be computed
after this call. I.e.,
a : b A;
b : B | ;
c : b C;
Here, when computing the look of rule b from rule a,
we want only {B,EPSILON_TYPE} so that look(b A) will
be {B,A} not {B,A,C}.
if the end block is not locked and the FOLLOW is
wanted, the algorithm must compute the lookahead
of what follows references to this rule. If
end block is locked, FOLLOW will return an empty set
with a cycle to the rule associated with this end block.
if (DEBUG_ANALYZER)
System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW=" +
end.noFOLLOW + "; lock is " + end.lock[k]);
if (/*lexicalAnalysis ||*/ end.noFOLLOW) {
Lookahead p = new Lookahead();
p.setEpsilon();
p.epsilonDepth = BitSet.of(k);
return p;
}
Lookahead p = FOLLOW(k, end);
return p;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.RuleRefElement rr)Compute the lookahead contributed by a rule reference.
When computing ruleref lookahead, we don't want the FOLLOW
computation done if an empty path exists for the rule.
The FOLLOW is too loose of a set...we want only to
include the "local" FOLLOW or what can follow this
particular ref to the node. In other words, we use
context information to reduce the complexity of the
analysis and strengthen the parser.
The noFOLLOW flag is used as a means of restricting
the FOLLOW to a "local" FOLLOW. This variable is
orthogonal to the lock variable that prevents
infinite recursion. noFOLLOW does not care about what k is.
if (DEBUG_ANALYZER) System.out.println("lookRuleRef(" + k + "," + rr + ")");
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
if (rs == null || !rs.defined) {
tool.error("no definition of rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
return new Lookahead();
}
RuleBlock rb = rs.getBlock();
RuleEndElement end = rb.endNode;
boolean saveEnd = end.noFOLLOW;
end.noFOLLOW = true;
// go off to the rule and get the lookahead (w/o FOLLOW)
Lookahead p = look(k, rr.targetRule);
if (DEBUG_ANALYZER) System.out.println("back from rule ref to " + rr.targetRule);
// restore state of end block
end.noFOLLOW = saveEnd;
// check for infinite recursion. If a cycle is returned: trouble!
if (p.cycle != null) {
tool.error("infinite recursion to rule " + p.cycle + " from rule " +
rr.enclosingRuleName, grammar.getFilename(), rr.getLine(), rr.getColumn());
}
// is the local FOLLOW required?
if (p.containsEpsilon()) {
if (DEBUG_ANALYZER)
System.out.println("rule ref to " +
rr.targetRule + " has eps, depth: " + p.epsilonDepth);
// remove epsilon
p.resetEpsilon();
// fset.clear(EPSILON_TYPE);
// for each lookahead depth that saw epsilon
int[] depths = p.epsilonDepth.toArray();
p.epsilonDepth = null; // clear all epsilon stuff
for (int i = 0; i < depths.length; i++) {
int rk = k - (k - depths[i]);
Lookahead q = rr.next.look(rk); // see comments in Lookahead
p.combineWith(q);
}
// note: any of these look() computations for local follow can
// set EPSILON in the set again if the end of this rule is found.
}
return p;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.StringLiteralElement atom)
if (DEBUG_ANALYZER) System.out.println("lookStringLiteral(" + k + "," + atom + ")");
if (lexicalAnalysis) {
// need more lookahead than string can provide?
if (k > atom.processedAtomText.length()) {
return atom.next.look(k - atom.processedAtomText.length());
}
else {
// get char at lookahead depth k, from the processed literal text
return Lookahead.of(atom.processedAtomText.charAt(k - 1));
}
}
else {
// Skip until analysis hits k==1
if (k > 1) {
return atom.next.look(k - 1);
}
Lookahead l = Lookahead.of(atom.getType());
if (atom.not) {
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
}
return l;
}
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.SynPredBlock blk)The lookahead of a (...)=> block is the lookahead of
what follows the block. By definition, the syntactic
predicate block defies static analysis (you want to try it
out at run-time). The LOOK of (a)=>A B is A for LL(1)
### is this even called?
if (DEBUG_ANALYZER) System.out.println("look=>(" + k + "," + blk + ")");
return blk.next.look(k);
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.TokenRangeElement r)
if (DEBUG_ANALYZER) System.out.println("lookTokenRange(" + k + "," + r + ")");
// Skip until analysis hits k==1
if (k > 1) {
return r.next.look(k - 1);
}
BitSet p = BitSet.of(r.begin);
for (int i = r.begin + 1; i <= r.end; i++) {
p.add(i);
}
return new Lookahead(p);
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.TreeElement t)
if (DEBUG_ANALYZER)
System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])");
if (k > 1) {
return t.next.look(k - 1);
}
Lookahead l = null;
if (t.root instanceof WildcardElement) {
l = t.root.look(1); // compute FIRST set minus previous rows
}
else {
l = Lookahead.of(t.root.getType());
if (t.root.not) {
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
}
}
return l;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.WildcardElement wc)
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ")");
// Skip until analysis hits k==1
if (k > 1) {
return wc.next.look(k - 1);
}
BitSet b;
if (lexicalAnalysis) {
// Copy the character vocabulary
b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
}
else {
b = new BitSet(1);
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
b.notInPlace(Token.MIN_USER_TYPE, maxToken);
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ") after not: " + b);
}
// Remove prediction sets from competing alternatives
// removeCompetingPredictionSets(b, wc);
return new Lookahead(b);
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.ZeroOrMoreBlock blk)The (...)* element is the combined lookahead of the alternatives and what can
follow the loop.
if (DEBUG_ANALYZER) System.out.println("look*(" + k + "," + blk + ")");
Lookahead p = look(k, (AlternativeBlock)blk);
Lookahead q = blk.next.look(k);
p.combineWith(q);
return p;
| public persistence.antlr.Lookahead | look(int k, java.lang.String rule)Compute the combined lookahead for all productions of a rule.
If the lookahead returns with epsilon, at least one epsilon
path exists (one that consumes no tokens). The noFOLLOW
flag being set for this endruleblk, indicates that the
a rule ref invoked this rule.
Currently only look(RuleRef) calls this. There is no need
for the code generator to call this.
if (DEBUG_ANALYZER) System.out.println("lookRuleName(" + k + "," + rule + ")");
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
RuleBlock rb = rs.getBlock();
if (rb.lock[k]) {
if (DEBUG_ANALYZER)
System.out.println("infinite recursion to rule " + rb.getRuleName());
return new Lookahead(rule);
}
// have we computed it before?
if (rb.cache[k] != null) {
if (DEBUG_ANALYZER) {
System.out.println("found depth " + k + " result in FIRST " + rule + " cache: " +
rb.cache[k].toString(",", charFormatter, grammar));
}
return (Lookahead)rb.cache[k].clone();
}
rb.lock[k] = true;
Lookahead p = look(k, (RuleBlock)rb);
rb.lock[k] = false;
// cache results
rb.cache[k] = (Lookahead)p.clone();
if (DEBUG_ANALYZER) {
System.out.println("saving depth " + k + " result in FIRST " + rule + " cache: " +
rb.cache[k].toString(",", charFormatter, grammar));
}
return p;
| public persistence.antlr.Lookahead | look(int k, persistence.antlr.ActionElement action)Actions are ignored
if (DEBUG_ANALYZER) System.out.println("lookAction(" + k + "," + action + ")");
return action.next.look(k);
| public static boolean | lookaheadEquivForApproxAndFullAnalysis(persistence.antlr.Lookahead[] bset, int k)If the first k-1 sets are singleton sets, the appoximate
lookahead analysis is equivalent to full lookahead analysis.
// first k-1 sets degree 1?
for (int i = 1; i <= k - 1; i++) {
BitSet look = bset[i].fset;
if (look.degree() > 1) {
return false;
}
}
return true;
| private void | removeCompetingPredictionSets(persistence.antlr.collections.impl.BitSet b, persistence.antlr.AlternativeElement el)Remove the prediction sets from preceding alternatives
and follow set, but *only* if this element is the first element
of the alternative. The class members currenBlock and
currentBlock.analysisAlt must be set correctly.
// Only do this if the element is the first element of the alt,
// because we are making an implicit assumption that k==1.
GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head;
// if element is #(. blah) then check to see if el is root
if (head instanceof TreeElement) {
if (((TreeElement)head).root != el) {
return;
}
}
else if (el != head) {
return;
}
for (int i = 0; i < currentBlock.analysisAlt; i++) {
AlternativeElement e = currentBlock.getAlternativeAt(i).head;
b.subtractInPlace(e.look(1).fset);
}
| private void | removeCompetingPredictionSetsFromWildcard(persistence.antlr.Lookahead[] look, persistence.antlr.AlternativeElement el, int k)Remove the prediction sets from preceding alternatives
The class members currenBlock must be set correctly.
Remove prediction sets from 1..k.
for (int d = 1; d <= k; d++) {
for (int i = 0; i < currentBlock.analysisAlt; i++) {
AlternativeElement e = currentBlock.getAlternativeAt(i).head;
look[d].fset.subtractInPlace(e.look(d).fset);
}
}
| private void | reset()reset the analyzer so it looks like a new one
grammar = null;
DEBUG_ANALYZER = false;
currentBlock = null;
lexicalAnalysis = false;
| public void | setGrammar(persistence.antlr.Grammar g)Set the grammar for the analyzer
if (grammar != null) {
reset();
}
grammar = g;
// Is this lexical?
lexicalAnalysis = (grammar instanceof LexerGrammar);
DEBUG_ANALYZER = grammar.analyzerDebug;
| public boolean | subruleCanBeInverted(persistence.antlr.AlternativeBlock blk, boolean forLexer)
if (
blk instanceof ZeroOrMoreBlock ||
blk instanceof OneOrMoreBlock ||
blk instanceof SynPredBlock
) {
return false;
}
// Cannot invert an empty subrule
if (blk.alternatives.size() == 0) {
return false;
}
// The block must only contain alternatives with a single element,
// where each element is a char, token, char range, or token range.
for (int i = 0; i < blk.alternatives.size(); i++) {
Alternative alt = blk.getAlternativeAt(i);
// Cannot have anything interesting in the alternative ...
if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
return false;
}
// ... and there must be one simple element
AlternativeElement elt = alt.head;
if (
!(
elt instanceof CharLiteralElement ||
elt instanceof TokenRefElement ||
elt instanceof CharRangeElement ||
elt instanceof TokenRangeElement ||
(elt instanceof StringLiteralElement && !forLexer)
) ||
!(elt.next instanceof BlockEndElement) ||
elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
) {
return false;
}
}
return true;
|
|