/*
 * RecursiveDescentParser.cs
 *
 * This work is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This work is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 * As a special exception, the copyright holders of this library give
 * you permission to link this library with independent modules to
 * produce an executable, regardless of the license terms of these
 * independent modules, and to copy and distribute the resulting
 * executable under terms of your choice, provided that you also meet,
 * for each linked independent module, the terms and conditions of the
 * license of that module. An independent module is a module which is
 * not derived from or based on this library. If you modify this
 * library, you may extend this exception to your version of the
 * library, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 *
 * Copyright (c) 2003 Per Cederberg. All rights reserved.
 */

using System;
using System.Collections;

namespace PerCederberg.Grammatica.Parser {

    /**
     * A recursive descent parser. This parser handles LL(n) grammars,
     * selecting the appropriate pattern to parse based on the next few
     * tokens. The parser is more efficient the fewer look-ahead tokens
     * that it has to consider.
     *
     * @author   Per Cederberg, <per at percederberg dot net>
     * @version  1.0
     */
    public class RecursiveDescentParser : Parser {

        /**
         * The map of pattern look-ahead sets, indexed by the
         * production pattern object. The field is not referenced by
         * any other member of this class.
         */
        private Hashtable lookAheads = new Hashtable();

        /**
         * Creates a new parser reading from the given tokenizer.
         *
         * @param tokenizer      the tokenizer to use
         */
        public RecursiveDescentParser(Tokenizer tokenizer)
            : base(tokenizer) {
        }

        /**
         * Creates a new parser reading from the given tokenizer and
         * reporting to the given analyzer.
         *
         * @param tokenizer      the tokenizer to use
         * @param analyzer       the analyzer callback to use
         */
        public RecursiveDescentParser(Tokenizer tokenizer,
                                      Analyzer analyzer)
            : base(tokenizer, analyzer) {
        }

        /**
         * Adds a new production pattern to the parser. A pattern
         * that can match zero elements, or that is left recursive,
         * is rejected before it reaches the base class. The empty
         * match check is performed first, so it wins if both
         * conditions hold.
         *
         * @param pattern        the pattern to add
         *
         * @throws ParserCreationException if the pattern couldn't be
         *             added correctly to the parser
         */
        public override void AddPattern(ProductionPattern pattern) {
            string problem = null;

            // The left recursion test only runs when the empty
            // match test did not already reject the pattern
            if (pattern.IsMatchingEmpty()) {
                problem = "zero elements can be matched (minimum is one)";
            } else if (pattern.IsLeftRecursive()) {
                problem = "left recursive patterns are not allowed";
            }
            if (problem != null) {
                throw new ParserCreationException(
                    ParserCreationException.ErrorType.INVALID_PRODUCTION,
                    pattern.GetName(),
                    problem);
            }

            base.AddPattern(pattern);
        }

        /**
         * Initializes the parser. The base class preparation runs
         * first, then the look-ahead sets are calculated for every
         * added production pattern. The initialized flag is cleared
         * while the look-aheads are being calculated and set again
         * once all patterns have been processed.
         *
         * @throws ParserCreationException if the parser couldn't be
         *             initialized correctly
         */
        public override void Prepare() {
            base.Prepare();
            SetInitialized(false);

            foreach (ProductionPattern pattern in GetPatterns()) {
                CalculateLookAhead(pattern);
            }

            SetInitialized(true);
        }

        /**
         * Parses the input stream and creates a parse tree. The
         * start pattern is parsed, after which no further token may
         * be available; a remaining token is reported as unexpected
         * with "<EOF>" as the only expected item.
         *
         * @return the parse tree
         *
         * @throws ParseException if the input couldn't be parsed
         *             correctly
         */
        protected override Node ParseStart() {
            Node   root = ParsePattern(GetStartPattern());
            Token  trailing = PeekToken(0);

            if (trailing == null) {
                return root;
            }

            ArrayList expected = new ArrayList(1);
            expected.Add("<EOF>");
            throw new ParseException(
                ParseException.ErrorType.UNEXPECTED_TOKEN,
                trailing.ToShortString(),
                expected,
                trailing.GetStartLine(),
                trailing.GetStartColumn());
        }

        /**
         * Parses a production pattern. The non-default alternatives
         * are tried in order and the first one matching the upcoming
         * tokens is parsed. If none matches, the default alternative
         * is used, provided that it exists and matches.
         *
         * @param pattern        the production pattern to parse
         *
         * @return the node returned when parsing the alternative
         *
         * @throws ParseException if the input couldn't be parsed
         *             correctly
         */
        private Node ParsePattern(ProductionPattern pattern) {
            ProductionPatternAlternative fallback =
                pattern.GetDefaultAlternative();
            int count = pattern.GetAlternativeCount();

            for (int i = 0; i < count; i++) {
                ProductionPatternAlternative candidate =
                    pattern.GetAlternative(i);

                // The default alternative is only tried last
                if (candidate == fallback) {
                    continue;
                }
                if (IsNext(candidate)) {
                    return ParseAlternative(candidate);
                }
            }

            if (fallback == null || !IsNext(fallback)) {
                ThrowParseException(FindUnion(pattern));
            }
            return ParseAlternative(fallback);
        }

        /**
         * Parses a production pattern alternative. A production node
         * is created and entered, each element is parsed in order,
         * and the node is finally exited. When parsing an element
         * fails, the error is recorded, one token is skipped and the
         * same element is attempted again.
         *
         * @param alt            the production pattern alternative
         *
         * @return the result of exiting the production node
         *
         * @throws ParseException if the input couldn't be parsed
         *             correctly
         */
        private Node ParseAlternative(ProductionPatternAlternative alt) {
            Production production = new Production(alt.GetPattern());
            int        index = 0;

            EnterNode(production);
            while (index < alt.GetElementCount()) {
                try {
                    ParseElement(production, alt.GetElement(index));
                    // Only advance once the element parsed cleanly
                    index++;
                } catch (ParseException e) {
                    AddError(e, true);
                    NextToken();
                }
            }
            return ExitNode(production);
        }

        /**
         * Parses a production pattern element. The element is parsed
         * until its minimum count is reached, and thereafter for as
         * long as it matches the upcoming tokens, up to its maximum
         * count. Each parsed token or sub-production is passed on to
         * AddNode for the specified production node.
         *
         * @param node           the production parse tree node
         * @param elem           the production pattern element to parse
         *
         * @throws ParseException if the input couldn't be parsed
         *             correctly
         */
        private void ParseElement(Production node,
                                  ProductionPatternElement elem) {

            int minCount = elem.GetMinCount();
            int maxCount = elem.GetMaxCount();

            for (int round = 0; round < maxCount; round++) {
                // Optional repetitions stop at the first mismatch
                if (round >= minCount && !IsNext(elem)) {
                    break;
                }

                Node child;
                if (elem.IsToken()) {
                    child = NextToken(elem.GetId());
                    EnterNode(child);
                    child = ExitNode(child);
                } else {
                    child = ParsePattern(GetPattern(elem.GetId()));
                }
                AddNode(node, child);
            }
        }

        /**
         * Checks if the next tokens match a production pattern. The
         * pattern look-ahead set is used for the check.
         *
         * @param pattern        the pattern to check
         *
         * @return true if the next tokens match, or
         *         false otherwise (also when the pattern has no
         *         look-ahead set)
         */
        private bool IsNext(ProductionPattern pattern) {
            LookAheadSet lookAhead = pattern.GetLookAhead();

            return lookAhead != null && lookAhead.IsNext(this);
        }

        /**
         * Checks if the next tokens match a production pattern
         * alternative. The alternative look-ahead set is used for
         * the check.
         *
         * @param alt            the pattern alternative to check
         *
         * @return true if the next tokens match, or
         *         false otherwise (also when the alternative has no
         *         look-ahead set)
         */
        private bool IsNext(ProductionPatternAlternative alt) {
            LookAheadSet lookAhead = alt.GetLookAhead();

            return lookAhead != null && lookAhead.IsNext(this);
        }

        /**
         * Checks if the next tokens match a production pattern
         * element. An element look-ahead set takes precedence. If
         * none is present, a token element is matched against the
         * next token, and a production element is checked through
         * the referenced production pattern.
         *
         * @param elem           the pattern element to check
         *
         * @return true if the next tokens match, or
         *         false otherwise
         */
        private bool IsNext(ProductionPatternElement elem) {
            LookAheadSet lookAhead = elem.GetLookAhead();

            if (lookAhead != null) {
                return lookAhead.IsNext(this);
            }
            if (elem.IsToken()) {
                return elem.IsMatch(PeekToken(0));
            }
            return IsNext(GetPattern(elem.GetId()));
        }

        /**
         * Calculates the look-ahead needed for the specified
         * production pattern. A look-ahead of length one is first
         * calculated for each alternative. While the alternatives
         * conflict, the look-ahead length is increased and the
         * conflicting alternatives are recalculated. Finally the
         * elements inside each alternative are processed.
         *
         * @param pattern        the production pattern
         *
         * @throws ParserCreationException if the look-ahead set couldn't
         *             be determined due to inherent ambiguities
         */
        private void CalculateLookAhead(ProductionPattern pattern) {
            int             count = pattern.GetAlternativeCount();
            LookAheadSet[]  sets = new LookAheadSet[count];
            LookAheadSet    union = new LookAheadSet(1);
            LookAheadSet    earlier = new LookAheadSet(0);
            CallStack       stack = new CallStack();
            int             depth = 1;

            // Calculate simple look-ahead
            stack.Push(pattern.GetName(), 1);
            for (int i = 0; i < count; i++) {
                ProductionPatternAlternative alt = pattern.GetAlternative(i);

                sets[i] = FindLookAhead(alt, 1, 0, stack, null);
                alt.SetLookAhead(sets[i]);
                union.AddAll(sets[i]);
            }
            if (pattern.GetLookAhead() == null) {
                pattern.SetLookAhead(union);
            }

            // Resolve conflicts between the alternatives
            LookAheadSet conflicts = FindConflicts(pattern, 1);
            while (conflicts.Size() > 0) {
                depth++;
                stack.Clear();
                stack.Push(pattern.GetName(), depth);
                conflicts.AddAll(earlier);
                for (int i = 0; i < count; i++) {
                    ProductionPatternAlternative alt =
                        pattern.GetAlternative(i);

                    if (sets[i].Intersects(conflicts)) {
                        sets[i] = FindLookAhead(alt,
                                                depth,
                                                0,
                                                stack,
                                                conflicts);
                        alt.SetLookAhead(sets[i]);
                    }
                    if (!sets[i].Intersects(conflicts)) {
                        continue;
                    }

                    // A single remaining conflicting alternative is
                    // accepted as the default, a second one is not
                    ProductionPatternAlternative fallback =
                        pattern.GetDefaultAlternative();
                    if (fallback == null) {
                        pattern.SetDefaultAlternative(i);
                    } else if (fallback != alt) {
                        ThrowAmbiguityException(
                            pattern.GetName(),
                            null,
                            sets[i].CreateIntersection(conflicts));
                    }
                }
                earlier = conflicts;
                conflicts = FindConflicts(pattern, depth);
            }

            // Resolve conflicts inside rules
            for (int i = 0; i < count; i++) {
                CalculateLookAhead(pattern.GetAlternative(i), 0);
            }
        }

        /**
         * Calculates the look-aheads needed for the elements of the
         * specified pattern alternative, starting at a position.
         * Elements with identical minimum and maximum counts are
         * skipped. For the other elements, the element look-ahead is
         * compared to the look-ahead of the remaining elements, and
         * the look-ahead length is increased for as long as the two
         * conflict.
         *
         * @param alt            the production pattern alternative
         * @param pos            the pattern element position
         *
         * @throws ParserCreationException if the look-ahead set couldn't
         *             be determined due to inherent ambiguities
         */
        private void CalculateLookAhead(ProductionPatternAlternative alt,
                                        int pos) {

            for (int index = pos; index < alt.GetElementCount(); index++) {
                ProductionPatternElement elem = alt.GetElement(index);

                // Check for non-optional element
                if (elem.GetMinCount() == elem.GetMaxCount()) {
                    continue;
                }

                // Calculate simple look-aheads
                string        name = alt.GetPattern().GetName();
                string        location = "at position " + (index + 1);
                LookAheadSet  earlier = new LookAheadSet(0);
                int           depth = 1;
                LookAheadSet  first =
                    FindLookAhead(elem, 1, new CallStack(), null);
                LookAheadSet  follow =
                    FindLookAhead(alt, 1, index + 1, new CallStack(), null);

                // Resolve conflicts
                LookAheadSet conflicts =
                    FindConflicts(name, location, first, follow);
                while (conflicts.Size() > 0) {
                    depth++;
                    conflicts.AddAll(earlier);
                    first = FindLookAhead(elem,
                                          depth,
                                          new CallStack(),
                                          conflicts);
                    follow = FindLookAhead(alt,
                                           depth,
                                           index + 1,
                                           new CallStack(),
                                           conflicts);
                    first = first.CreateCombination(follow);
                    elem.SetLookAhead(first);
                    if (first.Intersects(conflicts)) {
                        first = first.CreateIntersection(conflicts);
                        ThrowAmbiguityException(name, location, first);
                    }
                    earlier = conflicts;
                    conflicts = FindConflicts(name, location, first, follow);
                }
            }
        }

        /**
         * Finds the look-ahead set for a production pattern, being
         * the union of the look-ahead sets of all its alternatives.
         * The pattern name and length are kept on the call stack
         * during the search, and finding the same combination
         * already there is reported as an infinite loop.
         *
         * @param pattern        the production pattern
         * @param length         the maximum look-ahead length
         * @param stack          the call stack used for loop detection
         * @param filter         the look-ahead set filter
         *
         * @return the look-ahead set for the production pattern
         *
         * @throws ParserCreationException if an infinite loop was found
         *             in the grammar
         */
        private LookAheadSet FindLookAhead(ProductionPattern pattern,
                                           int length,
                                           CallStack stack,
                                           LookAheadSet filter) {

            string name = pattern.GetName();

            // Check for infinite loop
            if (stack.Contains(name, length)) {
                throw new ParserCreationException(
                    ParserCreationException.ErrorType.INFINITE_LOOP,
                    name,
                    (string) null);
            }

            // Find pattern look-ahead
            LookAheadSet union = new LookAheadSet(length);
            int          count = pattern.GetAlternativeCount();

            stack.Push(name, length);
            for (int i = 0; i < count; i++) {
                union.AddAll(FindLookAhead(pattern.GetAlternative(i),
                                           length,
                                           0,
                                           stack,
                                           filter));
            }
            stack.Pop();

            return union;
        }

        /**
         * Finds the look-ahead set for a production pattern
         * alternative, starting at an element position. The
         * look-ahead of the element at the position is combined with
         * the look-ahead of the elements following it. Without a
         * filter all sequences are extended, whereas with a filter
         * only the sequences overlapping the filter are.
         *
         * @param alt            the production pattern alternative
         * @param length         the maximum look-ahead length
         * @param pos            the pattern element position
         * @param stack          the call stack used for loop detection
         * @param filter         the look-ahead set filter
         *
         * @return the look-ahead set for the pattern alternative
         *
         * @throws ParserCreationException if an infinite loop was found
         *             in the grammar
         */
        private LookAheadSet FindLookAhead(ProductionPatternAlternative alt,
                                           int length,
                                           int pos,
                                           CallStack stack,
                                           LookAheadSet filter) {

            // Check trivial cases
            if (length <= 0 || pos >= alt.GetElementCount()) {
                return new LookAheadSet(0);
            }

            // Find look-ahead for this element
            ProductionPatternElement elem = alt.GetElement(pos);
            LookAheadSet first = FindLookAhead(elem, length, stack, filter);

            if (elem.GetMinCount() == 0) {
                first.AddEmpty();
            }

            // Find remaining look-ahead (unfiltered)
            if (filter == null) {
                int remaining = length - first.GetMinLength();

                if (remaining <= 0) {
                    return first;
                }
                return first.CreateCombination(
                    FindLookAhead(alt, remaining, pos + 1, stack, null));
            }

            // Find remaining look-ahead (filtered)
            if (!filter.IsOverlap(first)) {
                return first;
            }

            LookAheadSet overlaps = first.CreateOverlaps(filter);
            int          shortened = length - overlaps.GetMinLength();
            LookAheadSet narrowed = filter.CreateFilter(overlaps);
            LookAheadSet follow =
                FindLookAhead(alt, shortened, pos + 1, stack, narrowed);

            // Replace the overlapping sequences by their extensions
            first.RemoveAll(overlaps);
            first.AddAll(overlaps.CreateCombination(follow));
            return first;
        }

        /**
         * Finds the look-ahead set for a production pattern element.
         * This method takes the element repeats into consideration
         * when a filter overlapping the element look-ahead has been
         * specified. It does NOT add an empty sequence, even if the
         * minimum count is zero (0).
         *
         * @param elem           the production pattern element
         * @param length         the maximum look-ahead length
         * @param stack          the call stack used for loop detection
         * @param filter         the look-ahead set filter
         *
         * @return the look-ahead set for the pattern element
         *
         * @throws ParserCreationException if an infinite loop was found
         *             in the grammar
         */
        private LookAheadSet FindLookAhead(ProductionPatternElement elem,
                                           int length,
                                           CallStack stack,
                                           LookAheadSet filter) {

            // Find initial element look-ahead
            LookAheadSet current =
                FindLookAhead(elem, length, 0, stack, filter);
            LookAheadSet result = new LookAheadSet(length);

            result.AddAll(current);
            if (filter == null || !filter.IsOverlap(result)) {
                return result;
            }

            // Handle element repetitions
            int maxCount = elem.GetMaxCount();
            if (maxCount == Int32.MaxValue) {
                current = current.CreateRepetitive();
            }

            int limit = Math.Min(maxCount, length);
            for (int round = 1; round < limit; round++) {
                current = current.CreateOverlaps(filter);
                if (current.Size() <= 0
                 || current.GetMinLength() >= length) {

                    break;
                }

                LookAheadSet next =
                    FindLookAhead(elem,
                                  length,
                                  0,
                                  stack,
                                  filter.CreateFilter(current));
                current = current.CreateCombination(next);
                result.AddAll(current);
            }

            return result;
        }

        /**
         * Finds the look-ahead set for a single occurrence of a
         * production pattern element. This method does NOT take the
         * element repeat into consideration. A token element yields
         * a set with only the token id. A production element yields
         * the look-ahead of the referenced pattern, made repetitive
         * if the pattern name is present on the call stack.
         *
         * @param elem           the production pattern element
         * @param length         the maximum look-ahead length
         * @param dummy          a parameter to distinguish the method
         * @param stack          the call stack used for loop detection
         * @param filter         the look-ahead set filter
         *
         * @return the look-ahead set for the pattern element
         *
         * @throws ParserCreationException if an infinite loop was found
         *             in the grammar
         */
        private LookAheadSet FindLookAhead(ProductionPatternElement elem,
                                           int length,
                                           int dummy,
                                           CallStack stack,
                                           LookAheadSet filter) {

            if (elem.IsToken()) {
                LookAheadSet single = new LookAheadSet(length);

                single.Add(elem.GetId());
                return single;
            }

            ProductionPattern pattern = GetPattern(elem.GetId());
            LookAheadSet found =
                FindLookAhead(pattern, length, stack, filter);

            if (stack.Contains(pattern.GetName())) {
                return found.CreateRepetitive();
            }
            return found;
        }

        /**
         * Returns a look-ahead set with all conflicts between
         * alternatives in a production pattern. The look-ahead sets
         * of every pair of alternatives are intersected and the
         * intersections are collected. A repetitive result is
         * reported as an ambiguity.
         *
         * @param pattern        the production pattern
         * @param maxLength      the maximum token sequence length
         *
         * @return a look-ahead set with the conflicts found
         *
         * @throws ParserCreationException if an inherent ambiguity was
         *             found among the look-ahead sets
         */
        private LookAheadSet FindConflicts(ProductionPattern pattern,
                                           int maxLength) {

            LookAheadSet conflicts = new LookAheadSet(maxLength);
            int          count = pattern.GetAlternativeCount();

            for (int later = 0; later < count; later++) {
                LookAheadSet laterSet =
                    pattern.GetAlternative(later).GetLookAhead();

                for (int former = 0; former < later; former++) {
                    LookAheadSet formerSet =
                        pattern.GetAlternative(former).GetLookAhead();

                    conflicts.AddAll(laterSet.CreateIntersection(formerSet));
                }
            }

            if (conflicts.IsRepetitive()) {
                ThrowAmbiguityException(pattern.GetName(), null, conflicts);
            }
            return conflicts;
        }

        /**
         * Returns a look-ahead set with all conflicts between two
         * look-ahead sets, being the intersection of the two. A
         * repetitive intersection is reported as an ambiguity.
         *
         * @param pattern        the pattern name being analyzed
         * @param location       the pattern location
         * @param set1           the first look-ahead set
         * @param set2           the second look-ahead set
         *
         * @return a look-ahead set with the conflicts found
         *
         * @throws ParserCreationException if an inherent ambiguity was
         *             found among the look-ahead sets
         */
        private LookAheadSet FindConflicts(string pattern,
                                           string location,
                                           LookAheadSet set1,
                                           LookAheadSet set2) {

            LookAheadSet common = set1.CreateIntersection(set2);

            if (common.IsRepetitive()) {
                ThrowAmbiguityException(pattern, location, common);
            }
            return common;
        }

        /**
         * Returns the union of all alternative look-ahead sets in a
         * production pattern. The returned set is created with the
         * largest maximum length found among the alternative sets.
         *
         * @param pattern        the production pattern
         *
         * @return a unified look-ahead set
         */
        private LookAheadSet FindUnion(ProductionPattern pattern) {
            int count = pattern.GetAlternativeCount();
            int widest = 0;

            for (int i = 0; i < count; i++) {
                LookAheadSet lookAhead =
                    pattern.GetAlternative(i).GetLookAhead();

                widest = Math.Max(widest, lookAhead.GetMaxLength());
            }

            LookAheadSet union = new LookAheadSet(widest);
            for (int i = 0; i < count; i++) {
                union.AddAll(pattern.GetAlternative(i).GetLookAhead());
            }
            return union;
        }

        /**
         * Returns the descriptions of the initial tokens in a
         * look-ahead set, in the order reported by the set.
         *
         * @param set            the look-ahead set to describe
         *
         * @return a list with one token description per initial token
         */
        private ArrayList DescribeInitialTokens(LookAheadSet set) {
            ArrayList descriptions = new ArrayList();

            foreach (int id in set.GetInitialTokens()) {
                descriptions.Add(GetTokenDescription(id));
            }
            return descriptions;
        }

        /**
         * Throws a parse exception that matches the specified
         * look-ahead set. Tokens are first read for as long as they
         * match the set, so that the exception refers to the first
         * token that does not match.
         *
         * @param set            the look-ahead set to match
         *
         * @throws ParseException always thrown by this method
         */
        private void ThrowParseException(LookAheadSet set) {
            // Read tokens until mismatch
            while (set.IsNext(this, 1)) {
                set = set.CreateNextSet(NextToken().GetId());
            }

            // Find next token descriptions
            ArrayList expected = DescribeInitialTokens(set);

            // Create exception
            Token unexpected = NextToken();
            throw new ParseException(
                ParseException.ErrorType.UNEXPECTED_TOKEN,
                unexpected.ToShortString(),
                expected,
                unexpected.GetStartLine(),
                unexpected.GetStartColumn());
        }

        /**
         * Throws a parser creation exception for an ambiguity. The
         * initial tokens of the specified look-ahead set are
         * reported as the conflicting tokens.
         *
         * @param pattern        the production pattern name
         * @param location       the production pattern location, or null
         * @param set            the look-ahead set with conflicts
         *
         * @throws ParserCreationException always thrown by this method
         */
        private void ThrowAmbiguityException(string pattern,
                                             string location,
                                             LookAheadSet set) {

            throw new ParserCreationException(
                ParserCreationException.ErrorType.INHERENT_AMBIGUITY,
                pattern,
                location,
                DescribeInitialTokens(set));
        }


        /**
         * A name value stack. This stack is used to detect loops and
         * repetitions of the same production during look-ahead
         * analysis. The names and values are kept in two parallel
         * lists that are always modified together.
         */
        private class CallStack {

            /**
             * A stack with names.
             */
            private ArrayList nameStack = new ArrayList();

            /**
             * A stack with values.
             */
            private ArrayList valueStack = new ArrayList();

            /**
             * Checks if the specified name is on the stack,
             * regardless of the value stored with it.
             *
             * @param name           the name to search for
             *
             * @return true if the name is on the stack, or
             *         false otherwise
             */
            public bool Contains(string name) {
                return nameStack.Contains(name);
            }

            /**
             * Checks if the specified name and value combination is
             * on the stack.
             *
             * @param name           the name to search for
             * @param value          the value to search for
             *
             * @return true if the combination is on the stack, or
             *         false otherwise
             */
            public bool Contains(string name, int value) {
                int depth = nameStack.Count;

                for (int i = 0; i < depth; i++) {
                    if (!nameStack[i].Equals(name)) {
                        continue;
                    }
                    // Only ints are ever pushed on the value stack
                    if ((int) valueStack[i] == value) {
                        return true;
                    }
                }
                return false;
            }

            /**
             * Clears the stack. This method removes all elements on
             * the stack.
             */
            public void Clear() {
                nameStack.Clear();
                valueStack.Clear();
            }

            /**
             * Adds a new element to the top of the stack.
             *
             * @param name           the stack name
             * @param value          the stack value
             */
            public void Push(string name, int value) {
                nameStack.Add(name);
                valueStack.Add(value);
            }

            /**
             * Removes the top element of the stack. Nothing happens
             * if the stack is empty.
             */
            public void Pop() {
                int top = nameStack.Count - 1;

                if (top < 0) {
                    return;
                }
                nameStack.RemoveAt(top);
                valueStack.RemoveAt(top);
            }
        }
    }
}