/* * LookAheadSet.cs * * This work is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published * by the Free Software Foundation; either version 2 of the License, * or (at your option) any later version. * * This work is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * * As a special exception, the copyright holders of this library give * you permission to link this library with independent modules to * produce an executable, regardless of the license terms of these * independent modules, and to copy and distribute the resulting * executable under terms of your choice, provided that you also meet, * for each linked independent module, the terms and conditions of the * license of that module. An independent module is a module which is * not derived from or based on this library. If you modify this * library, you may extend this exception to your version of the * library, but you are not obligated to do so. If you do not wish to * do so, delete this exception statement from your version. * * Copyright (c) 2003 Per Cederberg. All rights reserved. */ using System.Collections; using System.Text; namespace PerCederberg.Grammatica.Parser { /** * A token look-ahead set. This class contains a set of token id * sequences. All sequences in the set are limited in length, so * that no single sequence is longer than a maximum value. This * class also filters out duplicates. Each token sequence also * contains a repeat flag, allowing the look-ahead set to contain * information about possible infinite repetitions of certain * sequences. That information is important when conflicts arise * between two look-ahead sets, as such a conflict cannot be * resolved if the conflicting sequences can be repeated (would * cause infinite loop). * * @author Per Cederberg, <per at percederberg dot net> * @version 1.1 */ internal class LookAheadSet { /** * The set of token look-ahead sequences. Each sequence in * turn is represented by an ArrayList with Integers for the * token id:s. */ private ArrayList elements = new ArrayList(); /** * The maximum length of any look-ahead sequence. */ private int maxLength; /** * Creates a new look-ahead set with the specified maximum * length. * * @param maxLength the maximum token sequence length */ public LookAheadSet(int maxLength) { this.maxLength = maxLength; } /** * Creates a duplicate look-ahead set, possibly with a * different maximum length. * * @param maxLength the maximum token sequence length * @param set the look-ahead set to copy */ public LookAheadSet(int maxLength, LookAheadSet set) : this(maxLength) { AddAll(set); } /** * Returns the size of this look-ahead set. * * @return the number of token sequences in the set */ public int Size() { return elements.Count; } /** * Returns the length of the shortest token sequence in this * set. This method will return zero (0) if the set is empty. * * @return the length of the shortest token sequence */ public int GetMinLength() { Sequence seq; int min = -1; for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; if (min < 0 || seq.Length() < min) { min = seq.Length(); } } return (min < 0) ? 0 : min; } /** * Returns the length of the longest token sequence in this * set. This method will return zero (0) if the set is empty. * * @return the length of the longest token sequence */ public int GetMaxLength() { Sequence seq; int max = 0; for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; if (seq.Length() > max) { max = seq.Length(); } } return max; } /** * Returns a list of the initial token id:s in this look-ahead * set. The list returned will not contain any duplicates. * * @return a list of the inital token id:s in this look-ahead set */ public int[] GetInitialTokens() { ArrayList list = new ArrayList(); int[] result; object token; int i; for (i = 0; i < elements.Count; i++) { token = ((Sequence) elements[i]).GetToken(0); if (token != null && !list.Contains(token)) { list.Add(token); } } result = new int[list.Count]; for (i = 0; i < list.Count; i++) { result[i] = (int) list[i]; } return result; } /** * Checks if this look-ahead set contains a repetitive token * sequence. * * @return true if at least one token sequence is repetitive, or * false otherwise */ public bool IsRepetitive() { Sequence seq; for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; if (seq.IsRepetitive()) { return true; } } return false; } /** * Checks if the next token(s) in the parser match any token * sequence in this set. * * @param parser the parser to check * * @return true if the next tokens are in the set, or * false otherwise */ public bool IsNext(Parser parser) { Sequence seq; for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; if (seq.IsNext(parser)) { return true; } } return false; } /** * Checks if the next token(s) in the parser match any token * sequence in this set. * * @param parser the parser to check * @param length the maximum number of tokens to check * * @return true if the next tokens are in the set, or * false otherwise */ public bool IsNext(Parser parser, int length) { Sequence seq; for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; if (seq.IsNext(parser, length)) { return true; } } return false; } /** * Checks if another look-ahead set has an overlapping token * sequence. An overlapping token sequence is a token sequence * that is identical to another sequence, but for the length. * I.e. one of the two sequences may be longer than the other. * * @param set the look-ahead set to check * * @return true if there is some token sequence that overlaps, or * false otherwise */ public bool IsOverlap(LookAheadSet set) { for (int i = 0; i < elements.Count; i++) { if (set.IsOverlap((Sequence) elements[i])) { return true; } } return false; } /** * Checks if a token sequence is overlapping. An overlapping token * sequence is a token sequence that is identical to another * sequence, but for the length. I.e. one of the two sequences may * be longer than the other. * * @param seq the token sequence to check * * @return true if there is some token sequence that overlaps, or * false otherwise */ private bool IsOverlap(Sequence seq) { Sequence elem; for (int i = 0; i < elements.Count; i++) { elem = (Sequence) elements[i]; if (seq.StartsWith(elem) || elem.StartsWith(seq)) { return true; } } return false; } /** * Checks if the specified token sequence is present in the * set. * * @param elem the token sequence to check * * @return true if the sequence is present in this set, or * false otherwise */ private bool Contains(Sequence elem) { return FindSequence(elem) != null; } /** * Checks if some token sequence is present in both this set * and a specified one. * * @param set the look-ahead set to compare with * * @return true if the look-ahead sets intersect, or * false otherwise */ public bool Intersects(LookAheadSet set) { for (int i = 0; i < elements.Count; i++) { if (set.Contains((Sequence) elements[i])) { return true; } } return false; } /** * Finds an identical token sequence if present in the set. * * @param elem the token sequence to search for * * @return an identical the token sequence if found, or * null if not found */ private Sequence FindSequence(Sequence elem) { for (int i = 0; i < elements.Count; i++) { if (elements[i].Equals(elem)) { return (Sequence) elements[i]; } } return null; } /** * Adds a token sequence to this set. The sequence will only * be added if it is not already in the set. Also, if the * sequence is longer than the allowed maximum, a truncated * sequence will be added instead. * * @param seq the token sequence to add */ private void Add(Sequence seq) { if (seq.Length() > maxLength) { seq = new Sequence(maxLength, seq); } if (!Contains(seq)) { elements.Add(seq); } } /** * Adds a new token sequence with a single token to this set. * The sequence will only be added if it is not already in the * set. * * @param token the token to add */ public void Add(int token) { Add(new Sequence(false, token)); } /** * Adds all the token sequences from a specified set. Only * sequences not already in this set will be added. * * @param set the set to add from */ public void AddAll(LookAheadSet set) { for (int i = 0; i < set.elements.Count; i++) { Add((Sequence) set.elements[i]); } } /** * Adds an empty token sequence to this set. The sequence will * only be added if it is not already in the set. */ public void AddEmpty() { Add(new Sequence()); } /** * Removes a token sequence from this set. * * @param seq the token sequence to remove */ private void Remove(Sequence seq) { elements.Remove(seq); } /** * Removes all the token sequences from a specified set. Only * sequences already in this set will be removed. * * @param set the set to remove from */ public void RemoveAll(LookAheadSet set) { for (int i = 0; i < set.elements.Count; i++) { Remove((Sequence) set.elements[i]); } } /** * Creates a new look-ahead set that is the result of reading * the specified token. The new look-ahead set will contain * the rest of all the token sequences that started with the * specified token. * * @param token the token to read * * @return a new look-ahead set containing the remaining tokens */ public LookAheadSet CreateNextSet(int token) { LookAheadSet result = new LookAheadSet(maxLength -1); Sequence seq; object value; for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; value = seq.GetToken(0); if (value != null && token == (int) value) { result.Add(seq.Subsequence(1)); } } return result; } /** * Creates a new look-ahead set that is the intersection of * this set with another set. The token sequences in the net * set will only have the repeat flag set if it was set in * both the identical token sequences. * * @param set the set to intersect with * * @return a new look-ahead set containing the intersection */ public LookAheadSet CreateIntersection(LookAheadSet set) { LookAheadSet result = new LookAheadSet(maxLength); Sequence seq1; Sequence seq2; for (int i = 0; i < elements.Count; i++) { seq1 = (Sequence) elements[i]; seq2 = set.FindSequence(seq1); if (seq2 != null && seq1.IsRepetitive()) { result.Add(seq2); } else if (seq2 != null) { result.Add(seq1); } } return result; } /** * Creates a new look-ahead set that is the combination of * this set with another set. The combination is created by * creating new token sequences that consist of appending all * elements from the specified set onto all elements in this * set. This is sometimes referred to as the cartesian * product. * * @param set the set to combine with * * @return a new look-ahead set containing the combination */ public LookAheadSet CreateCombination(LookAheadSet set) { LookAheadSet result = new LookAheadSet(maxLength); Sequence first; Sequence second; // Handle special cases if (this.Size() <= 0) { return set; } else if (set.Size() <= 0) { return this; } // Create combinations for (int i = 0; i < elements.Count; i++) { first = (Sequence) elements[i]; if (first.Length() >= maxLength) { result.Add(first); } else if (first.Length() <= 0) { result.AddAll(set); } else { for (int j = 0; j < set.elements.Count; j++) { second = (Sequence) set.elements[j]; result.Add(first.Concat(maxLength, second)); } } } return result; } /** * Creates a new look-ahead set with overlaps from another. All * token sequences in this set that overlaps with the other set * will be added to the new look-ahead set. * * @param set the look-ahead set to check with * * @return a new look-ahead set containing the overlaps */ public LookAheadSet CreateOverlaps(LookAheadSet set) { LookAheadSet result = new LookAheadSet(maxLength); Sequence seq; for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; if (set.IsOverlap(seq)) { result.Add(seq); } } return result; } /** * Creates a new look-ahead set filter. The filter will contain * all sequences from this set, possibly left trimmed by each one * of the sequences in the specified set. * * @param set the look-ahead set to trim with * * @return a new look-ahead set filter */ public LookAheadSet CreateFilter(LookAheadSet set) { LookAheadSet result = new LookAheadSet(maxLength); Sequence first; Sequence second; // Handle special cases if (this.Size() <= 0 || set.Size() <= 0) { return this; } // Create combinations for (int i = 0; i < elements.Count; i++) { first = (Sequence) elements[i]; for (int j = 0; j < set.elements.Count; j++) { second = (Sequence) set.elements[j]; if (first.StartsWith(second)) { result.Add(first.Subsequence(second.Length())); } } } return result; } /** * Creates a new identical look-ahead set, except for the * repeat flag being set in each token sequence. * * @return a new repetitive look-ahead set */ public LookAheadSet CreateRepetitive() { LookAheadSet result = new LookAheadSet(maxLength); Sequence seq; for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; if (seq.IsRepetitive()) { result.Add(seq); } else { result.Add(new Sequence(true, seq)); } } return result; } /** * Returns a string representation of this object. * * @return a string representation of this object */ public override string ToString() { return ToString(null); } /** * Returns a string representation of this object. * * @param tokenizer the tokenizer containing the tokens * * @return a string representation of this object */ public string ToString(Tokenizer tokenizer) { StringBuilder buffer = new StringBuilder(); Sequence seq; buffer.Append("{"); for (int i = 0; i < elements.Count; i++) { seq = (Sequence) elements[i]; buffer.Append("\n "); buffer.Append(seq.ToString(tokenizer)); } buffer.Append("\n}"); return buffer.ToString(); } /** * A token sequence. This class contains a list of token ids. * It is immutable after creation, meaning that no changes * will be made to an instance after creation. * * @author Per Cederberg, <per at percederberg dot net> * @version 1.0 */ private class Sequence { /** * The repeat flag. If this flag is set, the token * sequence or some part of it may be repeated infinitely. */ private bool repeat = false; /** * The list of token ids in this sequence. */ private ArrayList tokens = null; /** * Creates a new empty token sequence. The repeat flag * will be set to false. */ public Sequence() { this.repeat = false; this.tokens = new ArrayList(0); } /** * Creates a new token sequence with a single token. * * @param repeat the repeat flag value * @param token the token to add */ public Sequence(bool repeat, int token) { this.repeat = false; this.tokens = new ArrayList(1); this.tokens.Add(token); } /** * Creates a new token sequence that is a duplicate of * another sequence. Only a limited number of tokens will * be copied however. The repeat flag from the original * will be kept intact. * * @param length the maximum number of tokens to copy * @param seq the sequence to copy */ public Sequence(int length, Sequence seq) { this.repeat = seq.repeat; this.tokens = new ArrayList(length); if (seq.Length() < length) { length = seq.Length(); } for (int i = 0; i < length; i++) { tokens.Add(seq.tokens[i]); } } /** * Creates a new token sequence that is a duplicate of * another sequence. The new value of the repeat flag will * be used however. * * @param repeat the new repeat flag value * @param seq the sequence to copy */ public Sequence(bool repeat, Sequence seq) { this.repeat = repeat; this.tokens = seq.tokens; } /** * Returns the length of the token sequence. * * @return the number of tokens in the sequence */ public int Length() { return tokens.Count; } /** * Returns a token at a specified position in the sequence. * * @param pos the sequence position * * @return the token id found, or null */ public object GetToken(int pos) { if (pos >= 0 && pos < tokens.Count) { return tokens[pos]; } else { return null; } } /** * Checks if this sequence is equal to another object. * Only token sequences with the same tokens in the same * order will be considered equal. The repeat flag will be * disregarded. * * @param obj the object to compare with * * @return true if the objects are equal, or * false otherwise */ public override bool Equals(object obj) { if (obj is Sequence) { return Equals((Sequence) obj); } else { return false; } } /** * Checks if this sequence is equal to another sequence. * Only sequences with the same tokens in the same order * will be considered equal. The repeat flag will be * disregarded. * * @param seq the sequence to compare with * * @return true if the sequences are equal, or * false otherwise */ public bool Equals(Sequence seq) { if (tokens.Count != seq.tokens.Count) { return false; } for (int i = 0; i < tokens.Count; i++) { if (!tokens[i].Equals(seq.tokens[i])) { return false; } } return true; } /** * Checks if this token sequence starts with the tokens from * another sequence. If the other sequence is longer than this * sequence, this method will always return false. * * @param seq the token sequence to check * * @return true if this sequence starts with the other, or * false otherwise */ public bool StartsWith(Sequence seq) { if (Length() < seq.Length()) { return false; } for (int i = 0; i < seq.tokens.Count; i++) { if (!tokens[i].Equals(seq.tokens[i])) { return false; } } return true; } /** * Checks if this token sequence is repetitive. A repetitive * token sequence is one with the repeat flag set. * * @return true if this token sequence is repetitive, or * false otherwise */ public bool IsRepetitive() { return repeat; } /** * Checks if the next token(s) in the parser matches this * token sequence. * * @param parser the parser to check * * @return true if the next tokens are in the sequence, or * false otherwise */ public bool IsNext(Parser parser) { Token token; int id; for (int i = 0; i < tokens.Count; i++) { id = (int) tokens[i]; token = parser.PeekToken(i); if (token == null || token.GetId() != id) { return false; } } return true; } /** * Checks if the next token(s) in the parser matches this * token sequence. * * @param parser the parser to check * @param length the maximum number of tokens to check * * @return true if the next tokens are in the sequence, or * false otherwise */ public bool IsNext(Parser parser, int length) { Token token; int id; if (length > tokens.Count) { length = tokens.Count; } for (int i = 0; i < length; i++) { id = (int) tokens[i]; token = parser.PeekToken(i); if (token == null || token.GetId() != id) { return false; } } return true; } /** * Returns a string representation of this object. * * @return a string representation of this object */ public override string ToString() { return ToString(null); } /** * Returns a string representation of this object. * * @param tokenizer the tokenizer containing the tokens * * @return a string representation of this object */ public string ToString(Tokenizer tokenizer) { StringBuilder buffer = new StringBuilder(); string str; int id; if (tokenizer == null) { buffer.Append(tokens.ToString()); } else { buffer.Append("["); for (int i = 0; i < tokens.Count; i++) { id = (int) tokens[i]; str = tokenizer.GetPatternDescription(id); if (i > 0) { buffer.Append(" "); } buffer.Append(str); } buffer.Append("]"); } if (repeat) { buffer.Append(" *"); } return buffer.ToString(); } /** * Creates a new token sequence that is the concatenation * of this sequence and another. A maximum length for the * new sequence is also specified. * * @param length the maximum length of the result * @param seq the other sequence * * @return the concatenated token sequence */ public Sequence Concat(int length, Sequence seq) { Sequence res = new Sequence(length, this); if (seq.repeat) { res.repeat = true; } length -= this.Length(); if (length > seq.Length()) { res.tokens.AddRange(seq.tokens); } else { for (int i = 0; i < length; i++) { res.tokens.Add(seq.tokens[i]); } } return res; } /** * Creates a new token sequence that is a subsequence of * this one. * * @param start the subsequence start position * * @return the new token subsequence */ public Sequence Subsequence(int start) { Sequence res = new Sequence(Length(), this); while (start > 0 && res.tokens.Count > 0) { res.tokens.RemoveAt(0); start--; } return res; } } } }