package a2; /** Instances of this class represent context-free grammars. Grammars are assumed not to contain useless symbols, optionality, epsilon rules, or multiple copies of the same rule. This class does not enforce the first of these constraints. */ // Grammar symbols are represented by strings. // Grammar rules are represented by instances of an // inner Rule class import java.util.HashMap; import java.util.TreeSet; import java.util.LinkedList; import java.util.Iterator; import java.util.Scanner; public class Grammar { // the start symbol of the grammar private String startSymbol; // the set of nonterminals of the grammar private TreeSet nonterminals; // the set of rules of the grammar, indexed by their left- // hand side. Each rule is represented by an instance // of the Rule class. private HashMap > rules; // the First function, as defined on page 108 of Louden, // 2nd edition. /** @param s the start symbol of the grammar, represented as a string. If the string contains white space (as defined for the Scanner class), then the substring before the first white space character is taken to be the start symbol. @throws IllegalArgumentException if the input string is empty. */ // Except for the start symbol, all instance variables // are initialized to empty collections. public Grammar(String s) { rules = new HashMap >(); Scanner sc = null; try { sc = new Scanner(s); if (sc.hasNext()) startSymbol = sc.next(); else throw new IllegalArgumentException( "no start symbol was specified"); } finally { if (sc != null) sc.close(); } nonterminals = new TreeSet(); } /** Adds a rule to the grammar, if it is not already present. @param ruleString a grammar rule represented as a string consisting of the left-hand side and the right-hand side's symbols in order, all separated by white space, e.g. "S NP VP". The string is assumed to contain no explicit separator of the left-hand side and the right-hand side of the rule. Any such separator will be treated as a grammar symbol. @return true if the rule was added to the grammar; false if it was rejected as a duplicate entry @throws IllegalArgumentException if the input string cannot be converted to a rule */ // Convert the input to an instance of the rule // class and get its left-hand side. Add this // symbol to the list of terminals, and use it // to index the rule in the list of grammar // rules. Note that adding the first rule with // a given left-hand side requires special // treatment in the HashMap representing the rules. // An exception may be thrown by the Rule constructor; // this method simply propagates it. public boolean addRule(String ruleString) { Rule r = new Rule(ruleString); String newLHS = r.getLHS(); nonterminals.add(newLHS); LinkedList otherRules = rules.get(newLHS); if (otherRules == null) otherRules = new LinkedList(); else if (otherRules.contains(r)) return false; otherRules.add(r); rules.put(newLHS, otherRules); return true; } /** Computes the First function (as defined on p. 108 of Louden) for a context-free grammar satisfying the constraints of this class. Note that for grammars so constrained, it is enough to define this function for nonterminal symbols, rather than for strings of terminals and nonterminals. Also, it follows from these assumptions that the value of this function on a nonterminal symbol will never be empty. This method throws an exception if recursive descent parsing is not possible, so it may be used to test for this possibility. @return a mapping representing the First function if recursive descent parsing is possible @throws an IllegalArgumentException if recursive descent parsing is not possible */ // The method simply traverses the set of nonterminals // and invokes a getFirst method to compute the value // for each nonterminal, and add the nonterminal and // its value to the output HashMap. This latter method // is memoized, and may invoke itself on other // nonterminals, thus leading to a chain of nonterminals // on which getFirst is called. It may also throw an // IllegalArgumentException. // The job of adding the function value to the map could // be performed by this function. The decision to do // that job in the helper function is essentially arbitrary. public HashMap > findFirst() { HashMap > firstMap = new HashMap >(); for (String nonterminal : nonterminals) { LinkedList path = new LinkedList(); path.add(nonterminal); TreeSet firstValue = getFirst(nonterminal, firstMap, path); } return firstMap; } /** Finds the value of the First function (as defined on p. 108 of Louden) of a given grammar symbol. @param nonterminal the symbol (the method will also work for terminals, and in fact must do so to properly handle recursive calls). @param firstMap represents the values of the First function computed so far @param path the list of nonterminals involved in the current chain of calls to First (for use in detecting indirect left recursion). @return the appropriate value of the First function for the given symbol @throws IllegalArgumentException if there is left recursion in the grammar or a conflict in the computation of the function -- that is, if there are two right-hand sides for the given nonterminal, and the First values of these two right-hand sides are not disjoint. */ // The algorithm is a straightforward recursive one, // except for the need to check for conflict. // For a nonterminal A, First(A) is the union of // First(X) over all X starting the RHS of a rule // whose LHS is A, unless two of these sets overlap. // We assume that the First value of a terminal symbol // is the set containing only the terminal. // Recursion may thus terminate when a terminal // argument is reached. // The implementation uses memoization -- that is, the // desired value is looked up (by checking the HashMap // firstMap) before an attempt to compute it is made. // Newly computed values are entered into this HashMap. // Recall that we are assuming that the grammar does not // contain 2 identical rules, that there is no optionality // or epsilon rule, and that the desired // value cannot be computed in the case of // direct or indirect recursion. private TreeSetgetFirst(String nonterminal, HashMap > firstMap, LinkedList path) { TreeSet output = new TreeSet(); // if the symbol is a terminal, or its FIRST value // has already been computed, return the proper value if (!nonterminals.contains(nonterminal)) { output.add(nonterminal); return output; } TreeSet mapValue = firstMap.get(nonterminal); if (mapValue != null) return mapValue; // otherwise get the rules for the nonterminal LinkedList nextRules = rules.get(nonterminal); if (nextRules == null) nextRules = new LinkedList(); // for each rule // get the 1st symbol on its RHS // check for left recursion // if there's none, find all of // the symbols in the first set // of this 1st symbol, for (Rule nextRule : nextRules) { LinkedList rhs = nextRule.getRHS(); String firstOfRHS = rhs.getFirst(); if (path.contains(firstOfRHS)) throw new IllegalArgumentException( " there is left recursion " + "involving the symbols " + path.toString()); LinkedList newPath = new LinkedList(path); if (nonterminals.contains(firstOfRHS)) newPath.add(firstOfRHS); TreeSet newSymbols = getFirst(firstOfRHS, firstMap, newPath); if (newSymbols == null) newSymbols = new TreeSet(); // for each such symbol, add it to the // output while checking for repetition // throw an exception if there's repetition // Note that addAll can't be used here, since // we need to check each symbol individually Iterator sit = newSymbols.iterator(); while (sit.hasNext()) { String symbol = sit.next(); boolean added = output.add(symbol); if (!added) throw new IllegalArgumentException( symbol + " is in the First set of the RHS of " + System.getProperty("line.separator") + nonterminal + " -> " + rhs + System.getProperty("line.separator") + " as well as of another rule" + " for that LHS"); }} // memoize the value computed for FIRST // and return it firstMap.put(nonterminal,output); return output; } /** An inner class whose instances represent rules in a context-free grammar */ // Symbols are represented as strings private class Rule { // the left-hand side of a rule private String lhs; // the right-hand side of a rule private LinkedList rhs; /** @param ruleString a string that represents the rule as a sequence of symbols. The symbols are represented by substrings and separated by whitespace (as determined by the Scanner class. @throws IllegalArgumentException if the input string consists of 0 or 1 symbols. */ public Rule(String ruleString) { Scanner sc = null; try { sc = new Scanner(ruleString); if (sc.hasNext()) lhs = sc.next(); else throw new IllegalArgumentException( "the proposed rule " + ruleString + "contains no symbols"); if (sc.hasNext()) { String firstRhs = sc.next(); rhs = new LinkedList(); rhs.add(firstRhs); while (sc.hasNext()) rhs.add(sc.next()); } else throw new IllegalArgumentException( "the proposed rule " + ruleString + " contains only 1 symbol"); } finally { if (sc != null) sc.close(); } } /** @return the symbol on the left-hand side of the rule, represented as a string. */ public String getLHS() { return lhs; } /** @return (a copy of) the list of symbols on the right-hand side of the rule, with each symbol represented as a string. */ public LinkedList getRHS() { return new LinkedList(rhs); } /** This method, as part of an inner class, should never receive an empty argument or a nonRule argument. */ public boolean equals(Object o) { Rule r = (Rule) o; return lhs.equals(r.lhs) && rhs.equals(r.rhs); } } // end of Rule class } // end of Grammar class