; This program defines a function "check-predictive" that ; determines whether a grammar can be parsed with predictive ; parsing or not. The grammar is assumed to be in BNF. Since ; there is no optionality on the RHS of any rule, the "Forward" ; criterion for predictive parsing does not apply. ; The term "immediate-successor" is used below to refer to ; a nonterminal that appears at the beginning of the RHS ; of a rule whose LHS is a given nonterminal. The "successor" ; relation is the reflexive transitive closure of the ; "immediate-successor" relation. Note that the "first" ; function described in class is also computed in terms of ; this transitive closure. ; The program uses property lists for nonterminal symbols ; to compute the transitive closures for the "successor" and ; "first" functions. (require-library "compat.ss") ; for property lists ;;;;;;;;;;;;;;;;;;; SELECTORS AND CONSTRUCTORS ;;;;;;;;;;;;;;;;; ; A grammar has 3 components -- ; a start symbol (that is a Lisp symbol) ; a list of nonterminals ; (that are Lisp symbols and that includes the start symbol) ; a list of rules (all of whose LHSs are symbols) ; A rule is represented as a list of symbols, the first of which ; is assumed to be the LHS. Note that in Scheme, two symbols ; cannot differ only in their case (define (make-grammar start nonterminals rules) (list start nonterminals rules)) (define start car) (define nonterminals cadr) (define rules caddr) (define (nonterminal? symbol grammar) (member symbol (nonterminals grammar))) ; Selectors for rules (define lhs car) (define rhs cdr) ;;;;;;;;;;;; THE TOP-LEVEL FUNCTION -- CHECK-PREDICTIVE ;;;;;;;;;;;; ; This is the top-level function. It takes a grammar and ; returns #t if the grammar can be parsed with predictive parsing, ; #f if it has left recursion, and the name of a symbol whose ; First values conflict (as described below) if there is such a ; symbol. In the latter two cases, it returns an error message. ; It works by calling a helper function that returns an error ; message in the form of a string if there is left recursion, ; returns a nonterminal whose First sets conflict (that is, a ; nonterminal A for which there are rules A ->B ... and A -> C ... ; where First(B) and First(C) intersect nontrivially) if there is one, ; and returns #f otherwise. The main function then simply decodes ; this return value and prints any additional error messages ; necessary. ; The helper first checks to see that the representation of the ; grammar is legal. Then it initializes the property lists for ; each nonterminal, and calls "find-immediate-successors" to ; record in these property lists the list of ; nonterminals and the list of terminals that appear at the ; beginning of the RHS of the rules for that nonterminal. ; Next it calls "propagate" to compute the "First" function, ; and to determine the set of nonterminals that can be reached ; from any nonterminal indirectly, without consuming a terminal ; symbol. That is, it computes the "successor" relation. ; If a nonterminal appears in its own set, then left recursion ; has been found, and an error message is returned. Otherwise ; (using "check-first") each nonterminal is checked to see whether ; its First sets conflict. If so, some such nonterminal is ; returned. Otherwise #t is returned. (define (check-predictive g) (define (helper g) (check-legality g) (init (nonterminals g)) (find-immediate-successors (nonterminals g) g) (begin0 (if (propagate (nonterminals g)) (check-first-sets (nonterminals g) g) "the grammar has left recursion") (clean-up g))) (let ((result (helper g))) (cond ((boolean? result) #t) ((string? result) (display result) (newline) #f) (else (display "predictive parsing cannot be used") (newline) (display "the symbol ") (display result) (display " has conflicting First values") result)))) ; This function initializes the property lists used in the ; top-level function. (define (init nonterms) (map (lambda (nonterm) (putprop nonterm 'immediate-successors '()) (putprop nonterm 'first '())) nonterms)) ; This function checks whether a grammar is legally specified. ; A grammar is legally specified iff its start symbol is a ; nonterminal, and the LHS of every rule is a nonterminal. ; Any symbol not specified as a nonterminal is assumed to be ; a terminal. (define (check-legality g) (let ((nonterminals (nonterminals g))) (and (member (start g) nonterminals) (not (member #f (map (lambda (rule) (member (lhs rule) nonterminals)) (rules g))))))) ; This function removes all property list values created ; directly or indirectly by the check-predictive function (define (clean-up g) (map (lambda (x) (putprop x 'successors #f) (putprop x 'immediate-successors #f) (putprop x 'first #f)) (nonterminals g))) ; This function finds the immediate nonterminal ; successors of a given nonterminal symbol, collects ; them in a list, and puts them on the property list ; of the symbol under both property names ; 'immediate-successors and 'successors. ; It also puts any terminals that begin a RHS of a rule ; whose LHS is the nonterminal, onto the property list ; of the LHS, under the property name 'first. ; The 'immediate-successors and 'first properties are ; dealt with by a helper function that traverses the ; set of rules. The outer function then copies the ; values of the 'immediate-successors properties to ; the 'successors property lists. ; The helper function uses an "add" function that adds an ; element to the front of a list if it is not already a ; member of the list. If it is passed a nonlist, it will ; return a list containing only the element. (define (find-immediate-successors nonterminals g) (define (helper nonterminal) (define (add x ls) (if (and ls (not (member x ls))) (cons x ls) (list x))) (for-each (lambda (rule) (if (eq? nonterminal (lhs rule)) (let ((symbol (car (rhs rule)))) (if (member symbol nonterminals) (putprop nonterminal 'immediate-successors (add symbol (getprop nonterminal 'immediate-successors))) (putprop nonterminal 'first (add symbol (getprop nonterminal 'first))))))) (rules g))) (for-each helper nonterminals) (for-each (lambda (nonterm) (putprop nonterm 'successors (getprop nonterm 'immediate-successors))) nonterminals)) ; This function finds, for each nonterminal in the given ; list of nonterminals of a given grammar g, ; 1) the value of its First function (that is, a list ; of all terminals that can begin its yield), and ; 2) the "successor" relation, which is just the ; transitive closure of the immediate successor ; relation. ; The nonterminals related to a given nonterminal by the ; "succesor" relation are stored (in a list) under the ; property name 'successors for the given nonterminal. ; The "propagate" function assumes that ; the initial values of the 'successors property are ; the same as those of the 'immediate-successors property, ; and that values of the "First" function are stored ; (as lists) under the property name 'first. ; The algorithm is straightforward implementation of the ; transitive closure algorithm. It repeatedly iterates ; (by calling the function "iterate") ; the operation of updating the "successors" property by adding ; the "successors" values symbol B to those of the symbol A ; whenever B is on the "immediate-successor" list of A. ; Similarly, it updates the value of the "first" property ; to include any values of First(B). ; The algorithm halts if an iteration has not modified any ; "successors" lists, or if an attempt is made to put a symbol ; on its own "successors" list. In latter case, the graph induced ; by the "successor" relation has a cycle, and recursive descent ; parsing cannot be used for the given grammar. An error ; message is printed in this case. Note that if ; there is a cycle, it will be found in a number of iterations ; no greater than the number of nonterminals in the grammar. ; The "iterate" function returns one of the symbols 'continue, ; 'halt, or 'fail. In the first case it needs to be called again, ; in the second case the outer function returns #t, and in the ; third case the outer function returns #f. The initial ; value for each iteration is 'halt, kept in a "status" ; variable. ; The "iterate" function uses a helper function "add-all" to update ; the status variable and the "successors" lists. It has a ; helper function that adds the newly found successors to ; the old. ; Note that failure is not returned until the end of an ; iteration, even though it may be detected earlier (define (propagate nonterminals) (define (add-all symbol property-name new status) (define (helper old new) ; just set union (cond ((null? new) old) ((member (car new) old) (helper old (cdr new))) (else (if (eq? status 'halt) (set! status 'continue)) (cons (car new) (helper old (cdr new)))))) (cond ((member symbol new) (display "the symbol ") (display symbol) (display " derives itself directly or indirectly") (newline) 'fail) (else (putprop symbol property-name (helper (getprop symbol property-name) new)) status))) ; Note that only the computation of the "successors" relation ; updates the "status" variable. (define (iterate nonterminals status) (for-each (lambda (nonterm) (for-each (lambda (succ) (let ((firstsuccs (getprop succ 'first))) (if firstsuccs (add-all nonterm 'first (getprop succ 'first) status))) (let ((isuccs (getprop succ 'immediate-successors))) (if isuccs (set! status (add-all nonterm 'successors isuccs status))))) (getprop nonterm 'successors))) nonterminals) status) ; begin outer function body (let ((result (iterate nonterminals 'halt))) (cond ((eq? result 'continue) (propagate nonterminals)) ((eq? result 'fail) #f) (else #t)))) ; The "check-first-sets" function simply steps through the list ; of nonterminals (its 1st argument) recursively, to see if ; the First values of any nonterminal conflict. This is checked ; (by the "first-ok?" predicate) by appending the First values ; of the symbols beginning the RHS of all the nonterminal's ; rules, and then checking to see if there is a duplicate entry ; in this list. Duplication is checked by the predicate ; "has-dupls?"). ; Note that the First value of a terminal is just a list containing ; the terminal. If a rule does not have the given nonterminal on ; LHS, the corresponding First value is represented by the empty ; list. ; If there is any repetition for any nonterminal, then the function ; will return such a nonterminal. Otherwise it returns #f. (define (check-first-sets nonterminals g) (define (first-ok? nonterm) (define (has-dupls? ls) (cond ((null? ls) #f) ((memq (car ls) (cdr ls)) #t) (else (has-dupls? (cdr ls))))) (not (has-dupls? (apply append (map (lambda (rule) (cond ((not (eq? (lhs rule) nonterm)) '()) ((member (car (rhs rule)) nonterminals) (getprop (car (rhs rule)) 'first)) (else (list (car (rhs rule)))))) (rules g)))))) (cond ((null? nonterminals) #t) ((not (first-ok? (car nonterminals))) (car nonterminals)) (else (check-first-sets (cdr nonterminals) g)))) ;;;;;;;;;;;;;;;;;;;;;;;;; TEST DATA ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; This one can use predictive parsing (define g1 (make-grammar 'S '(S VP NP Det N V) '((S NP VP) (NP Det N) (VP V NP) (Det the) (V chased) (N dog) (N cat)))) ; This one has immediate left recursion (define g2 (make-grammar 'Expr '(Expr Term Factor Var Op) '((Expr Expr Op Expr) (Expr < Expr >) (Expr Var) (Var x) (Var y) (Op *) (Op +)))) ; This one has no left recursion, but can't use predictive parsing (define g3 (make-grammar 'Expr '(Expr Term Factor Var) '((Expr Term Op Expr) (Expr Term) (Term Factor Op Term) (Term Factor) (Factor Var) (Factor < Expr >) (Var x) (Var y)))) ; This one has indirect left recursion (define g4 (make-grammar 'A '(A B C D) '((A w) (B x) (C y) (D z) (A D w) (C B x) (D C y) (B A z)))) ; Here there is no recursion, but predictive parsing can't be used (define g5 (make-grammar 'Cond '(Cond Test Block Var Op Statements Statement) '((Cond if Test then Block else Block endif) (Cond if Test then Block endif) (Test < Var Op Var> ) (Var x) (Var y) (Op =) (Op /=) (Block begin Statements end) (Block begin end) (Statements Statement) (Statements Statement Statements) (Statement Var := Var)))) ; Here predictive parsing can be used (define g6 (make-grammar 'Cond '(Cond Test Block Var Op Statements Statement ShortIf RestIf RestBlock) '((Cond ShortIf RestIf) (ShortIf if Test then Block) (RestIf else Block endif) (RestIf endif) (Test < Var Op Var> ) (Var x) (Var y) (Op =) (Op /=) (Block begin RestBlock) (RestBlock end) (RestBlock Statement Statements) (Statements end) (Statements Statement Statements) (Statement Var := Var)))) ; Here there is no left recursion, but predictive parsing ; cannot be used (define g7 (make-grammar 'S '(S A B) '((S A d) (S B c) (A e) (B e)))) (define g8 (make-grammar 'E '(E F T Id) '((E T E) (E T) (T F T) (T F) (F Id) (F < E >) (Id x) (Id y))))