; This file contains function definitions for CS 152, ; Fall 2006, Assignment 4. ;;;;;;;;;;;;;;;;;;;;;;;; RULES ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; A structure to represent rules in a context-free grammar ; the left-hand side of a rule should be a symbol ; the right-hand side of a rule should be a list of symbols (define-struct rule (lhs rhs)) ; An error-checking constructor for rules. Normally it returns ; a rule. ; It checks ; whether the lhs is a symbol ; whether the rhs is a nonempty list that contains only symbols. ; If it finds an error of one of these sorts, it returns a ; string representing an error message. ; Note that the requirement of a nonempty right-hand side rules ; out epsilon-rules. Also, no extensions to BNF are permitted -- ; not even the use of the vertical bar. (define (build-rule lhs rhs) ; A predicate to determine whether a given list contains ; a nonsymbol. The argument is assumed to be a list. (define (nonsymbol-present? ls) (member #f (map symbol? ls))) (cond ((not (symbol? lhs)) "the rule's left-hand side is not a symbol") ((not (list? rhs)) "the rule's right-hand side must be given in list form") ((null? rhs) "empty right-hand sides are not permitted in rules") ((nonsymbol-present? rhs) "a nonsymbol is present in the proposed right-hand side") (else (make-rule lhs rhs)))) ; This function converts a rule to a string, with the ; "->" symbol separating the left and right hand sides ; A recursive helper function handles the right-hand ; side. ; Arguments that are not rule structures will be left ; unchanged. Note that a rule structure constructed ; with "make-rule" with improper arguments won't be ; handled correctly, although a rule constructed with ; "build-rule" will be. (define (rule->string rule) (define (rhs->string rhs) (cond ((null? rhs) "") (else (string-append " " (symbol->string (car rhs)) (rhs->string (cdr rhs)))))) (if (rule? rule) (string-append (symbol->string (rule-lhs rule)) " ->" (rhs->string (rule-rhs rule))) rule)) ; This predicate determines whether a given rule is a member of a ; given list. No error checking is performed on the input, ; except that the members of the rule-list are checked ; to see whether they are rule structures (as opposed to, ; say, error messages for ill-formed rules). (define (rule-member? rule rule-list) (cond ((null? rule-list) #f) ((not (rule? (car rule-list))) (rule-member? rule (cdr rule-list))) ((and (equal? (rule-lhs rule) (rule-lhs (car rule-list))) (equal? (rule-rhs rule) (rule-rhs (car rule-list)))) #t) (else (rule-member? rule (cdr rule-list))))) ;;;;;;;;;;;;;;;;;;;;;; GRAMMARS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; A structure to represent context-free grammars, with components: ; the start symbol of the grammar ; it should be a symbol ; an unordered list of the nonterminals of the grammar ; each nonterminal should be a symbol ; an unordered list of rules of the grammar ; each list member should be of structured type "rule" ; Grammars are assumed not to contain useless symbols. ; However this constraint is nowhere enforced. (define-struct grammar (start-symbol nonterminals rules)) ; An error-checking constructor for grammars. Normally it returns ; a grammar. Its arguments correspond to the first and third ; arguments of the grammar structure type. ; It checks: ; whether the start-symbol is a symbol ; whether the list of rules is a list ; whether the list of rules contains only rules ; whether the list of rules is free of duplicate rules ; In the first two cases, this function will return a string ; representing an error message. ; The latter two cases are treated as errors which this function ; will fix, without notice, by deleting nonrules and second ; and subsequent occurrences of the same rule ; contains only rules. ; The function does not check that there are any rules in ; the rule list. ; The function computes the set of nonterminals of the grammar, ; under the assumption that a symbol is a nonterminal iff it ; appears on the left-hand side of some rule. (define (build-grammar start-symbol rules) ; This function takes a list of rules and returns a list, ; with no duplicates, of all symbols appearing on the ; left-hand side of a rule. By our assumptions, the ; result is just the set of nonterminal symbols of the ; grammar. (define (collect-nonterminals rulelist) (remove-duplicates (map rule-lhs rulelist))) ; This function takes a list and removes the list obtained by ; removing all duplicate members ; The argument may be assumed to be a list. (define (remove-duplicates ls) (cond ((null? ls) ls) ((member (car ls) (cdr ls)) (remove-duplicates (cdr ls))) (else (cons (car ls) (remove-duplicates (cdr ls)))))) ; This function takes a list and returns the list obrained ; by removing all nonrules, and all duplicate rules. ; The argument may be assumed to be a list. (define (remove-nonrules-and-duplicate-rules ls) (cond ((null? ls) ls) ((not (rule? (car ls))) (remove-nonrules-and-duplicate-rules (cdr ls))) ((rule-member? (car ls) (cdr ls)) (remove-nonrules-and-duplicate-rules (cdr ls))) (else (cons (car ls) (remove-nonrules-and-duplicate-rules (cdr ls)))))) ; the body of BUILD-GRAMMAR (cond ((not (symbol? start-symbol)) "the start symbol must be a symbol") ((not (list? rules)) "the rules must be given in list form") (else (let ((good-rules (remove-nonrules-and-duplicate-rules rules))) (make-grammar start-symbol (collect-nonterminals good-rules) good-rules))))) ; A predicate to determine whether a given symbol is a nonterminal ; symbol of a given grammar. No error checking is done on the ; input. (define (nonterminal? x grammar) (member x (grammar-nonterminals grammar))) ;;;;;;;;;;;;;;;;; THE MAIN FUNCTION for Assignment 4 ;;;;;;;;;;;;;;; ; This function takes a grammar and determines whether recursive- ; descent parsing is possible for the grammar ; More precisely, the function returns a string representing an ; error message if recursive-descent parsing is not possible. ; If it is possible, it returns an association list representing ; the FIRST function (cf. Louden, p 108) for nonterminal symbols ; of the grammar. Note that since our grammars have no epsilon- ; rules or optionality, the FIRST function needs to be defined ; only for nonterminal symbols. ; The function uses an auxiliary function get-first that computes ; the FIRST value of a single nonterminal symbol. Since FIRST ; is defined recursively, get-first and the other auxiliary ; functions maintain and pass an association list that keeps ; track of the FIRST values computed so far. ; Note that the grammar argument never changes, so it needn't ; be passed to the auxiliary functions. ; The auxiliary function uses memoization, since finding some ; FIRST values requires computing other FIRST values. ; The argument is checked to see whether it is a grammar ; structure. Note that run-time type errors are still possible ; if the grammar was not constructed with "build-grammar". ; Note that no error checking need be done on the arguments of the ; auxilliary functions. (define (find-first grammar) ; a predicate to recognize error messages (that is, to ; determine whether a called function has found an ; error. (define error? string?) ; An auxiliary function that converts lists of symbols ; to a string that prints the same way (define (symbol-list->string ls) (cond ((null? ls) "") (else (string-append (symbol->string (car ls)) " " (symbol-list->string (cdr ls)))))) ; This function returns a list of the rules whose left-hand ; side is a given symbol. An auxiliary function takes ; in addition the list of all the grammar rules, and returns ; the appropriate list. (define (get-rules lhs) (define (get-rules-rec lhs all-rules) (cond ((null? all-rules) null) ((eq? lhs (rule-lhs (car all-rules))) (cons (car all-rules) (get-rules-rec lhs (cdr all-rules)))) (else (get-rules-rec lhs (cdr all-rules))))) (get-rules-rec lhs (grammar-rules grammar))) ; This function returns a value in the intersection of two lists, ; if one exists. It returns #f otherwise. (define (overlap? list1 list2) (cond ((null? list1) #f) ((member (car list1) list2) (car list1)) (else (overlap? (cdr list1) list2)))) ; This function computes the FIRST value of a symbol, ; given arguments representing ; the symbol ; a list of the rules with the symbol as their LHS ; a list of the symbols already known to be members ; of the FIRST value of the given symbol ; an association list containing all known values of the ; FIRST function for all other symbols ; a list of the symbols whose FIRST value depends on ; the FIRST value of the symbol. ; If the rules for the given symbol have a conflict that makes ; recursive descent parsing impossible, a string representing ; an error message is returned. ; Otherwise the function returns a pair consisting of ; the updated 3d & 4th arguments -- that is, consisting of ; the final value of the FIRST function of the given symbol, ; and all values now known for the FIRST function. ; Note that this function only needs to be called if the value ; of the FIRST function on the given symbol does not appear ; in the association list given by the 4th argument. (define (process-rules symbol rules-for-symbol value-so-far first-list history-list) ; if the rules are exhausted, pair up the updated ; 3d & 4th arguments and return them (cond ((null? rules-for-symbol) (cons value-so-far first-list)) ; check for left recursion ((member symbol history-list) (string-append "there is left recursion involving " "the symbols: " (symbol-list->string (member symbol (reverse history-list))))) ; find the contribution of the next rule to the ; FIRST value -- this contribution depends on ; whether the rule's RHS begins with a nonterminal or not (else (let* ((first-of-rhs (car (rule-rhs (car rules-for-symbol)))) (first-value (if (nonterminal? first-of-rhs grammar) (get-first first-of-rhs first-list (cons symbol history-list)) (cons (list first-of-rhs) first-list)))) ; and combine it with the symbols found for the ; remaining rules, unless the latest contribution ; has resulted in an overlap (cond ((error? first-value) first-value) (else (let ((overlap (overlap? (car first-value) value-so-far))) (if overlap (string-append (symbol->string overlap) " is in the FIRST set " "of the right-hand side of " (rule->string (car rules-for-symbol)) " and of another rule with " "the same left-hand side ") (process-rules symbol (cdr rules-for-symbol) (append value-so-far (car first-value)) (cdr first-value) history-list))))))))) ; This function computes the value of the FIRST function for a ; given symbol. It takes two additional parameters. The ; first of these is an assocation list representing the values ; of the FIRST function that are already known. The second ; of these is a list of symbols whose FIRST value ; depends on the FIRST value of the symbol. This parameter ; is used to detect indirect left recursion ; If it is discovered during the computation that the grammar ; cannot use recursive descent parsing, a string representing ; an error message is returned. ; Otherwise the function returns a pair whose components are ; (as for PROCESS-RULES) the value of the FIRST function for ; the given symbol (represented as a list) and the updated ; value of the 2d argument (that is, an assocation list ; containing all known values of the FIRST function, including ; the newly computed one) ; 3 cases ; the symbol is a terminal, so isn't in the list of known values ; the symbol is a nonterminal in the list of known values ; the symbol is a nonterminal not in the list of known values ; so its value must be calculated (define (get-first symbol first-list history-list) ; FIRST values of terminals needn't be represented (if (not (member symbol (grammar-nonterminals grammar))) (cons null first-list) (let ((known-value (assoc symbol first-list))) ; values already known needn't be represented again (if known-value (cons (cdr known-value) first-list) ; process all the rules for the given symbol (let ((processing-result (process-rules symbol (get-rules symbol) null first-list history-list))) ; if a conflict is found, return an error message ; otherwise enter the current symbol and its value ; into the list of known values and return (if (error? processing-result) processing-result (cons (car processing-result) (cons (cons symbol (car processing-result)) (cdr processing-result))))))))) ; This function is a recursive version of the main FIND-FIRST ; function that computes the FIRST function for each ; nonterminal. If an error is detected at any point, ; it is simply propagated to the caller. ; It takes as its arugments a list of the nonterminals of ; the grammar, and an association list of the values of ; the FIRST function known so far. (define (find-first-rec nonterminals first-list) (cond ((null? nonterminals) first-list) (else (let ((first-result (get-first (car nonterminals) first-list null))) (cond ((error? first-result) first-result) (else (find-first-rec (cdr nonterminals) (cdr first-result)))))))) ; This body of the main FIND-FIRST function checks input of illegal ; type. If all is well, the nonterminals of the grammar are ; collected, and passed to the recursive version of the function. (if (grammar? grammar) (find-first-rec (grammar-nonterminals grammar) null) "find-first was called with a nongrammar argument"))