/*********************************************************************** This program simulates execution of a program in an simple, invented functional programming language with static type checking and dynamic scoping, using environments and activation records to implement this combination. A type is defined for activation records, and one is defined for environments. A type for lists is also defined. In this language, the main program (more precisely, the main function) statically encloses any subsidiary functions used. An encoding of a function call corresponds to a translation into a target language, where both the function called and each of its arguments are represented as an offset pair. The first element of the pair represents the static distance from the block where the identifier is used to the block where is it defined. The second element is the offset from the beginning of this latter block to the symbol itself. No check is made for duplicate indentifers among the parameters or local variables of a function block. The actual n arguments used for the call to the outermost function are the constants 1, 2, ..., n, where the constant k is represented as the offset pair (CONSTANT_FLAG, k). No storage is reclaimed. During a simulated run, the values of all actual values bound to formal parameters are printed. Global variables "fp" and "token" are used for the input file and for the token last read from this file. The program has been presented in a single file for convenience. The grammar for the language is: ::= . ::= function [] begin [] [] end ::= {} ::= {} ::= {} ::= call {} **********************************************************************/ #include #include #include #include #include typedef int BOOLEAN; const TRUE = 1; const FALSE = 0; const CONST_FLAG = -1; const LIST_SIZE = 10; /* maximum size of any list */ const TOKEN_SIZE = 20; /* maximum length of a token */ const ERROR_FLAG = -1; /* assume this is an illegal value */ char token[20]; FILE *fp; typedef struct function; typedef struct activation; /*/////////////////////// STRUCTURED TYPES ////////////////////////////*/ /* In an offset pair, the first is an integer giving the number of activation records backward to search second is an integer giving the index position within the given list in the given activation record */ typedef struct { int first; int second; } offset; /* Lists are implemented as arrays with a SIZE component which also gives the next available location. No range checks are made. */ typedef struct list { int size; void *symbol[20]; } list; /* An activation record contains a name field, provided solely for debugging purposes. Control links and activation links are provided. Activation records must also contain the values bound to both the parameters and function names used by the function being activated. */ typedef struct activation { char *name; /* just for debugging */ struct activation *control; struct activation *access; list *actuals; list *functs; } activation; /* An environment object is just a linked list of activation records. Since these records are linked both by control links and by activation links, the links are stored in the activation records, Thus the only component of an environment object is a pointer to the first activation record. */ typedef struct environment { activation *first; } environment; /* For representation of functions. The parameters, subroutine names (i.e., names of other functions defined in the current function's block) and instructions (i.e., the list of calls making up the function body) are represented as lists of symbols in the source language. The environment pointer "ep" is represented at compile time simply as a pointer to the calling function. */ typedef struct function { char *name; struct function *ep; list *parameter; list *subr_names; list *subroutines; list *instrs; list *code; } function; /************************ AUXILIARY FUNCTIONS **********************/ /* Insert a symbol at the end of a list */ void insert(list *ls, void *sym) { ls->symbol[ls->size++]=sym; } /* Create and return a new empty list */ list *build_empty_list(void) { list *l=malloc(sizeof(list)); assert(l); l->size=0; return l; } /* Build and return a representation of a function, given a name, a pointer to its caller, a pointer to a parameter list, a pointer to a list of subsidiary functions, and a pointer to a list of instructions. These latter 3 may be passed as NULL pointers, in which case the proper representation for an empty list -- an array with size counter 0 -- is constructed for them */ function *build_function(char *n, function *e, list *p, list *s, list *i) { function *f=malloc(sizeof(function)); assert (f); f->name=malloc(sizeof(char)*(1+strlen(n))); assert (f->name); strcpy(f->name,n); f->ep=e; if (p) f->parameter=p; else f->parameter=build_empty_list(); f->subr_names=build_empty_list(); /* should be empty */ if (e) insert(e->subr_names,n); if (s) f->subroutines=s; else f->subroutines=build_empty_list(); if (i) f->instrs=i; else f->instrs=build_empty_list(); f->code=build_empty_list(); return f; } /* Return the number of arguments required by a function f */ int get_arity(function *f) { return f->parameter->size; } /* Print an offset pair */ void print(offset *o) { printf("%5d:%5d",o->first,o->second); } /* Print a list -- assumes that "printf" can print the members */ void print_ls(list *l) { int i; if (!l) printf("empty list pointer"); else { for (i=0; isize; i++) { printf(l->symbol[i]); printf(" "); } printf("\n"); } } /* return the (first) position, if any, in a list "ls" where a given symbol "sym" is to be found */ int search(list *ls,void* sym) { int i; for (i=0; isize; i++) if (strcmp(ls->symbol[i],sym)==0) { return i; } return ERROR_FLAG; } /* Return an encoding of the run-time position of a function symbol "fname", relative to the current function "ff". Search outward from the current function, following the links given by environment pointer. Print an error message and return NULL if the function is not found, or if there is a mismatch in the number of arguments. To make this latter test, the length of the call to "ff" is passed as the parameter "call_size". */ offset *search_f(function *ff, char *fname, int call_size) { function *fd; /* 3 temporaries */ list *lp; int arity; function *f=ff; /* the function being searched */ offset *o=malloc(sizeof(offset)); /* the value to be output */ int posn=0; /* and its */ int counter=0; /* components */ assert (o); while (f) { posn=search(f->subr_names,fname); if (posn != ERROR_FLAG) { o->first=counter; o->second=posn; /* check numbers of formals & actuals */ fd=f->subroutines->symbol[posn]; lp=fd->parameter; arity=lp->size; if (arity+1 == call_size) return o; printf("wrong number of arguments in call"); return NULL; } counter++; f=f->ep; } printf("\n%s","undefined function"); return NULL; } /* return an encoding of the run-time position of a given identifier. Search outward from the current function, following the links given by environment pointers. Prints an error message and returns NULL if the identifier is not found. */ offset *search_v(function *ff,char *var) { function *f=ff; /* the function currently searched */ offset *output = malloc(sizeof(offset)); /* the output offset */ int posn; /* and its */ int counter; /* components */ assert (output); counter=0; posn=0; while (f) { posn=search(f->parameter,var); if (posn!=ERROR_FLAG) { output->first=counter; output->second=posn; return output; } counter++; f=f->ep; } printf("%s\n","undeclared argument"); return NULL; } /* Lists of offsets are used to represent function calls in the target language; this function does the translation. The first argument is the source language version of the call; the second is a pointer to a function object. This function object contains the information about block structure needed to translate the call to the corresponding function. Thus this function can merely translate the function name by a call to "search_f" and each argument by a call to "search_v". If any search fails, the function signals an error by returning an error flag in the "size" field of the constructed list. */ list *build_list_of_offsets(list *instr, function *f) { offset *o; /* 2 temporaries */ int i; list *lo=malloc(sizeof(list)); /* for output */ assert (lo); /* search for the function name */ o=search_f(f,instr->symbol[0],instr->size); if (o) { lo->size=instr->size; lo->symbol[0]=o; } else { lo->size=ERROR_FLAG; return lo; } /* search for the arguments */ for (i=1; isize; i++) { o=search_v(f,instr->symbol[i]); if (!o) { lo->size=ERROR_FLAG; return lo; } lo->symbol[i]=o; } return lo; } /* This function translates the body of the function f from the source language to the target language by building one list_of_offsets for each instruction. It also returns an error flag in the "size" field if an error is discovered. */ list *build_list_of_encodings(function *f) { int i; /* 2 temporaries */ list *instrs=f->instrs; list *le=malloc(sizeof(list)); /* for output */ list *lo=malloc(sizeof(list)); /* for members of the output list */ assert(le); assert(lo); if (!le) printf("error in line 402"); le->size=instrs->size; for(i=0; isize; i++) { lo=build_list_of_offsets(f->instrs->symbol[i],f); le->symbol[i]=lo; if (lo->size==ERROR_FLAG) { le->size=ERROR_FLAG; return le; } } return le; } /* Create an empty activation record. The access & control fields are initialized by a PUSH function; the actuals field by a BIND function. This function is used only for top-level calls (i.e., calls not from other functions), so the "functs" field will never be used. */ activation *build_empty_activation(void) { activation *a=malloc(sizeof(activation)); assert (a); a->control=NULL; a->access=NULL; a->actuals=NULL; a->functs=NULL; a->name=malloc(6*sizeof(char)); strcpy(a->name,"DUMMY"); return a; } /* Create an activation record for a function f. The fields of f will be initialized later by PUSH and BIND as in the previous function */ activation *build_activation(function *f) { int i; activation *a=malloc(sizeof(activation)); assert(a); a->name=f->name; a->actuals=build_empty_list(); a->functs=f->subroutines; return a; } /* Build and return an empty environment */ environment *build_environment(void) { environment *e; e=malloc(sizeof(environment)); assert(e); e->first=build_empty_activation(); return e; } /* look up a function symbol in the environment given a pair (i,j) of integers by following the access link i times and looking in position j in the resulting activation record. */ function *get_function(environment *e,offset *o) { int ii; activation *a=e->first; for (ii=1; ii<=o->first; ii++) a=a->access; return a->functs->symbol[o->second]; } /* Return the (run-time) value stored at a given offset from a given environment, interpreting the offset as in the previous function. /* Note that since by assumption the evaluation is performed after the new activation record is pushed, the first activation record to be searched should be e->first->control */ offset *eval(environment *e, offset *o) { activation *a=e->first->control; int ii; if (o->first==CONST_FLAG) return o; for (ii=1; ii<=o->first; ii++) a=a->access; return a->actuals->symbol[o->second]; } /* remove an activation record from the environment */ void pop(environment *e) { e->first=e->first->control; } /* add a given activation record to the environment. This function also must set the access link correctly, and thus needs a third argument "offset" to tell how many access links to follow when doing so. */ void push(environment *e, activation *a, int offset) { activation *temp=e->first; int i; for (i=1; i<=offset; i++) temp=temp->access; a->access=temp; a->control=e->first; e->first=a; } /* Initializes lists of actual parameters. Note that the "actuals" field contains the entire the function call, so it has one extra initial element -- the function name. Thus its number "arity" of arguments is one less than the call size. As a side effect, prints the encoded values of the actual arguments.*/ void bind(environment *e, list *call, list *actuals) { int i; /* 2 temporaries */ offset *o; int arity=-1+call->size; for (i=0; isymbol[i+1]); print(o); printf(" "); insert(actuals,o); } printf("\n"); } /* Simulate execution of a function, given an environment and a call to the function. Do so by stepping through each instruction of the body in turn, construcing a new activation record for the function call that the instruction represents, pushing it onto the environment, and looking the name of the new function call and the name of each of its parameters up in this environment, and then simulating recursively the execution of this new function. Recall that we are not assuming any constants in this (unrealistically) simple programming language. */ void run_rec(environment *e,list *call, function *f) { activation *a; /* an activation record for f */ list *code; /* the body of f */ list *new_call; /* a call from f to another function */ function *ff; /* the function for this call */ offset *o; /* the offset for this function */ int i; /* counter for calls in f's body */ bind(e,call,e->first->actuals); code=f->code; for (i=0; isize; i++) { new_call=(list *) code->symbol[i]; /* get the ith instr. of f */ o=new_call->symbol[0]; ff=get_function(e,o); a=build_activation(ff); push(e,a,o->first); run_rec(e,new_call,ff); } pop(e); } /* run a top-level function by constructing an (encoded) call and an appropriate environment, and passing them to the general version of "run" defined above */ void run(environment *e, function *f) { int i; /* argument counter */ activation *a; /* top level activation */ list *call=build_empty_list(); /* top level call */ offset *o=malloc(sizeof(offset)); /* top level offset */ assert(o); o->first=0; /* which must be (0 0) */ o->second=0; insert(call,o); /* create dummy constants for actual top-level arguments */ for (i=1; i<=f->parameter->size; i++) { o=malloc(sizeof(offset)); assert(o); o->first=CONST_FLAG; o->second=i-1; insert(call,o); } a=build_activation(f); printf("Note: top-level bindings are printed twice\n"); bind(e,call,a->actuals); push(e,a,0); run_rec(e,call,f); } /* Get the next token from the input file and assign it to the global variable "token". */ void GetToken(FILE *fp) { fscanf(fp,"%s",&token); printf("%s\n",token); } /* Determine whether the input string "w" represents a reserved word */ BOOLEAN reserved(char *w) { if (strcmp(w,"function")==0) return TRUE; if (strcmp(w,"begin")==0) return TRUE; if (strcmp(w,"end")==0) return TRUE; if (strcmp(w,"call")==0) return TRUE; if (strcmp(w,"local")==0) return TRUE; return FALSE; } /**************** RECOGNITION FUNCTIONS FOR NONTERMINALS ***************/ /* Determine whether the current token represents an identifier. For this simple grammar, it does iff it does not represent a reserved word This function consumes no tokens */ BOOLEAN is_id(void) { return !reserved(token); } /* Recognition functions for nonterminal categories. Most take a pointer to a Boolean value "success" so that they can return an indication of whether or not they have failed as well as a structure */ /* recognition function for identifiers -- consumes a token */ BOOLEAN id(void) { if (reserved(token)) return FALSE; GetToken(fp); return TRUE; } /* Encodes the rule ::= {} Returns a list of the parameter symbols */ list *params(BOOLEAN *success) { char *x; list *l=build_empty_list(); x=malloc(sizeof(char)*(1+strlen(token))); assert(x); strcpy(x,token); *success=TRUE; if (!id()) { *success=FALSE; return NULL; } insert(l,x); x=malloc(sizeof(char)*(1+strlen(token))); assert(x); strcpy(x,token); while (id()) { insert(l,x); x=malloc(sizeof(char)*(1+strlen(token))); assert(x); strcpy(x,token); } return l; } /* Encodes the rule ::= call {} Returns a list of symbols making up the call */ list *call(BOOLEAN *success) { list *slist=build_empty_list(); /* for uncoded body */ char *x; char *fname=malloc(sizeof(char)*20); assert(fname); *success=TRUE; if (!strcmp(token,"call")==0) { *success=FALSE; return build_empty_list(); } GetToken(fp); strcpy(fname,token); if (!id()) { *success=FALSE; return NULL; } x=malloc(sizeof(char)*(1+strlen(token))); assert(x); strcpy(x,token); insert(slist,fname); while (id()) { insert(slist,x); x=malloc(sizeof(char)*(1+strlen(token))); assert(x); strcpy(x,token); } return slist; } /* encodes the rule ::= {} Returns a list of calls, each a list of symbols */ list *body(BOOLEAN *success) { list *blist=build_empty_list(); /* for output */ list *clist=build_empty_list(); /* for elements of the output */ clist=call(success); if (!*success) { *success=TRUE; return build_empty_list(); } insert(blist,clist); while (strcmp(token,"call")==0) { clist=call(success); insert(blist,clist); } return blist; } /* forward declaration to avoid indirect recursion between the functions "functions" and "funct" */ list *functions(BOOLEAN *, function *); /* Encodes the rule ::= function [] begin [] [] end Returns a function object representing the function. */ function *funct(BOOLEAN *success, function *caller) { function *f; /* for output */ char *fname; /* the function's name */ list *plist=build_empty_list(); /* parameters, local functions */ list *flist=build_empty_list(); /* and body of function */ list *blist=build_empty_list(); /* being created */ *success=TRUE; if (strcmp(token,"function")!=0) { *success=FALSE; return NULL; } GetToken(fp); fname=malloc(sizeof(char)*(1+strlen(token))); assert(fname); strcpy(fname,token); if (!id()) { printf("%s\n","no function name!"); *success=FALSE; return NULL; } if (is_id()) { plist=params(success); if (!*success) { printf("%s\n","bad parameter list"); return NULL; } } if (strcmp(token,"begin")!=0) { printf("%s\n","no BEGIN in function def"); *success=FALSE; return NULL; } GetToken(fp); /* side effect below: fname is put on callers' subr_names list */ f=build_function(fname,caller,plist,NULL,NULL); if (strcmp(token,"function")==0) { flist=functions(success,f); /* can call only after f val found */ if (!*success) return NULL; f->subroutines = flist; } if (strcmp(token,"call")==0) { /* should reclaim old blist?? */ blist=body(success); if (!*success) return NULL; f->instrs = blist; f->code=build_list_of_encodings(f); if (f->code->size==ERROR_FLAG) { *success=FALSE; return NULL; } } if (strcmp(token,"end")!=0) { printf("%s\n","no END to match BEGIN"); *success=FALSE; return NULL; } GetToken(fp); return f; } /* Encodes the rule ::= {} Install the compiled versions of all functions local to a given function CALLER. Return a list of these functions. Return TRUE in SUCCESS iff there are no errors. Want to insert into representation of caller so that searches for nonlocal variables succeed. */ list *functions(BOOLEAN *success, function *caller) { list *fs=build_empty_list(); /* for output */ function *f; /* an element of the output list */ f=funct(success,caller); if (!*success) return build_empty_list(); insert(caller->subroutines,f); insert(fs,f); while (strcmp(token,"function")==0) { f=funct(success,caller); if (!*success) return fs; if (!f) { *success=TRUE; return fs; } insert(caller->subroutines,f); insert(fs,f); } return fs; } /* encodes the rule ::= . Returns a representation of the top-level function */ function *program(BOOLEAN *s, function *caller) { function *f; /* for output */ f=funct(s,caller); if (!*s) return NULL; if (strcmp(token,".")!=0) { printf("no period at end of program"); *s=FALSE; return NULL; } return f; } /* The top-level parsing function Returns a representation of the top-level function parsed */ function *compile(void) { function *f; BOOLEAN *success=malloc(sizeof(BOOLEAN)); assert(success); *success=TRUE; GetToken(fp); /* using one symbol of lookahead */ f=program(success,NULL); /* parse as function with no caller */ /* should dispose of success */ return f; } /* The main C program. Here "f" corresponds to the main function in the simulated program. More precisely, it is an object of type "funct". The constructor for this type corresponds to a parser for the simulated program. A member function "run" for this type simulates execution of the compiled program. */ int main(void) { environment *e =build_environment(); char filename[20]; activation *a; function *f; printf("enter filename:"); scanf("%s",&filename); fp=fopen(filename,"r"); if (!fp) { printf("can't open file\n"); return 0; } f=compile(); if (!f) printf("%s\n"," --failed to compile"); else run(e,f); fclose(fp); return 0; }