(* Simple example of a recursive descent parser for expressions *) (* Available under Gnu General Public Licence, GPL. *) (* L.A., Dept Comp Sci, Monash, 1988 and CSSE, Monash .au 2005 *) (* http://www.allisons.org/ll/FP/SML/1997/ *) type Ide = string; datatype Symbol = (* lexical items *) IdentSy of string | NumSy of int | (* x, 7 *) plusSy | minusSy | timesSy | divSy | (* + * - / *) eqSy | neSy | ltSy | leSy | gtSy | geSy | (* = <> < <= > >= *) notSy | andSy | orSy | (* not and or *) openSy | closeSy | (* ( ) *) eofSy | symbolErr of string; (* ------------------------------------------------------------------------- *) (* Abstract Syntax: Exp ::= Ide | Num | Exp Bopr Exp | Uopr Exp *) datatype Uopr = uminus | knot; (* operators, unary *) datatype Bopr = plus | minus | times | divide | (* operators, binary *) eq | ne | le | lt | ge | gt | conj | disj ; datatype Exp = binexp of Exp * Bopr * Exp | (* expressions *) unexp of Uopr * Exp | varid of Ide | numeral of int | expErr of string (* =====================================================LA==.au==14/4/2005== *) (* lexical analysis *) fun member y [] = false | member y (x::xs) = x=y orelse member y xs; fun starts str xs = let fun s [] xs = true | s ys [] = false | s (y::ys) (x::xs) = if y=x then s ys xs else false; in s (explode str) xs end; fun number ip = (* number *) let fun nu [] n = (NumSy n, []) | nu (ip as (x::xs)) n = if x >= #"0" andalso x <= #"9" then nu xs (n*10+ord(x)-ord(#"0")) else (NumSy n, ip) in nu ip 0 end; fun word ip = (* reserved words and also identifiers *) let fun w r "and" = (andSy, r) | w r "or" = (orSy, r) | w r "not" = (notSy, r) | w r name = (IdentSy name, r) and id [] name = w [] name | id (ip as (x::xs)) name = if x >= #"a" andalso x <= #"z" then id xs (name ^ str(x)) else w ip name in id ip "" end (* of word *); fun insymbol [] = (eofSy, []) (* symbols *) | insymbol (ip as (x::xs)) = if member x [#" ", #"\n", #"\t"] then insymbol xs else if starts "(" ip then (openSy, xs) else if starts ")" ip then (closeSy, xs) else if starts "+" ip then (plusSy, xs) else if starts "-" ip then (minusSy, xs) else if starts "*" ip then (timesSy, xs) else if starts "/" ip then (divSy, xs) else if starts "<>" ip then (neSy, tl xs) else if starts "<=" ip then (leSy, tl xs) else if starts ">=" ip then (geSy, tl xs) else if starts "=" ip then (eqSy, xs) else if starts "<" ip then (ltSy, xs) else if starts ">" ip then (gtSy, xs) else if x >= #"0" andalso x <= #"9" then number ip else if x >= #"a" andalso x <= #"z" then word ip else (symbolErr ("insymbol: bad symbol:" ^ str(x) ^ "..."), []) ; fun lexical [] = [eofSy] (* lexical : char list -> Symbol list *) | lexical ip = let val (s,ip2) = insymbol ip in s :: (lexical ip2) end; (* =====================================================LA==.au==14/4/2005== *) (* Concrete Syntax: Exp ::= Exp (and|or) relExp | relExp relExp ::= relExp (= | <> | <= | >= | < | > ) arithExp | arithExp arithExp ::= arithExp (+|-) term | term term ::= term ( * | / ) operand operand ::= Ide | Num | (not|-) operand | "(" Exp ")" *) fun expression ip = let fun operand (openSy :: rest) = let val (subexp, (s::rest2)) = expression rest in if s=closeSy then (subexp, rest2) else (expErr "subexpression: missing )", rest) end | operand (IdentSy name :: rest) = (varid name, rest) | operand (NumSy n :: rest) = (numeral n, rest) | operand (minusSy :: rest) = unExp uminus rest | operand (notSy :: rest) = unExp knot rest | operand ip = (expErr "bad expression", ip) and unExp opr ip = let val (subopd, rest) = operand ip in (unexp (opr, subopd), rest) end and oprSeq nonTerminal separators operators ip = (* parse one or more nonTerminals separated by *) (* separators which stand for operators. *) let val (t, rest) = nonTerminal ip; (* the first *) fun oSeq t1 (s::ss) (opr::oprs) (ip as (x::xs)) = if s = x then (* NB. left associative... *) let val (t2, rest2) = nonTerminal xs in oSeq (binexp(t1,opr,t2)) separators operators rest2 end else (* s not = x *) oSeq t1 ss oprs ip | oSeq t1 _ _ ip = (t1, ip) in oSeq t separators operators rest end (* of oprSeq *) and term ip = oprSeq operand [timesSy, divSy] [times, divide] ip and arithExp ip = oprSeq term [plusSy, minusSy] [plus, minus] ip and relExp ip = oprSeq arithExp [eqSy,neSy,ltSy,leSy,gtSy,geSy] [eq, ne, lt, le, gt, ge ] ip in oprSeq relExp [andSy, orSy] [conj, disj] ip end (* of expression *) (* ------------------------------------------------------------------------- *) (* parser driver *) and parse ip = case expression ip of (expErr m, r) => expErr m | (e, eofSy::_) => e | (e, r) => expErr "junk on end of input" ; (* ========================================================================= *) (* some simple syntax test... *) (* e.g. expression(lexical(explode "(1+2)*3-4/5+x")); *) (* ========================================================================= *)