# HG changeset patch
# User wenzelm
# Date 1278099666 -7200
# Node ID ece640e48a6c06a2350440cc60bdba5ca38ff237
# Parent  ad5489a6be482d0b9c0df1adc9ccdd57e0f92e8a
do not open auxiliary ML structures;
misc tuning;

diff -r ad5489a6be48 -r ece640e48a6c src/Pure/Syntax/parser.ML
--- a/src/Pure/Syntax/parser.ML	Fri Jul 02 20:44:17 2010 +0200
+++ b/src/Pure/Syntax/parser.ML	Fri Jul 02 21:41:06 2010 +0200
@@ -23,19 +23,16 @@
 structure Parser: PARSER =
 struct
 
-open Lexicon Syn_Ext;
-
-
 (** datatype gram **)
 
 type nt_tag = int;                 (*production for the NTs are stored in an array
                                      so we can identify NTs by their index*)
 
-datatype symb = Terminal of token
+datatype symb = Terminal of Lexicon.token
               | Nonterminal of nt_tag * int;  (*(tag, precedence)*)
 
-type nt_gram = ((nt_tag list * token list) *
-                (token option * (symb list * string * int) list) list);
+type nt_gram = ((nt_tag list * Lexicon.token list) *
+                (Lexicon.token option * (symb list * string * int) list) list);
                 (*(([dependent_nts], [start_tokens]),
                    [(start_token, [(rhs, name, prio)])])*)
                 (*depent_nts is a list of all NTs whose lookahead
@@ -53,8 +50,8 @@
     lambda productions are stored as normal productions
     and also as an entry in "lambdas"*)
 
-val UnknownStart = eof;        (*productions for which no starting token is
-                                 known yet are associated with this token*)
+val UnknownStart = Lexicon.eof;  (*productions for which no starting token is
+                                   known yet are associated with this token*)
 
 (* get all NTs that are connected with a list of NTs
    (used for expanding chain list)*)
@@ -125,7 +122,7 @@
     (*get all known starting tokens for a nonterminal*)
     fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
 
-    val token_union = uncurry (union matching_tokens);
+    val token_union = uncurry (union Lexicon.matching_tokens);
 
     (*update prods, lookaheads, and lambdas according to new lambda NTs*)
     val (added_starts', lambdas') =
@@ -158,7 +155,7 @@
               val nt_dependencies' = union (op =) nts nt_dependencies;
 
               (*associate production with new starting tokens*)
-              fun copy ([]: token option list) nt_prods = nt_prods
+              fun copy ([]: Lexicon.token option list) nt_prods = nt_prods
                 | copy (tk :: tks) nt_prods =
                   let
                     val old_prods = these (AList.lookup (op =) nt_prods tk);
@@ -259,7 +256,7 @@
           let
             val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
 
-            val new_tks = subtract matching_tokens old_tks start_tks;
+            val new_tks = subtract Lexicon.matching_tokens old_tks start_tks;
 
             (*store new production*)
             fun store [] prods is_new =
@@ -278,7 +275,7 @@
                     else (new_prod :: tk_prods, true);
 
                   val prods' = prods
-                    |> is_new' ? AList.update (op =) (tk: token option, tk_prods');
+                    |> is_new' ? AList.update (op =) (tk: Lexicon.token option, tk_prods');
                 in store tks prods' (is_new orelse is_new') end;
 
             val (nt_prods', prod_count', changed) =
@@ -329,10 +326,10 @@
                        andalso member (op =) new_tks (the tk);
 
                     (*associate production with new starting tokens*)
-                    fun copy ([]: token list) nt_prods = nt_prods
+                    fun copy ([]: Lexicon.token list) nt_prods = nt_prods
                       | copy (tk :: tks) nt_prods =
                         let
-                          val tk_prods = (these o AList.lookup (op =) nt_prods) (SOME tk);
+                          val tk_prods = these (AList.lookup (op =) nt_prods (SOME tk));
 
                           val tk_prods' =
                             if not lambda then p :: tk_prods
@@ -359,7 +356,7 @@
                   val (lookahead as (old_nts, old_tks), nt_prods) =
                     Array.sub (prods, nt);
 
-                  val tk_prods = (these o AList.lookup (op =) nt_prods) key;
+                  val tk_prods = these (AList.lookup (op =) nt_prods key);
 
                   (*associate productions with new lookahead tokens*)
                   val (tk_prods', nt_prods') =
@@ -370,7 +367,7 @@
                     |> (key = SOME UnknownStart) ?
                       AList.update (op =) (key, tk_prods')
                   val added_tks =
-                    subtract matching_tokens old_tks new_tks;
+                    subtract Lexicon.matching_tokens old_tks new_tks;
                 in
                   if null added_tks then
                     (Array.update (prods, nt, (lookahead, nt_prods')); process_nts nts added)
@@ -381,7 +378,7 @@
             end;
 
       val ((dependent, _), _) = Array.sub (prods, changed_nt);
-    in add_starts (starts @ (process_nts dependent [])) end;
+    in add_starts (starts @ process_nts dependent []) end;
   in add_starts added_starts' end;
 
 in add_prods prods chains' lambdas' prod_count ps end;
@@ -394,8 +391,8 @@
     val nt_name =
       the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags)));
 
-    fun pretty_symb (Terminal (Token (Literal, s, _))) = Pretty.quote (Pretty.str s)
-      | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok)
+    fun pretty_symb (Terminal (Lexicon.Token (Lexicon.Literal, s, _))) = Pretty.quote (Pretty.str s)
+      | pretty_symb (Terminal tok) = Pretty.str (Lexicon.str_of_token tok)
       | pretty_symb (Nonterminal (tag, p)) =
           Pretty.str (nt_name tag ^ "[" ^ signed_string_of_int p ^ "]");
 
@@ -422,7 +419,6 @@
 
 (** Operations on gramars **)
 
-(*The mother of all grammars*)
 val empty_gram = Gram {nt_count = 0, prod_count = 0, tags = Symtab.empty,
                        chains = [], lambdas = [],
                        prods = Array.array (0, (([], []), []))};
@@ -454,12 +450,13 @@
       delimiters and predefined terms are stored as terminals,
       nonterminals are converted to integer tags*)
     fun symb_of [] nt_count tags result = (nt_count, tags, rev result)
-      | symb_of ((Delim s) :: ss) nt_count tags result =
-          symb_of ss nt_count tags (Terminal (Token (Literal, s, Position.no_range)) :: result)
-      | symb_of ((Argument (s, p)) :: ss) nt_count tags result =
+      | symb_of ((Syn_Ext.Delim s) :: ss) nt_count tags result =
+          symb_of ss nt_count tags
+            (Terminal (Lexicon.Token (Lexicon.Literal, s, Position.no_range)) :: result)
+      | symb_of ((Syn_Ext.Argument (s, p)) :: ss) nt_count tags result =
           let
             val (nt_count', tags', new_symb) =
-              case predef_term s of
+              case Lexicon.predef_term s of
                 NONE =>
                   let val (nt_count', tags', s_tag) = get_tag nt_count tags s;
                   in (nt_count', tags', Nonterminal (s_tag, p)) end
@@ -471,7 +468,7 @@
     (*Convert list of productions by invoking symb_of for each of them*)
     fun prod_of [] nt_count prod_count tags result =
           (nt_count, prod_count, tags, result)
-      | prod_of ((XProd (lhs, xsymbs, const, pri)) :: ps)
+      | prod_of ((Syn_Ext.XProd (lhs, xsymbs, const, pri)) :: ps)
             nt_count prod_count tags result =
           let
             val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs;
@@ -604,7 +601,7 @@
 
 datatype parsetree =
   Node of string * parsetree list |
-  Tip of token;
+  Tip of Lexicon.token;
 
 type state =
   nt_tag * int *          (*identification and production precedence*)
@@ -675,7 +672,7 @@
 
 
 fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c =
-  if valued_token c then
+  if Lexicon.valued_token c then
     (A, j, ts @ [Tip c], sa, id, i)
   else (A, j, ts, sa, id, i);
 
@@ -695,105 +692,105 @@
 (*get all productions of a NT and NTs chained to it which can
   be started by specified token*)
 fun prods_for prods chains include_none tk nts =
-let
-  fun token_assoc (list, key) =
-    let fun assoc [] result = result
-          | assoc ((keyi, pi) :: pairs) result =
-              if is_some keyi andalso matching_tokens (the keyi, key)
-                 orelse include_none andalso is_none keyi then
-                assoc pairs (pi @ result)
-              else assoc pairs result;
-    in assoc list [] end;
+  let
+    fun token_assoc (list, key) =
+      let fun assoc [] result = result
+            | assoc ((keyi, pi) :: pairs) result =
+                if is_some keyi andalso Lexicon.matching_tokens (the keyi, key)
+                  orelse include_none andalso is_none keyi then
+                  assoc pairs (pi @ result)
+                else assoc pairs result;
+      in assoc list [] end;
 
-  fun get_prods [] result = result
-    | get_prods (nt :: nts) result =
-      let val nt_prods = snd (Array.sub (prods, nt));
-      in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end;
-in get_prods (connected_with chains nts nts) [] end;
+    fun get_prods [] result = result
+      | get_prods (nt :: nts) result =
+          let val nt_prods = snd (Array.sub (prods, nt));
+          in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end;
+  in get_prods (connected_with chains nts nts) [] end;
 
 
 fun PROCESSS warned prods chains Estate i c states =
-let
-fun all_prods_for nt = prods_for prods chains true c [nt];
+  let
+    fun all_prods_for nt = prods_for prods chains true c [nt];
 
-fun processS used [] (Si, Sii) = (Si, Sii)
-  | processS used (S :: States) (Si, Sii) =
-      (case S of
-        (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) =>
-          let (*predictor operation*)
-            val (used', new_states) =
-              (case AList.lookup (op =) used nt of
-                SOME (usedPrec, l) => (*nonterminal has been processed*)
-                  if usedPrec <= minPrec then
-                    (*wanted precedence has been processed*)
-                    (used, movedot_lambda S l)
-                  else (*wanted precedence hasn't been parsed yet*)
-                    let
-                      val tk_prods = all_prods_for nt;
+    fun processS used [] (Si, Sii) = (Si, Sii)
+      | processS used (S :: States) (Si, Sii) =
+          (case S of
+            (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) =>
+              let (*predictor operation*)
+                val (used', new_states) =
+                  (case AList.lookup (op =) used nt of
+                    SOME (usedPrec, l) => (*nonterminal has been processed*)
+                      if usedPrec <= minPrec then
+                        (*wanted precedence has been processed*)
+                        (used, movedot_lambda S l)
+                      else (*wanted precedence hasn't been parsed yet*)
+                        let
+                          val tk_prods = all_prods_for nt;
 
-                      val States' = mkStates i minPrec nt
-                                      (getRHS' minPrec usedPrec tk_prods);
-                    in (update_prec (nt, minPrec) used,
-                        movedot_lambda S l @ States')
-                    end
+                          val States' = mkStates i minPrec nt
+                            (getRHS' minPrec usedPrec tk_prods);
+                        in (update_prec (nt, minPrec) used,
+                          movedot_lambda S l @ States')
+                        end
 
-              | NONE => (*nonterminal is parsed for the first time*)
-                  let val tk_prods = all_prods_for nt;
-                      val States' = mkStates i minPrec nt
-                                      (getRHS minPrec tk_prods);
-                  in ((nt, (minPrec, [])) :: used, States') end);
+                  | NONE => (*nonterminal is parsed for the first time*)
+                      let val tk_prods = all_prods_for nt;
+                        val States' = mkStates i minPrec nt
+                          (getRHS minPrec tk_prods);
+                      in ((nt, (minPrec, [])) :: used, States') end);
 
-            val dummy =
-              if not (!warned) andalso
-                 length (new_states @ States) > (!branching_level) then
-                (warning "Currently parsed expression could be extremely ambiguous.";
-                 warned := true)
-              else ();
-          in
-            processS used' (new_states @ States) (S :: Si, Sii)
-          end
-      | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*)
-          processS used States
-            (S :: Si,
-             if matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
-      | (A, prec, ts, [], id, j) => (*completer operation*)
-          let val tt = if id = "" then ts else [Node (id, ts)] in
-            if j = i then (*lambda production?*)
-              let
-                val (used', O) = update_trees used (A, (tt, prec));
+                val dummy =
+                  if not (!warned) andalso
+                    length (new_states @ States) > (!branching_level) then
+                    (warning "Currently parsed expression could be extremely ambiguous.";
+                      warned := true)
+                  else ();
               in
-                case O of
-                  NONE =>
-                    let val Slist = getS A prec Si;
-                        val States' = map (movedot_nonterm tt) Slist;
-                    in processS used' (States' @ States) (S :: Si, Sii) end
-                | SOME n =>
-                    if n >= prec then processS used' States (S :: Si, Sii)
-                    else
-                      let val Slist = getS' A prec n Si;
-                          val States' = map (movedot_nonterm tt) Slist;
-                      in processS used' (States' @ States) (S :: Si, Sii) end
+                processS used' (new_states @ States) (S :: Si, Sii)
               end
-            else
-              let val Slist = getStates Estate i j A prec
-              in processS used (map (movedot_nonterm tt) Slist @ States)
-                   (S :: Si, Sii)
-              end
-          end)
-in processS [] states ([], []) end;
+          | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*)
+              processS used States
+                (S :: Si,
+                  if Lexicon.matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
+          | (A, prec, ts, [], id, j) => (*completer operation*)
+              let val tt = if id = "" then ts else [Node (id, ts)] in
+                if j = i then (*lambda production?*)
+                  let
+                    val (used', O) = update_trees used (A, (tt, prec));
+                  in
+                    case O of
+                      NONE =>
+                        let val Slist = getS A prec Si;
+                          val States' = map (movedot_nonterm tt) Slist;
+                        in processS used' (States' @ States) (S :: Si, Sii) end
+                    | SOME n =>
+                        if n >= prec then processS used' States (S :: Si, Sii)
+                        else
+                          let val Slist = getS' A prec n Si;
+                            val States' = map (movedot_nonterm tt) Slist;
+                          in processS used' (States' @ States) (S :: Si, Sii) end
+                  end
+                else
+                  let val Slist = getStates Estate i j A prec
+                  in processS used (map (movedot_nonterm tt) Slist @ States)
+                      (S :: Si, Sii)
+                  end
+              end)
+  in processS [] states ([], []) end;
 
 
 fun produce warned prods tags chains stateset i indata prev_token =
   (case Array.sub (stateset, i) of
     [] =>
       let
-        val toks = if is_eof prev_token then indata else prev_token :: indata;
-        val pos = Position.str_of (pos_of_token prev_token);
+        val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata;
+        val pos = Position.str_of (Lexicon.pos_of_token prev_token);
       in
         if null toks then error ("Inner syntax error: unexpected end of input" ^ pos)
         else error (Pretty.string_of (Pretty.block
          (Pretty.str ("Inner syntax error" ^ pos ^ " at \"") ::
-          Pretty.breaks (map (Pretty.str o str_of_token) (#1 (split_last toks))) @
+          Pretty.breaks (map (Pretty.str o Lexicon.str_of_token) (#1 (split_last toks))) @
           [Pretty.str "\""])))
       end
   | s =>
@@ -807,21 +804,20 @@
       end));
 
 
-fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE)
-  l;
+fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) l;
 
 fun earley prods tags chains startsymbol indata =
   let
-    val start_tag = case Symtab.lookup tags startsymbol of
-                       SOME tag => tag
-                     | NONE => error ("parse: Unknown startsymbol " ^
-                                      quote startsymbol);
-    val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal eof], "", 0)];
+    val start_tag =
+      (case Symtab.lookup tags startsymbol of
+        SOME tag => tag
+      | NONE => error ("parse: Unknown startsymbol " ^ quote startsymbol));
+    val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)];
     val s = length indata + 1;
    val Estate = Array.array (s, []);
   in
     Array.update (Estate, 0, S0);
-    get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata eof)
+    get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata Lexicon.eof)
   end;
 
 
@@ -830,9 +826,9 @@
     val end_pos =
       (case try List.last toks of
         NONE => Position.none
-      | SOME (Token (_, _, (_, end_pos))) => end_pos);
+      | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos);
     val r =
-      (case earley prods tags chains start (toks @ [mk_eof end_pos]) of
+      (case earley prods tags chains start (toks @ [Lexicon.mk_eof end_pos]) of
         [] => sys_error "parse: no parse trees"
       | pts => pts);
   in r end;
@@ -842,7 +838,8 @@
   let
     fun freeze a = map_range (curry Array.sub a) (Array.length a);
     val prods = maps snd (maps snd (freeze (#prods gram)));
-    fun guess (SOME ([Nonterminal (_, k), Terminal (Token (Literal, s, _)), Nonterminal (_, l)], _, j)) =
+    fun guess (SOME ([Nonterminal (_, k),
+          Terminal (Lexicon.Token (Lexicon.Literal, s, _)), Nonterminal (_, l)], _, j)) =
           if k = j andalso l = j + 1 then SOME (s, true, false, j)
           else if k = j + 1 then if l = j then SOME (s, false, true, j)
             else if l = j + 1 then SOME (s, false, false, j)