# HG changeset patch # User wenzelm # Date 1517333898 -3600 # Node ID 1b8aad1909b73b1b28efaacd766896403635d09c # Parent 7747761fa067a0e677f3baf4845f6dfaa161ad67 tuned data structure and operations; diff -r 7747761fa067 -r 1b8aad1909b7 src/Pure/Syntax/lexicon.ML --- a/src/Pure/Syntax/lexicon.ML Tue Jan 30 16:12:50 2018 +0100 +++ b/src/Pure/Syntax/lexicon.ML Tue Jan 30 18:38:18 2018 +0100 @@ -23,6 +23,7 @@ datatype token_kind = Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy | FloatSy | StrSy | StringSy | Cartouche | Space | Comment of Comment.kind option | EOF + val token_kind_ord: token_kind * token_kind -> order datatype token = Token of token_kind * string * Position.range val kind_of_token: token -> token_kind val str_of_token: token -> string @@ -115,6 +116,28 @@ Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy | FloatSy | StrSy | StringSy | Cartouche | Space | Comment of Comment.kind option | EOF; +val token_kind_index = + fn Literal => 0 + | IdentSy => 1 + | LongIdentSy => 2 + | VarSy => 3 + | TFreeSy => 4 + | TVarSy => 5 + | NumSy => 6 + | FloatSy => 7 + | StrSy => 8 + | StringSy => 9 + | Cartouche => 10 + | Space => 11 + | Comment NONE => 12 + | Comment (SOME Comment.Comment) => 13 + | Comment (SOME Comment.Cancel) => 14 + | Comment (SOME Comment.Latex) => 15 + | EOF => 16; + +val token_kind_ord = int_ord o apply2 token_kind_index; + + datatype token = Token of token_kind * string * Position.range; fun kind_of_token (Token (k, _, _)) = k; diff -r 7747761fa067 -r 1b8aad1909b7 src/Pure/Syntax/parser.ML --- a/src/Pure/Syntax/parser.ML Tue Jan 30 16:12:50 2018 +0100 +++ b/src/Pure/Syntax/parser.ML Tue Jan 30 18:38:18 2018 +0100 @@ -26,6 +26,36 @@ (** datatype gram **) +(* tokens *) + +fun tokens_match toks = + (case apply2 Lexicon.kind_of_token toks of + (Lexicon.Literal, Lexicon.Literal) => op = (apply2 Lexicon.str_of_token toks) + | kinds => op = kinds); + +fun tokens_match_ord toks = + (case apply2 Lexicon.kind_of_token toks of + (Lexicon.Literal, Lexicon.Literal) => fast_string_ord (apply2 Lexicon.str_of_token toks) + | kinds => Lexicon.token_kind_ord kinds); + +structure Tokens = Table(type key = Lexicon.token val ord = tokens_match_ord); + +type tokens = Tokens.set; +val tokens_empty: tokens = Tokens.empty; +val tokens_merge: tokens * tokens -> tokens = Tokens.merge (K true); +fun tokens_insert tk : tokens -> tokens = Tokens.insert_set tk; +fun tokens_remove tk : tokens -> tokens = Tokens.remove_set tk; +fun tokens_member (tokens: tokens) = Tokens.defined tokens; +fun tokens_is_empty (tokens: tokens) = Tokens.is_empty tokens; +fun tokens_fold f (tokens: tokens) = Tokens.fold (f o #1) tokens; +val tokens_union = tokens_fold tokens_insert; +val tokens_subtract = tokens_fold tokens_remove; +fun tokens_find P (tokens: tokens) = + Tokens.get_first (fn (tok, ()) => if P tok then SOME tok else NONE) tokens; + + +(* nonterminals *) + (*production for the NTs are stored in a vector so we can identify NTs by their index*) type nt = int; @@ -45,12 +75,12 @@ | Nonterminal of nt * int; (*(tag, precedence)*) type nt_gram = - ((nts * Lexicon.token list) * + ((nts * tokens) * (Lexicon.token * (symb list * string * int) list) list); (*(([dependent_nts], [start_tokens]), [(start_token, [(rhs, name, prio)])])*) (*depent_nts is a list of all NTs whose lookahead depends on this NT's lookahead*) -val nt_gram_empty: nt_gram = ((nts_empty, []), []); +val nt_gram_empty: nt_gram = ((nts_empty, tokens_empty), []); type tags = nt Symtab.table; val tags_empty: tags = Symtab.empty; @@ -83,22 +113,16 @@ lambda productions are stored as normal productions and also as an entry in "lambdas"*) -fun tokens_match toks = - (case apply2 Lexicon.kind_of_token toks of - (Lexicon.Literal, Lexicon.Literal) => op = (apply2 Lexicon.str_of_token toks) - | kinds => op = kinds); - -val tokens_union = union tokens_match; -val tokens_subtract = subtract tokens_match; - val token_none = Lexicon.Token (Lexicon.Space, "", Position.no_range); (*productions for which no starting token is known yet are associated with this token*) val unknown_start = Lexicon.eof; -fun get_start (tok :: _) = tok - | get_start [] = unknown_start; +fun get_start tks = + (case Tokens.min tks of + SOME (tk, _) => tk + | NONE => unknown_start); (*convert productions to grammar; prod_count is of type "int option" and is only updated if it is <> NONE*) @@ -124,20 +148,18 @@ val ((_, from_tks), _) = Array.sub (prods, the chain); (*copy from's lookahead to chain's destinations*) - fun copy_lookahead [] added = added - | copy_lookahead (to :: tos) added = - let - val ((to_nts, to_tks), ps) = Array.sub (prods, to); + fun copy_lookahead to added = + let + val ((to_nts, to_tks), ps) = Array.sub (prods, to); - val new_tks = tokens_subtract to_tks from_tks; (*added lookahead tokens*) - val _ = Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps)); - in - copy_lookahead tos (if null new_tks then added else (to, new_tks) :: added) - end; + val new_tks = tokens_subtract to_tks from_tks; (*added lookahead tokens*) + val to_tks' = tokens_merge (to_tks, new_tks); + val _ = Array.update (prods, to, ((to_nts, to_tks'), ps)); + in if tokens_is_empty new_tks then added else (to, new_tks) :: added end; val tos = chains_all_succs chains' [lhs]; in - (copy_lookahead tos [], + (fold copy_lookahead tos [], lambdas |> nts_member lambdas lhs ? fold nts_insert tos) end; @@ -185,28 +207,23 @@ is_none tk andalso nts_subset (nts, lambdas); val new_tks = - the_list tk + tokens_empty + |> fold tokens_insert (the_list tk) |> nts_fold (tokens_union o starts_for_nt) nts |> tokens_subtract l_starts; - val added_tks' = tokens_union added_tks new_tks; + val added_tks' = tokens_merge (added_tks, new_tks); val nt_dependencies' = nts_merge (nt_dependencies, nts); (*associate production with new starting tokens*) - fun copy ([]: Lexicon.token list) nt_prods = nt_prods - | copy (tk :: tks) nt_prods = - let - val old_prods = these (AList.lookup (op =) nt_prods tk); - val prods' = p :: old_prods; - in - nt_prods - |> AList.update (op =) (tk, prods') - |> copy tks - end; + fun copy (tk: Lexicon.token) nt_prods = + let val old_prods = these (AList.lookup (op =) nt_prods tk); + in AList.update (op =) (tk, p :: old_prods) nt_prods end; - val nt_prods' = - copy (new_tks |> new_lambda ? cons token_none) nt_prods; + val nt_prods' = nt_prods + |> tokens_fold copy new_tks + |> new_lambda ? copy token_none; in examine_prods ps (add_lambda orelse new_lambda) nt_dependencies' added_tks' nt_prods' @@ -227,19 +244,19 @@ (*add_lambda is true if an existing production of the nt produces lambda due to the new lambda NT l*) val (add_lambda, nt_dependencies, added_tks, nt_prods') = - examine_prods tk_prods false nts_empty [] nt_prods; + examine_prods tk_prods false nts_empty tokens_empty nt_prods; val new_nts = nts_merge (old_nts, nt_dependencies); + val new_tks = tokens_merge (old_tks, added_tks); val added_lambdas' = added_lambdas |> add_lambda ? cons nt; - val _ = - Array.update (prods, nt, ((new_nts, old_tks @ added_tks), nt_prods')); + val _ = Array.update (prods, nt, ((new_nts, new_tks), nt_prods')); (*N.B. that because the tks component is used to access existing productions we have to add new tokens at the _end_ of the list*) val added_starts' = - if null added_tks then added_starts + if tokens_is_empty added_tks then added_starts else ((nt, added_tks) :: added_starts); in (added_lambdas', added_starts') end; @@ -267,13 +284,15 @@ val (start_tk, start_nts) = lookahead_dependency lambdas' rhs nts_empty; val start_tks = - the_list start_tk + tokens_empty + |> fold tokens_insert (the_list start_tk) |> nts_fold (tokens_union o starts_for_nt) start_nts; val start_tks' = - (if is_some new_lambdas then [token_none] - else if null start_tks then [unknown_start] - else []) @ start_tks; + start_tks + |> (if is_some new_lambdas then tokens_insert token_none + else if tokens_is_empty start_tks then tokens_insert unknown_start + else I); (*add lhs NT to list of dependent NTs in lookahead*) fun add_nts nt initial = @@ -294,39 +313,37 @@ val new_tks = tokens_subtract old_tks start_tks; (*store new production*) - fun store [] prods is_new = - (prods, - if is_some prod_count andalso is_new then - Option.map (fn x => x + 1) prod_count - else prod_count, is_new) - | store (tk :: tks) prods is_new = - let - val tk_prods = these (AList.lookup (op =) prods tk); + fun store tk (prods, is_new) = + let + val tk_prods = these (AList.lookup (op =) prods tk); + + (*if prod_count = NONE then we can assume that + grammar does not contain new production already*) + val (tk_prods', is_new') = + if is_some prod_count then + if member (op =) tk_prods new_prod then (tk_prods, false) + else (new_prod :: tk_prods, true) + else (new_prod :: tk_prods, true); - (*if prod_count = NONE then we can assume that - grammar does not contain new production already*) - val (tk_prods', is_new') = - if is_some prod_count then - if member (op =) tk_prods new_prod then (tk_prods, false) - else (new_prod :: tk_prods, true) - else (new_prod :: tk_prods, true); + val prods' = prods + |> is_new' ? AList.update (op =) (tk, tk_prods'); + in (prods', is_new orelse is_new') end; - val prods' = - if is_new' then - AList.update (op =) (tk: Lexicon.token, tk_prods') prods - else prods; - in store tks prods' (is_new orelse is_new') end; - - val (nt_prods', prod_count', changed) = - if nt = lhs - then store start_tks' nt_prods false - else (nt_prods, prod_count, false); + val (nt_prods', changed) = (nt_prods, false) + |> nt = lhs ? tokens_fold store start_tks'; + val prod_count' = + if is_some prod_count andalso changed then + Option.map (fn x => x + 1) prod_count + else prod_count; val _ = - if not changed andalso null new_tks then () - else Array.update (prods, nt, ((old_nts, old_tks @ new_tks), nt_prods')); + if not changed andalso tokens_is_empty new_tks then () + else + Array.update + (prods, nt, ((old_nts, tokens_merge (old_tks, new_tks)), nt_prods')); in add_tks nts - (if null new_tks then added else (nt, new_tks) :: added) prod_count' + (if tokens_is_empty new_tks then added else (nt, new_tks) :: added) + prod_count' end; val _ = nts_fold add_nts start_nts true; in @@ -343,7 +360,7 @@ (*token under which old productions which depend on changed_nt could be stored*) val key = - find_first (not o member (op =) new_tks) (starts_for_nt changed_nt) + tokens_find (not o tokens_member new_tks) (starts_for_nt changed_nt) |> the_default unknown_start; (*copy productions whose lookahead depends on changed_nt; @@ -364,26 +381,20 @@ val lambda = not (nts_is_unique depends) orelse not (nts_is_empty depends) andalso is_some tk - andalso member (op =) new_tks (the tk); + andalso tokens_member new_tks (the tk); (*associate production with new starting tokens*) - fun copy ([]: Lexicon.token list) nt_prods = nt_prods - | copy (tk :: tks) nt_prods = - let - val tk_prods = these (AList.lookup (op =) nt_prods tk); - - val tk_prods' = - if not lambda then p :: tk_prods - else insert (op =) p tk_prods; - (*if production depends on lambda NT we - have to look for duplicates*) - in - nt_prods - |> AList.update (op =) (tk, tk_prods') - |> copy tks - end; + fun copy tk nt_prods = + let + val tk_prods = these (AList.lookup (op =) nt_prods tk); + val tk_prods' = + if not lambda then p :: tk_prods + else insert (op =) p tk_prods; + (*if production depends on lambda NT we + have to look for duplicates*) + in AList.update (op =) (tk, tk_prods') nt_prods end; val result = - if update then (tk_prods, copy new_tks nt_prods) + if update then (tk_prods, tokens_fold copy new_tks nt_prods) else if key = unknown_start then (p :: tk_prods, nt_prods) else (tk_prods, nt_prods); in update_prods ps result end; @@ -405,11 +416,12 @@ val added_tks = tokens_subtract old_tks new_tks; in - if null added_tks then + if tokens_is_empty added_tks then (Array.update (prods, nt, (lookahead, nt_prods'')); added) else - (Array.update (prods, nt, ((old_nts, added_tks @ old_tks), nt_prods'')); + (Array.update + (prods, nt, ((old_nts, tokens_merge (old_tks, added_tks)), nt_prods'')); ((nt, added_tks) :: added)) end;