# HG changeset patch
# User wenzelm
# Date 1734128587 -3600
# Node ID 81a72b7fcb0cce61d0385eaf423383411e68a044
# Parent 68dc8ffc94c246e13bb76a651b4e6ad73bc52981
minor performance tuning: avoid excessive Library.insert and Symbol.explode operations;

diff -r 68dc8ffc94c2 -r 81a72b7fcb0c src/Pure/General/scan.ML
--- a/src/Pure/General/scan.ML	Thu Dec 12 22:53:06 2024 +0100
+++ b/src/Pure/General/scan.ML	Fri Dec 13 23:23:07 2024 +0100
@@ -87,6 +87,7 @@
   val is_literal: lexicon -> string list -> bool
   val literal: lexicon -> (string * 'a) list -> (string * 'a) list * (string * 'a) list
   val empty_lexicon: lexicon
+  val build_lexicon: (lexicon -> lexicon) -> lexicon
   val extend_lexicon: string list -> lexicon -> lexicon
   val make_lexicon: string list list -> lexicon
   val dest_lexicon: lexicon -> string list
@@ -308,6 +309,9 @@
 datatype lexicon = Lexicon of (bool * lexicon) Symtab.table;
 
 val empty_lexicon = Lexicon Symtab.empty;
+
+fun build_lexicon f : lexicon = f empty_lexicon;
+
 fun is_empty_lexicon (Lexicon tab) = Symtab.is_empty tab;
 
 fun is_literal _ [] = false
diff -r 68dc8ffc94c2 -r 81a72b7fcb0c src/Pure/Syntax/syntax.ML
--- a/src/Pure/Syntax/syntax.ML	Thu Dec 12 22:53:06 2024 +0100
+++ b/src/Pure/Syntax/syntax.ML	Fri Dec 13 23:23:07 2024 +0100
@@ -360,6 +360,48 @@
 
 
 
+(** keywords **)
+
+abstype keywords = Keywords of Symset.T * Scan.lexicon lazy
+with
+
+val empty_keywords =
+  Keywords (Symset.empty, Lazy.value Scan.empty_lexicon);
+
+fun make_keywords set =
+  let fun lex () = Scan.build_lexicon (set |> Symset.fold (Scan.extend_lexicon o Symbol.explode))
+  in Keywords (set, Lazy.lazy lex) end;
+
+fun add_keywords s (keywords as Keywords (set, lex)) =
+  if Symset.member set s then keywords
+  else
+    let
+      val set' = Symset.insert s set;
+      val lex' = Lazy.map_finished (fn x => Scan.extend_lexicon (Symbol.explode s) x) lex;
+    in Keywords (set', lex') end;
+
+fun merge_keywords (keywords1 as Keywords (set1, lex1), keywords2 as Keywords (set2, lex2)) =
+  if pointer_eq (keywords1, keywords2) then keywords1
+  else if Symset.is_empty set1 then keywords2
+  else if Symset.is_empty set2 then keywords1
+  else if Symset.subset (set1, set2) then keywords2
+  else if Symset.subset (set2, set1) then keywords1
+  else
+    let
+      val set' = Symset.merge (set1, set2);
+      val lex' = Lazy.value (Scan.merge_lexicons (apply2 Lazy.force (lex1, lex2)));
+    in Keywords (set', lex') end;
+
+fun member_keywords (Keywords (set, _)) = Symset.member set;
+
+fun dest_keywords (Keywords (set, _)) = sort_strings (Symset.dest set);
+
+fun tokenize_keywords (Keywords (_, lex)) = Lexicon.tokenize (Lazy.force lex);
+
+end;
+
+
+
 (** tables of translation functions **)
 
 (* parse (ast) translations *)
@@ -417,7 +459,7 @@
 datatype syntax =
   Syntax of {
     input: Syntax_Ext.xprod list,
-    lexicon: Scan.lexicon,
+    keywords: keywords,
     gram: Parser.gram Synchronized.cache,
     consts: unit Graph.T,
     prmodes: string list,
@@ -451,8 +493,8 @@
 fun is_const (Syntax ({consts, ...}, _)) c =
   Graph.defined consts c andalso not (Lexicon.is_marked c);
 
-fun is_keyword (Syntax ({lexicon, ...}, _)) = Scan.is_literal lexicon o Symbol.explode;
-fun tokenize (Syntax ({lexicon, ...}, _)) = Lexicon.tokenize lexicon;
+fun is_keyword (Syntax ({keywords, ...}, _)) = member_keywords keywords;
+fun tokenize (Syntax ({keywords, ...}, _)) = tokenize_keywords keywords;
 fun parse (Syntax ({gram, ...}, _)) = Parser.parse (cache_eval gram);
 
 fun parse_ast_translation (Syntax ({parse_ast_trtab, ...}, _)) = lookup_tr parse_ast_trtab;
@@ -502,7 +544,7 @@
 
 val empty_syntax = Syntax
   ({input = [],
-    lexicon = Scan.empty_lexicon,
+    keywords = empty_keywords,
     gram = Synchronized.cache (fn () => Parser.empty_gram),
     consts = Graph.empty,
     prmodes = [],
@@ -519,7 +561,7 @@
 
 fun update_syntax (mode, inout) syn_ext (Syntax (tabs, _)) =
   let
-    val {input, lexicon, gram, consts = consts1, prmodes, parse_ast_trtab, parse_ruletab,
+    val {input, keywords, gram, consts = consts1, prmodes, parse_ast_trtab, parse_ruletab,
       parse_trtab, print_trtab, print_ruletab, print_ast_trtab, prtabs} = tabs;
     val Syntax_Ext.Syn_Ext {xprods, consts = consts2, parse_ast_translation, parse_rules,
      parse_translation, print_translation, print_rules,
@@ -530,7 +572,7 @@
   in
     Syntax
     ({input = new_xprods @ input,
-      lexicon = fold Scan.extend_lexicon (Syntax_Ext.delims_of new_xprods) lexicon,
+      keywords = (fold o Syntax_Ext.fold_delims) add_keywords new_xprods keywords,
       gram = if null new_xprods then gram else extend_gram new_xprods input gram,
      consts = fold update_consts consts2 consts1,
      prmodes = insert (op =) mode prmodes,
@@ -551,7 +593,7 @@
   let
     val Syntax_Ext.Syn_Ext {xprods, consts = _, parse_ast_translation, parse_rules,
      parse_translation, print_translation, print_rules, print_ast_translation} = syn_ext;
-    val {input, lexicon, gram, consts, prmodes, parse_ast_trtab, parse_ruletab,
+    val {input, keywords, gram, consts, prmodes, parse_ast_trtab, parse_ruletab,
      parse_trtab, print_trtab, print_ruletab, print_ast_trtab, prtabs} = tabs;
    val input' = if inout then subtract (op =) xprods input else input;
    val changed = length input <> length input';
@@ -559,7 +601,10 @@
   in
     Syntax
     ({input = input',
-      lexicon = if changed then Scan.make_lexicon (Syntax_Ext.delims_of input') else lexicon,
+      keywords =
+        if changed
+        then make_keywords (Symset.build (input' |> (fold o Syntax_Ext.fold_delims) Symset.insert))
+        else keywords,
      gram = if changed then new_gram input' else gram,
      consts = consts,
      prmodes = prmodes,
@@ -577,12 +622,12 @@
 
 fun merge_syntax (Syntax (tabs1, _), Syntax (tabs2, _)) =
   let
-    val {input = input1, lexicon = lexicon1, gram = gram1, consts = consts1,
+    val {input = input1, keywords = keywords1, gram = gram1, consts = consts1,
      prmodes = prmodes1, parse_ast_trtab = parse_ast_trtab1, parse_ruletab = parse_ruletab1,
      parse_trtab = parse_trtab1, print_trtab = print_trtab1, print_ruletab = print_ruletab1,
      print_ast_trtab = print_ast_trtab1, prtabs = prtabs1} = tabs1;
-    val {input = input2, lexicon = lexicon2, gram = gram2, consts = consts2,
+    val {input = input2, keywords = keywords2, gram = gram2, consts = consts2,
      prmodes = prmodes2, parse_ast_trtab = parse_ast_trtab2, parse_ruletab = parse_ruletab2,
      parse_trtab = parse_trtab2, print_trtab = print_trtab2, print_ruletab = print_ruletab2,
      print_ast_trtab = print_ast_trtab2, prtabs = prtabs2} = tabs2;
@@ -602,7 +647,7 @@
   in
     Syntax
     ({input = input',
-      lexicon = Scan.merge_lexicons (lexicon1, lexicon2),
+      keywords = merge_keywords (keywords1, keywords2),
      gram = gram',
      consts =
        Graph.merge_acyclic (op =) (consts1, consts2)
@@ -629,11 +674,11 @@
 
 fun pretty_gram (Syntax (tabs, _)) =
   let
-    val {lexicon, prmodes, gram, ...} = tabs;
+    val {keywords, prmodes, gram, ...} = tabs;
    val prmodes' = sort_strings (filter_out (fn s => s = "") prmodes);
   in
    [Pretty.block (Pretty.breaks
-      (Pretty.str "lexicon:" :: map (Pretty.quote o Pretty.keyword1) (Scan.dest_lexicon lexicon))),
+      (Pretty.str "lexicon:" :: map (Pretty.quote o Pretty.keyword1) (dest_keywords keywords))),
     Pretty.big_list "productions:" (Parser.pretty_gram (cache_eval gram)),
     pretty_strs_qs "print modes:" prmodes']
   end;
diff -r 68dc8ffc94c2 -r 81a72b7fcb0c src/Pure/Syntax/syntax_ext.ML
--- a/src/Pure/Syntax/syntax_ext.ML	Thu Dec 12 22:53:06 2024 +0100
+++ b/src/Pure/Syntax/syntax_ext.ML	Fri Dec 13 23:23:07 2024 +0100
@@ -19,8 +19,8 @@
     Brk of int |
     En
   datatype xprod = XProd of string * xsymb list * string * int
+  val fold_delims: (string -> 'a -> 'a) -> xprod -> 'a -> 'a
   val chain_pri: int
-  val delims_of: xprod list -> Symbol.symbol list list
   datatype syn_ext =
     Syn_Ext of {
       xprods: xprod list,
@@ -107,12 +107,9 @@
 
 datatype xprod = XProd of string * xsymb list * string * int;
 
-val chain_pri = ~1;  (*dummy for chain productions*)
+fun fold_delims f (XProd (_, xsymbs, _, _)) = fold (fn Delim s => f s | _ => I) xsymbs;
 
-fun delims_of xprods =
-  fold (fn XProd (_, xsymbs, _, _) =>
-    fold (fn Delim s => insert (op =) s | _ => I) xsymbs) xprods []
-  |> map Symbol.explode;
+val chain_pri = ~1;  (*dummy for chain productions*)
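
For orientation, not part of the changeset itself: the patch replaces the eagerly maintained Scan.lexicon inside the syntax tables by a keywords value that pairs an eager string set with a lazily built lexicon, so adding keywords one at a time no longer pays for Symbol.explode and lexicon extension up front. Below is a minimal sketch of that idea in plain Standard ML; the susp/lazy/force cells, the list-based stand-in lexicon, and all helper names are illustrative substitutes for Isabelle's Lazy, Symset and Scan modules, not the real API.

(* Sketch: keywords = eager set of strings + lazily built lexicon.
   Plain Standard ML; approximates the structure introduced by the patch above. *)

(* tiny lazy cells, standing in for Isabelle's lazy values *)
datatype 'a susp = Value of 'a | Thunk of (unit -> 'a)
type 'a lazy = 'a susp ref

fun lazy f : 'a lazy = ref (Thunk f)
fun value x : 'a lazy = ref (Value x)
fun force (r: 'a lazy) =
  (case !r of
    Value x => x
  | Thunk f => let val x = f () in r := Value x; x end)

(* stand-in "lexicon": a list of exploded keywords *)
type lexicon = string list list
val empty_lexicon : lexicon = []
fun extend_lexicon (chars: string list) (lex: lexicon) = chars :: lex
fun build_lexicon (f: lexicon -> lexicon) = f empty_lexicon   (* same shape as Scan.build_lexicon *)

fun explode_string s = map String.str (String.explode s)

(* keywords: membership tests use the eager set; the lexicon is only built on demand *)
type keywords = {set: string list, lex: lexicon lazy}

val empty_keywords : keywords = {set = [], lex = value empty_lexicon}

fun member_keywords ({set, ...}: keywords) s = List.exists (fn x => x = s) set

fun add_keywords s (kws as {set, lex}: keywords) : keywords =
  if member_keywords kws s then kws
  else
    {set = s :: set,
     (* defer exploding s and extending the lexicon until somebody forces it *)
     lex = lazy (fn () => extend_lexicon (explode_string s) (force lex))}

fun lexicon_of ({lex, ...}: keywords) = force lex

(* usage: many cheap additions, one deferred lexicon construction *)
val kws = List.foldl (fn (s, k) => add_keywords s k) empty_keywords ["if", "then", "else"]
val _ = lexicon_of kws   (* only now are the keywords exploded *)

As in the patch, merging two such values would force the underlying lexicons only in the non-trivial case, after the cheap pointer_eq, is_empty and subset tests on the eager sets have failed.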