wenzelm@6118: (* Title: Pure/General/symbol.ML wenzelm@6116: ID: $Id$ wenzelm@6116: Author: Markus Wenzel, TU Muenchen wenzelm@8806: License: GPL (GNU GENERAL PUBLIC LICENSE) wenzelm@6116: wenzelm@12116: Generalized characters with infinitely many named symbols. wenzelm@6116: *) wenzelm@6116: wenzelm@6116: signature SYMBOL = wenzelm@6116: sig wenzelm@6116: type symbol wenzelm@6116: val space: symbol wenzelm@10953: val spaces: int -> symbol wenzelm@6857: val sync: symbol wenzelm@6857: val is_sync: symbol -> bool wenzelm@6857: val not_sync: symbol -> bool wenzelm@10747: val malformed: symbol wenzelm@6116: val eof: symbol wenzelm@6116: val is_eof: symbol -> bool wenzelm@6116: val not_eof: symbol -> bool wenzelm@6116: val stopper: symbol * (symbol -> bool) wenzelm@6116: val is_ascii: symbol -> bool wenzelm@6116: val is_letter: symbol -> bool wenzelm@6116: val is_digit: symbol -> bool wenzelm@12904: val is_quasi: symbol -> bool wenzelm@6116: val is_quasi_letter: symbol -> bool wenzelm@6116: val is_letdig: symbol -> bool wenzelm@6116: val is_blank: symbol -> bool paulson@13559: val is_identifier: symbol -> bool wenzelm@8230: val is_symbolic: symbol -> bool wenzelm@6116: val is_printable: symbol -> bool wenzelm@6272: val length: symbol list -> int wenzelm@11010: val strip_blanks: string -> string wenzelm@6116: val beginning: symbol list -> string berghofe@13730: val scan_id: string list -> string * string list wenzelm@6116: val scan: string list -> symbol * string list wenzelm@6640: val scanner: string -> (symbol list -> 'a * symbol list) -> symbol list -> 'a wenzelm@6116: val source: bool -> (string, 'a) Source.source -> wenzelm@6116: (symbol, (string, 'a) Source.source) Source.source wenzelm@6272: val explode: string -> symbol list wenzelm@12904: val bump_string: string -> string wenzelm@10953: val default_indent: string * int -> string wenzelm@10953: val add_mode: string -> (string -> string * real) * (string * int -> string) -> unit wenzelm@6692: val symbolsN: string wenzelm@6692: val xsymbolsN: string wenzelm@10923: val plain_output: string -> string wenzelm@6272: val output: string -> string wenzelm@6272: val output_width: string -> string * real wenzelm@10953: val indent: string * int -> string wenzelm@6116: end; wenzelm@6116: wenzelm@6116: structure Symbol: SYMBOL = wenzelm@6116: struct wenzelm@6116: wenzelm@6116: wenzelm@6272: (** generalized characters **) wenzelm@6272: wenzelm@6272: (*symbols, which are considered the smallest entities of any Isabelle wenzelm@6272: string, may be of the following form: wenzelm@6272: (a) ASCII symbols: a wenzelm@6272: (b) printable symbols: \ schirmer@14361: (c) control symbols: \<^ctrlident> wenzelm@6272: wenzelm@12116: output is subject to the print_mode variable (default: verbatim), wenzelm@12116: actual interpretation in display is up to front-end tools; wenzelm@6272: *) wenzelm@6272: wenzelm@6272: type symbol = string; wenzelm@6272: wenzelm@6272: val space = " "; wenzelm@10953: fun spaces k = Library.replicate_string k space; wenzelm@6857: val sync = "\\<^sync>"; wenzelm@10747: val malformed = "\\<^malformed>"; wenzelm@6272: val eof = ""; wenzelm@6272: wenzelm@6272: wenzelm@6272: (* kinds *) wenzelm@6272: wenzelm@6857: fun is_sync s = s = sync; wenzelm@6857: fun not_sync s = s <> sync; wenzelm@6857: wenzelm@6272: fun is_eof s = s = eof; wenzelm@6272: fun not_eof s = s <> eof; wenzelm@6272: val stopper = (eof, is_eof); wenzelm@6272: wenzelm@6272: fun is_ascii s = size s = 1 andalso ord s < 128; wenzelm@6272: skalberg@14171: local skalberg@14171: fun wrap s = "\\<" ^ s ^ ">" skalberg@14171: skalberg@14171: val cal_letters = skalberg@14171: ["A","B","C","D","E","F","G","H","I","J","K","L","M", skalberg@14171: "N","O","P","Q","R","S","T","U","V","W","X","Y","Z"] skalberg@14171: skalberg@14171: val small_letters = skalberg@14171: ["a","b","c","d","e","f","g","h","i","j","k","l","m", skalberg@14171: "n","o","p","q","r","s","t","u","v","w","x","y","z"] skalberg@14171: skalberg@14171: val goth_letters = skalberg@14171: ["AA","BB","CC","DD","EE","FF","GG","HH","II","JJ","KK","LL","MM", skalberg@14171: "NN","OO","PP","QQ","RR","SS","TT","UU","VV","WW","XX","YY","ZZ", skalberg@14171: "aa","bb","cc","dd","ee","ff","gg","hh","ii","jj","kk","ll","mm", skalberg@14171: "nn","oo","pp","qq","rr","ss","tt","uu","vv","ww","xx","yy","zz"] skalberg@14171: skalberg@14171: val greek_letters = skalberg@14171: ["alpha","beta","gamma","delta","epsilon","zeta","eta","theta", skalberg@14171: "iota","kappa",(*"lambda",*)"mu","nu","xi","pi","rho","sigma","tau", skalberg@14171: "upsilon","phi","psi","omega","Gamma","Delta","Theta","Lambda", skalberg@14171: "Xi","Pi","Sigma","Upsilon","Phi","Psi","Omega"] skalberg@14171: skalberg@14171: val bbb_letters = ["bool","complex","nat","rat","real","int"] skalberg@14171: kleing@14234: val control_letters = ["^isub", "^isup"] kleing@14232: skalberg@14171: val pre_letters = skalberg@14171: cal_letters @ skalberg@14171: small_letters @ skalberg@14171: goth_letters @ kleing@14232: greek_letters @ kleing@14232: control_letters kleing@14232: skalberg@14171: in kleing@14232: val ext_letters = map wrap pre_letters skalberg@14173: skalberg@14171: fun is_ext_letter s = String.isPrefix "\\<" s andalso s mem ext_letters skalberg@14171: end skalberg@14171: wenzelm@6272: fun is_letter s = skalberg@14171: (size s = 1 andalso skalberg@14171: (ord "A" <= ord s andalso ord s <= ord "Z" orelse skalberg@14171: ord "a" <= ord s andalso ord s <= ord "z")) skalberg@14171: orelse is_ext_letter s wenzelm@6272: wenzelm@6272: fun is_digit s = skalberg@14173: size s = 1 andalso ord "0" <= ord s andalso ord s <= ord "9" wenzelm@6272: wenzelm@12904: fun is_quasi "_" = true wenzelm@12904: | is_quasi "'" = true wenzelm@12904: | is_quasi _ = false; wenzelm@12904: wenzelm@12904: fun is_quasi_letter s = is_quasi s orelse is_letter s; wenzelm@6272: wenzelm@6272: val is_blank = paulson@14221: fn " " => true | "\t" => true | "\r" => true | "\n" => true | "\^L" => true wenzelm@6272: | "\160" => true | "\\" => true wenzelm@6272: | _ => false; wenzelm@6272: wenzelm@12904: fun is_letdig s = is_quasi_letter s orelse is_digit s; wenzelm@6272: wenzelm@8230: fun is_symbolic s = skalberg@14171: size s > 2 andalso nth_elem_string (2, s) <> "^" skalberg@14173: andalso not (is_ext_letter s); wenzelm@8230: wenzelm@6272: fun is_printable s = wenzelm@6272: size s = 1 andalso ord space <= ord s andalso ord s <= ord "~" orelse skalberg@14173: is_ext_letter s orelse wenzelm@8230: is_symbolic s; wenzelm@8230: schirmer@14361: fun is_ctrl_letter s = schirmer@14361: size s = 1 andalso ord space <= ord s andalso ord s <= ord "~" andalso s <> ">"; schirmer@14361: paulson@13559: fun is_identifier s = paulson@13559: case (explode s) of paulson@13559: [] => false paulson@13559: | c::cs => is_letter c andalso forall is_letdig cs; wenzelm@6272: wenzelm@10738: fun sym_length ss = foldl (fn (n, s) => wenzelm@10738: (if not (is_printable s) then 0 else wenzelm@10738: (case Library.try String.substring (s, 2, 4) of wenzelm@10738: Some s' => if s' = "long" orelse s' = "Long" then 2 else 1 wenzelm@10738: | None => 1)) + n) (0, ss); wenzelm@6272: wenzelm@11010: fun strip_blanks s = wenzelm@11010: implode (#1 (Library.take_suffix is_blank (#2 (Library.take_prefix is_blank (explode s))))); wenzelm@11010: wenzelm@6272: wenzelm@6272: (* beginning *) wenzelm@6272: wenzelm@6272: val smash_blanks = map (fn s => if is_blank s then space else s); wenzelm@6272: wenzelm@6272: fun beginning raw_ss = wenzelm@6272: let wenzelm@6272: val (all_ss, _) = take_suffix is_blank raw_ss; wenzelm@6272: val dots = if length all_ss > 10 then " ..." else ""; wenzelm@6272: val (ss, _) = take_suffix is_blank (take (10, all_ss)); wenzelm@6272: in implode (smash_blanks ss) ^ dots end; wenzelm@6272: wenzelm@6272: wenzelm@6272: wenzelm@8998: (** scanning through symbols **) wenzelm@6640: wenzelm@6640: fun scanner msg scan chs = wenzelm@6640: let wenzelm@6640: fun err_msg cs = msg ^ ": " ^ beginning cs; wenzelm@6640: val fin_scan = Scan.error (Scan.finite stopper (!! (fn (cs, _) => err_msg cs) scan)); wenzelm@6640: in wenzelm@6640: (case fin_scan chs of wenzelm@6640: (result, []) => result wenzelm@6640: | (_, rest) => error (err_msg rest)) wenzelm@6640: end; wenzelm@6640: wenzelm@6640: wenzelm@6640: wenzelm@6272: (** symbol input **) wenzelm@6116: wenzelm@6116: (* scan *) wenzelm@6116: wenzelm@6116: val scan_id = Scan.one is_letter ^^ (Scan.any is_letdig >> implode); schirmer@14361: val scan_ctrlid = Scan.one is_letter ^^ (Scan.any is_ctrl_letter >> implode); wenzelm@6116: wenzelm@6116: val scan = schirmer@14361: $$ "\\" ^^ $$ "<" ^^ wenzelm@6116: !! (fn (cs, _) => "Malformed symbolic character specification: \\" ^ "<" ^ beginning cs) schirmer@14361: ((($$ "^" ^^ scan_ctrlid) || scan_id) ^^ $$ ">") || wenzelm@6116: Scan.one not_eof; wenzelm@6116: wenzelm@6116: (* source *) wenzelm@6116: wenzelm@10747: val recover = Scan.any ((not o is_blank) andf not_eof) >> K [malformed]; wenzelm@6116: wenzelm@6116: fun source do_recover src = wenzelm@6116: Source.source stopper (Scan.bulk scan) (if do_recover then Some recover else None) src; wenzelm@6116: wenzelm@6116: wenzelm@6116: (* explode *) wenzelm@6116: wenzelm@6116: fun no_syms [] = true wenzelm@6116: | no_syms ("\\" :: "<" :: _) = false wenzelm@12116: | no_syms (_ :: cs) = no_syms cs; wenzelm@6116: wenzelm@6116: fun sym_explode str = wenzelm@6116: let val chs = explode str in wenzelm@6116: if no_syms chs then chs (*tune trivial case*) wenzelm@12116: else the (Scan.read stopper (Scan.repeat scan) chs) wenzelm@6116: end; wenzelm@6116: wenzelm@6116: wenzelm@12904: (* bump_string -- increment suffix of lowercase letters like a base 26 number *) wenzelm@12904: wenzelm@12904: fun bump_string str = wenzelm@12904: let wenzelm@12904: fun bump [] = ["a"] wenzelm@12904: | bump ("z" :: ss) = "a" :: bump ss wenzelm@12904: | bump (s :: ss) = wenzelm@12904: if size s = 1 andalso ord "a" <= ord s andalso ord s < ord "z" wenzelm@12904: then chr (ord s + 1) :: ss wenzelm@12904: else "a" :: s :: ss; wenzelm@12904: val (cs, qs) = Library.take_suffix is_quasi (sym_explode str); wenzelm@12904: in implode (rev (bump (rev cs)) @ qs) end; wenzelm@12904: wenzelm@6272: wenzelm@6272: (** symbol output **) wenzelm@6272: wenzelm@10953: (* default *) wenzelm@6272: wenzelm@6272: fun string_size s = (s, real (size s)); wenzelm@6272: berghofe@13730: val escape = Scan.repeat berghofe@13730: ((($$ "\\" >> K "\\\\") ^^ Scan.optional ($$ "\\" >> K "\\\\") "" ^^ $$ "<" ^^ berghofe@13730: Scan.optional ($$ "^") "" ^^ scan_id ^^ $$ ">") || berghofe@13730: Scan.one not_eof) >> implode; berghofe@13730: wenzelm@6272: fun default_output s = wenzelm@6320: if not (exists_string (equal "\\") s) then string_size s berghofe@13730: else string_size (fst (Scan.finite stopper escape (explode s))); wenzelm@6272: wenzelm@10953: fun default_indent (_: string, k) = spaces k; wenzelm@6272: wenzelm@10953: wenzelm@6272: (* maintain modes *) wenzelm@6272: wenzelm@6692: val symbolsN = "symbols"; wenzelm@6692: val xsymbolsN = "xsymbols"; wenzelm@6692: wenzelm@12116: val modes = wenzelm@12116: ref (Symtab.empty: ((string -> string * real) * (string * int -> string)) Symtab.table); wenzelm@6272: wenzelm@6272: fun lookup_mode name = Symtab.lookup (! modes, name); wenzelm@6272: wenzelm@10953: fun add_mode name m = wenzelm@6272: (if is_none (lookup_mode name) then () wenzelm@6320: else warning ("Redeclaration of symbol print mode " ^ quote name); wenzelm@10953: modes := Symtab.update ((name, m), ! modes)); wenzelm@10953: wenzelm@10953: fun get_mode () = wenzelm@10953: if_none (get_first lookup_mode (! print_mode)) (default_output, default_indent); wenzelm@6272: wenzelm@6272: wenzelm@6272: (* mode output *) wenzelm@6272: wenzelm@10953: fun output_width x = #1 (get_mode ()) x; wenzelm@6272: val output = #1 o output_width; wenzelm@10923: val plain_output = #1 o default_output; wenzelm@6272: wenzelm@10953: fun indent x = #2 (get_mode ()) x; wenzelm@10953: wenzelm@6272: wenzelm@6116: (*final declarations of this structure!*) wenzelm@6272: val length = sym_length; wenzelm@6116: val explode = sym_explode; wenzelm@6272: wenzelm@6116: wenzelm@6116: end;