| author | huffman | 
| Wed, 28 Dec 2011 07:58:17 +0100 | |
| changeset 46010 | ebbc2d5cd720 | 
| parent 45666 | d83797ef0d2d | 
| child 46811 | 03a2dc9e0624 | 
| permissions | -rw-r--r-- | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 1 | (* Title: Pure/Isar/token.ML | 
| 5825 | 2 | Author: Markus Wenzel, TU Muenchen | 
| 3 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 4 | Outer token syntax for Isabelle/Isar. | 
| 5825 | 5 | *) | 
| 6 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 7 | signature TOKEN = | 
| 5825 | 8 | sig | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 9 | datatype kind = | 
| 27814 | 10 | Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | | 
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 11 | Nat | Float | String | AltString | Verbatim | Space | Comment | InternalValue | | 
| 40958 
755f8fe7ced9
eliminated obsolete Token.Malformed -- subsumed by Token.Error;
 wenzelm parents: 
40627diff
changeset | 12 | Error of string | Sync | EOF | 
| 27814 | 13 | datatype value = | 
| 14 | Text of string | Typ of typ | Term of term | Fact of thm list | | |
| 15 | Attribute of morphism -> attribute | |
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 16 | type T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 17 | val str_of_kind: kind -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 18 | val position_of: T -> Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 19 | val end_position_of: T -> Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 20 | val pos_of: T -> string | 
| 44658 
5bec9c15ef29
more direct Token.range_pos and Outer_Syntax.read_command, bypassing Thy_Syntax.span;
 wenzelm parents: 
43947diff
changeset | 21 | val range: T list -> Position.range | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 22 | val eof: T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 23 | val is_eof: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 24 | val not_eof: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 25 | val not_sync: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 26 | val stopper: T Scan.stopper | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 27 | val kind_of: T -> kind | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 28 | val is_kind: kind -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 29 | val keyword_with: (string -> bool) -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 30 | val ident_with: (string -> bool) -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 31 | val is_proper: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 32 | val is_semicolon: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 33 | val is_comment: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 34 | val is_begin_ignore: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 35 | val is_end_ignore: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 36 | val is_blank: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 37 | val is_newline: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 38 | val source_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 39 | val source_position_of: T -> Symbol_Pos.text * Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 40 | val content_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 41 | val unparse: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 42 | val text_of: T -> string * string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 43 | val get_value: T -> value option | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 44 | val map_value: (value -> value) -> T -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 45 | val mk_text: string -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 46 | val mk_typ: typ -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 47 | val mk_term: term -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 48 | val mk_fact: thm list -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 49 | val mk_attribute: (morphism -> attribute) -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 50 | val assignable: T -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 51 | val assign: value option -> T -> unit | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 52 | val closure: T -> T | 
| 27814 | 53 | val ident_or_symbolic: string -> bool | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 54 | val source_proper: (T, 'a) Source.source -> (T, (T, 'a) Source.source) Source.source | 
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 55 |   val source': {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 56 | (Symbol_Pos.T, 'a) Source.source -> (T, (Symbol_Pos.T, 'a) Source.source) Source.source | 
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 57 |   val source: {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 58 | Position.T -> (Symbol.symbol, 'a) Source.source -> (T, | 
| 30573 | 59 | (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 60 | val read_antiq: Scan.lexicon -> (T list -> 'a * T list) -> Symbol_Pos.T list * Position.T -> 'a | 
| 5825 | 61 | end; | 
| 62 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 63 | structure Token: TOKEN = | 
| 5825 | 64 | struct | 
| 65 | ||
| 66 | (** tokens **) | |
| 67 | ||
| 27814 | 68 | (* token values *) | 
| 69 | ||
| 70 | (*The value slot assigns an (optional) internal value to a token, | |
| 71 | usually as a side-effect of special scanner setup (see also | |
| 72 | args.ML). Note that an assignable ref designates an intermediate | |
| 73 | state of internalization -- it is NOT meant to persist.*) | |
| 74 | ||
| 75 | datatype value = | |
| 76 | Text of string | | |
| 77 | Typ of typ | | |
| 78 | Term of term | | |
| 79 | Fact of thm list | | |
| 80 | Attribute of morphism -> attribute; | |
| 81 | ||
| 82 | datatype slot = | |
| 83 | Slot | | |
| 84 | Value of value option | | |
| 32738 | 85 | Assignable of value option Unsynchronized.ref; | 
| 27814 | 86 | |
| 87 | ||
| 5825 | 88 | (* datatype token *) | 
| 89 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 90 | datatype kind = | 
| 27814 | 91 | Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | | 
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 92 | Nat | Float | String | AltString | Verbatim | Space | Comment | InternalValue | | 
| 40958 
755f8fe7ced9
eliminated obsolete Token.Malformed -- subsumed by Token.Error;
 wenzelm parents: 
40627diff
changeset | 93 | Error of string | Sync | EOF; | 
| 5825 | 94 | |
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 95 | datatype T = Token of (Symbol_Pos.text * Position.range) * (kind * string) * slot; | 
| 5825 | 96 | |
| 97 | val str_of_kind = | |
| 7026 | 98 | fn Command => "command" | 
| 99 | | Keyword => "keyword" | |
| 5825 | 100 | | Ident => "identifier" | 
| 101 | | LongIdent => "long identifier" | |
| 102 | | SymIdent => "symbolic identifier" | |
| 103 | | Var => "schematic variable" | |
| 104 | | TypeIdent => "type variable" | |
| 105 | | TypeVar => "schematic type variable" | |
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 106 | | Nat => "natural number" | 
| 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 107 | | Float => "floating-point number" | 
| 5825 | 108 | | String => "string" | 
| 17164 
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
 wenzelm parents: 
17069diff
changeset | 109 | | AltString => "back-quoted string" | 
| 5825 | 110 | | Verbatim => "verbatim text" | 
| 7682 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 wenzelm parents: 
7477diff
changeset | 111 | | Space => "white space" | 
| 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 wenzelm parents: 
7477diff
changeset | 112 | | Comment => "comment text" | 
| 27814 | 113 | | InternalValue => "internal value" | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 114 | | Error _ => "bad input" | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 115 | | Sync => "sync marker" | 
| 5825 | 116 | | EOF => "end-of-file"; | 
| 117 | ||
| 118 | ||
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 119 | (* position *) | 
| 5825 | 120 | |
| 27814 | 121 | fun position_of (Token ((_, (pos, _)), _, _)) = pos; | 
| 122 | fun end_position_of (Token ((_, (_, pos)), _, _)) = pos; | |
| 27663 | 123 | |
| 5825 | 124 | val pos_of = Position.str_of o position_of; | 
| 125 | ||
| 44658 
5bec9c15ef29
more direct Token.range_pos and Outer_Syntax.read_command, bypassing Thy_Syntax.span;
 wenzelm parents: 
43947diff
changeset | 126 | fun range [] = (Position.none, Position.none) | 
| 
5bec9c15ef29
more direct Token.range_pos and Outer_Syntax.read_command, bypassing Thy_Syntax.span;
 wenzelm parents: 
43947diff
changeset | 127 | | range toks = | 
| 
5bec9c15ef29
more direct Token.range_pos and Outer_Syntax.read_command, bypassing Thy_Syntax.span;
 wenzelm parents: 
43947diff
changeset | 128 | let | 
| 
5bec9c15ef29
more direct Token.range_pos and Outer_Syntax.read_command, bypassing Thy_Syntax.span;
 wenzelm parents: 
43947diff
changeset | 129 | val start_pos = position_of (hd toks); | 
| 
5bec9c15ef29
more direct Token.range_pos and Outer_Syntax.read_command, bypassing Thy_Syntax.span;
 wenzelm parents: 
43947diff
changeset | 130 | val end_pos = end_position_of (List.last toks); | 
| 
5bec9c15ef29
more direct Token.range_pos and Outer_Syntax.read_command, bypassing Thy_Syntax.span;
 wenzelm parents: 
43947diff
changeset | 131 | in (start_pos, end_pos) end; | 
| 
5bec9c15ef29
more direct Token.range_pos and Outer_Syntax.read_command, bypassing Thy_Syntax.span;
 wenzelm parents: 
43947diff
changeset | 132 | |
| 5825 | 133 | |
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 134 | (* control tokens *) | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 135 | |
| 27814 | 136 | fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot);
 | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 137 | val eof = mk_eof Position.none; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 138 | |
| 27814 | 139 | fun is_eof (Token (_, (EOF, _), _)) = true | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 140 | | is_eof _ = false; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 141 | |
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 142 | val not_eof = not o is_eof; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 143 | |
| 27814 | 144 | fun not_sync (Token (_, (Sync, _), _)) = false | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 145 | | not_sync _ = true; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 146 | |
| 27752 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 147 | val stopper = | 
| 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 148 | Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof; | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 149 | |
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 150 | |
| 5825 | 151 | (* kind of token *) | 
| 152 | ||
| 27814 | 153 | fun kind_of (Token (_, (k, _), _)) = k; | 
| 154 | fun is_kind k (Token (_, (k', _), _)) = k = k'; | |
| 5825 | 155 | |
| 27814 | 156 | fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x | 
| 7026 | 157 | | keyword_with _ _ = false; | 
| 5825 | 158 | |
| 27814 | 159 | fun ident_with pred (Token (_, (Ident, x), _)) = pred x | 
| 16029 | 160 | | ident_with _ _ = false; | 
| 161 | ||
| 27814 | 162 | fun is_proper (Token (_, (Space, _), _)) = false | 
| 163 | | is_proper (Token (_, (Comment, _), _)) = false | |
| 5825 | 164 | | is_proper _ = true; | 
| 165 | ||
| 27814 | 166 | fun is_semicolon (Token (_, (Keyword, ";"), _)) = true | 
| 9130 | 167 | | is_semicolon _ = false; | 
| 168 | ||
| 27814 | 169 | fun is_comment (Token (_, (Comment, _), _)) = true | 
| 17069 | 170 | | is_comment _ = false; | 
| 171 | ||
| 27814 | 172 | fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true | 
| 8580 | 173 | | is_begin_ignore _ = false; | 
| 174 | ||
| 27814 | 175 | fun is_end_ignore (Token (_, (Comment, ">"), _)) = true | 
| 8580 | 176 | | is_end_ignore _ = false; | 
| 177 | ||
| 8651 | 178 | |
| 17069 | 179 | (* blanks and newlines -- space tokens obey lines *) | 
| 8651 | 180 | |
| 27814 | 181 | fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) | 
| 17069 | 182 | | is_blank _ = false; | 
| 183 | ||
| 27814 | 184 | fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x | 
| 8651 | 185 | | is_newline _ = false; | 
| 186 | ||
| 5825 | 187 | |
| 14991 | 188 | (* token content *) | 
| 9155 | 189 | |
| 43731 
70072780e095
inner syntax supports inlined YXML according to Term_XML (particularly useful for producing text under program control);
 wenzelm parents: 
43709diff
changeset | 190 | fun source_of (Token ((source, (pos, _)), (_, x), _)) = | 
| 
70072780e095
inner syntax supports inlined YXML according to Term_XML (particularly useful for producing text under program control);
 wenzelm parents: 
43709diff
changeset | 191 | if YXML.detect x then x | 
| 45666 | 192 | else | 
| 193 | YXML.string_of | |
| 194 | (XML.Elem (Isabelle_Markup.token (Position.properties_of pos), [XML.Text source])); | |
| 25642 
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
 wenzelm parents: 
25582diff
changeset | 195 | |
| 27885 | 196 | fun source_position_of (Token ((source, (pos, _)), _, _)) = (source, pos); | 
| 27873 | 197 | |
| 27814 | 198 | fun content_of (Token (_, (_, x), _)) = x; | 
| 27747 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 199 | |
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 200 | |
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 201 | (* unparse *) | 
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 202 | |
| 27814 | 203 | fun unparse (Token (_, (kind, x), _)) = | 
| 14991 | 204 | (case kind of | 
| 43773 | 205 | String => Symbol_Pos.quote_string_qq x | 
| 206 | | AltString => Symbol_Pos.quote_string_bq x | |
| 207 |   | Verbatim => enclose "{*" "*}" x
 | |
| 208 | | Comment => enclose "(*" "*)" x | |
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 209 | | Sync => "" | 
| 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 210 | | EOF => "" | 
| 14991 | 211 | | _ => x); | 
| 212 | ||
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 213 | fun text_of tok = | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 214 |   if is_semicolon tok then ("terminator", "")
 | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 215 | else | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 216 | let | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 217 | val k = str_of_kind (kind_of tok); | 
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40290diff
changeset | 218 | val s = unparse tok; | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 219 | in | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 220 | if s = "" then (k, "") | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 221 | else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "") | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 222 | else (k, s) | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 223 | end; | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 224 | |
| 5825 | 225 | |
| 226 | ||
| 27814 | 227 | (** associated values **) | 
| 228 | ||
| 229 | (* access values *) | |
| 230 | ||
| 231 | fun get_value (Token (_, _, Value v)) = v | |
| 232 | | get_value _ = NONE; | |
| 233 | ||
| 234 | fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v))) | |
| 235 | | map_value _ tok = tok; | |
| 236 | ||
| 237 | ||
| 238 | (* make values *) | |
| 239 | ||
| 240 | fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v)); | |
| 241 | ||
| 242 | val mk_text = mk_value "<text>" o Text; | |
| 243 | val mk_typ = mk_value "<typ>" o Typ; | |
| 244 | val mk_term = mk_value "<term>" o Term; | |
| 245 | val mk_fact = mk_value "<fact>" o Fact; | |
| 246 | val mk_attribute = mk_value "<attribute>" o Attribute; | |
| 247 | ||
| 248 | ||
| 249 | (* static binding *) | |
| 250 | ||
| 251 | (*1st stage: make empty slots assignable*) | |
| 32738 | 252 | fun assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE)) | 
| 27814 | 253 | | assignable tok = tok; | 
| 254 | ||
| 255 | (*2nd stage: assign values as side-effect of scanning*) | |
| 256 | fun assign v (Token (_, _, Assignable r)) = r := v | |
| 257 | | assign _ _ = (); | |
| 258 | ||
| 259 | (*3rd stage: static closure of final values*) | |
| 32738 | 260 | fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) | 
| 27814 | 261 | | closure tok = tok; | 
| 262 | ||
| 263 | ||
| 264 | ||
| 5825 | 265 | (** scanners **) | 
| 266 | ||
| 30573 | 267 | open Basic_Symbol_Pos; | 
| 5825 | 268 | |
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43773diff
changeset | 269 | fun !!! msg = Symbol_Pos.!!! (fn () => "Outer lexical error: " ^ msg); | 
| 5825 | 270 | |
| 271 | ||
| 272 | (* scan symbolic idents *) | |
| 273 | ||
| 40627 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 wenzelm parents: 
40531diff
changeset | 274 | val is_sym_char = member (op =) (raw_explode "!#$%&*+-/<=>?@^_|~"); | 
| 5825 | 275 | |
| 8231 | 276 | val scan_symid = | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 277 | Scan.many1 (is_sym_char o Symbol_Pos.symbol) || | 
| 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 278 | Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single; | 
| 5825 | 279 | |
| 8231 | 280 | fun is_symid str = | 
| 281 | (case try Symbol.explode str of | |
| 15531 | 282 | SOME [s] => Symbol.is_symbolic s orelse is_sym_char s | 
| 283 | | SOME ss => forall is_sym_char ss | |
| 8231 | 284 | | _ => false); | 
| 285 | ||
| 27814 | 286 | fun ident_or_symbolic "begin" = false | 
| 287 | | ident_or_symbolic ":" = true | |
| 288 | | ident_or_symbolic "::" = true | |
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 289 | | ident_or_symbolic s = Lexicon.is_identifier s orelse is_symid s; | 
| 5825 | 290 | |
| 291 | ||
| 292 | (* scan verbatim text *) | |
| 293 | ||
| 294 | val scan_verb = | |
| 27769 | 295 | $$$ "*" --| Scan.ahead (~$$$ "}") || | 
| 296 | Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single; | |
| 5825 | 297 | |
| 298 | val scan_verbatim = | |
| 30573 | 299 |   (Symbol_Pos.scan_pos --| $$$ "{" --| $$$ "*") -- !!! "missing end of verbatim text"
 | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 300 | (Symbol_Pos.change_prompt | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 301 | ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos))); | 
| 5825 | 302 | |
| 303 | ||
| 304 | (* scan space *) | |
| 305 | ||
| 19305 | 306 | fun is_space s = Symbol.is_blank s andalso s <> "\n"; | 
| 5825 | 307 | |
| 308 | val scan_space = | |
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 309 | Scan.many1 (is_space o Symbol_Pos.symbol) @@@ Scan.optional ($$$ "\n") [] || | 
| 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 310 | Scan.many (is_space o Symbol_Pos.symbol) @@@ $$$ "\n"; | 
| 5825 | 311 | |
| 312 | ||
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 313 | (* scan comment *) | 
| 5825 | 314 | |
| 315 | val scan_comment = | |
| 30573 | 316 | Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body !!! -- Symbol_Pos.scan_pos); | 
| 5825 | 317 | |
| 318 | ||
| 27663 | 319 | |
| 27769 | 320 | (** token sources **) | 
| 5825 | 321 | |
| 27769 | 322 | fun source_proper src = src |> Source.filter is_proper; | 
| 5825 | 323 | |
| 23678 | 324 | local | 
| 325 | ||
| 27769 | 326 | fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 327 | |
| 27799 | 328 | fun token k ss = | 
| 43709 
717e96cf9527
discontinued special treatment of hard tabulators;
 wenzelm parents: 
42503diff
changeset | 329 | Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.content ss), Slot); | 
| 27799 | 330 | |
| 331 | fun token_range k (pos1, (ss, pos2)) = | |
| 43709 
717e96cf9527
discontinued special treatment of hard tabulators;
 wenzelm parents: 
42503diff
changeset | 332 | Token (Symbol_Pos.implode_range pos1 pos2 ss, (k, Symbol_Pos.content ss), Slot); | 
| 23678 | 333 | |
| 27769 | 334 | fun scan (lex1, lex2) = !!! "bad input" | 
| 42503 | 335 | (Symbol_Pos.scan_string_qq >> token_range String || | 
| 336 | Symbol_Pos.scan_string_bq >> token_range AltString || | |
| 27799 | 337 | scan_verbatim >> token_range Verbatim || | 
| 338 | scan_comment >> token_range Comment || | |
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 339 | scan_space >> token Space || | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 340 | Scan.one (Symbol.is_sync o Symbol_Pos.symbol) >> (token Sync o single) || | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 341 | (Scan.max token_leq | 
| 27769 | 342 | (Scan.max token_leq | 
| 343 | (Scan.literal lex2 >> pair Command) | |
| 344 | (Scan.literal lex1 >> pair Keyword)) | |
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 345 | (Lexicon.scan_longid >> pair LongIdent || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 346 | Lexicon.scan_id >> pair Ident || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 347 | Lexicon.scan_var >> pair Var || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 348 | Lexicon.scan_tid >> pair TypeIdent || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 349 | Lexicon.scan_tvar >> pair TypeVar || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 350 | Lexicon.scan_float >> pair Float || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 351 | Lexicon.scan_nat >> pair Nat || | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 352 | scan_symid >> pair SymIdent) >> uncurry token)); | 
| 27769 | 353 | |
| 354 | fun recover msg = | |
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 355 | Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o Symbol_Pos.symbol) | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 356 | >> (single o token (Error msg)); | 
| 23678 | 357 | |
| 358 | in | |
| 5825 | 359 | |
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 360 | fun source' {do_recover} get_lex =
 | 
| 30573 | 361 | Source.source Symbol_Pos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 362 | (Option.map (rpair recover) do_recover); | 
| 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 363 | |
| 5825 | 364 | fun source do_recover get_lex pos src = | 
| 30573 | 365 | Symbol_Pos.source pos src | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 366 | |> source' do_recover get_lex; | 
| 23678 | 367 | |
| 368 | end; | |
| 5825 | 369 | |
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 370 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 371 | (* read_antiq *) | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 372 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 373 | fun read_antiq lex scan (syms, pos) = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 374 | let | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 375 |     fun err msg = cat_error msg ("Malformed antiquotation" ^ Position.str_of pos ^ ":\n" ^
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 376 |       "@{" ^ Symbol_Pos.content syms ^ "}");
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 377 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 378 | val res = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 379 | Source.of_list syms | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 380 |       |> source' {do_recover = NONE} (K (lex, Scan.empty_lexicon))
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 381 | |> source_proper | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 382 | |> Source.source stopper (Scan.error (Scan.bulk scan)) NONE | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 383 | |> Source.exhaust; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 384 | in (case res of [x] => x | _ => err "") handle ERROR msg => err msg end; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 385 | |
| 5825 | 386 | end; |