| author | blanchet | 
| Tue, 27 Jul 2010 14:02:15 +0200 | |
| changeset 38010 | ae3df22dd70b | 
| parent 36959 | f5417836dbea | 
| child 38228 | ada3ab6b9085 | 
| permissions | -rw-r--r-- | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 1 | (* Title: Pure/Isar/token.ML | 
| 5825 | 2 | Author: Markus Wenzel, TU Muenchen | 
| 3 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 4 | Outer token syntax for Isabelle/Isar. | 
| 5825 | 5 | *) | 
| 6 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 7 | signature TOKEN = | 
| 5825 | 8 | sig | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 9 | datatype kind = | 
| 27814 | 10 | Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | | 
| 11 | Nat | String | AltString | Verbatim | Space | Comment | InternalValue | | |
| 12 | Malformed | Error of string | Sync | EOF | |
| 13 | datatype value = | |
| 14 | Text of string | Typ of typ | Term of term | Fact of thm list | | |
| 15 | Attribute of morphism -> attribute | |
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 16 | type T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 17 | val str_of_kind: kind -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 18 | val position_of: T -> Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 19 | val end_position_of: T -> Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 20 | val pos_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 21 | val eof: T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 22 | val is_eof: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 23 | val not_eof: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 24 | val not_sync: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 25 | val stopper: T Scan.stopper | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 26 | val kind_of: T -> kind | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 27 | val is_kind: kind -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 28 | val keyword_with: (string -> bool) -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 29 | val ident_with: (string -> bool) -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 30 | val is_proper: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 31 | val is_semicolon: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 32 | val is_comment: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 33 | val is_begin_ignore: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 34 | val is_end_ignore: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 35 | val is_blank: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 36 | val is_newline: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 37 | val source_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 38 | val source_position_of: T -> Symbol_Pos.text * Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 39 | val content_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 40 | val unparse: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 41 | val text_of: T -> string * string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 42 | val get_value: T -> value option | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 43 | val map_value: (value -> value) -> T -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 44 | val mk_text: string -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 45 | val mk_typ: typ -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 46 | val mk_term: term -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 47 | val mk_fact: thm list -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 48 | val mk_attribute: (morphism -> attribute) -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 49 | val assignable: T -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 50 | val assign: value option -> T -> unit | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 51 | val closure: T -> T | 
| 27814 | 52 | val ident_or_symbolic: string -> bool | 
| 30573 | 53 | val !!! : string -> (Symbol_Pos.T list -> 'a) -> Symbol_Pos.T list -> 'a | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 54 | val source_proper: (T, 'a) Source.source -> (T, (T, 'a) Source.source) Source.source | 
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 55 |   val source': {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 56 | (Symbol_Pos.T, 'a) Source.source -> (T, (Symbol_Pos.T, 'a) Source.source) Source.source | 
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 57 |   val source: {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 58 | Position.T -> (Symbol.symbol, 'a) Source.source -> (T, | 
| 30573 | 59 | (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 60 | val read_antiq: Scan.lexicon -> (T list -> 'a * T list) -> Symbol_Pos.T list * Position.T -> 'a | 
| 5825 | 61 | end; | 
| 62 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 63 | structure Token: TOKEN = | 
| 5825 | 64 | struct | 
| 65 | ||
| 66 | (** tokens **) | |
| 67 | ||
| 27814 | 68 | (* token values *) | 
| 69 | ||
| 70 | (*The value slot assigns an (optional) internal value to a token, | |
| 71 | usually as a side-effect of special scanner setup (see also | |
| 72 | args.ML). Note that an assignable ref designates an intermediate | |
| 73 | state of internalization -- it is NOT meant to persist.*) | |
| 74 | ||
| 75 | datatype value = | |
| 76 | Text of string | | |
| 77 | Typ of typ | | |
| 78 | Term of term | | |
| 79 | Fact of thm list | | |
| 80 | Attribute of morphism -> attribute; | |
| 81 | ||
| 82 | datatype slot = | |
| 83 | Slot | | |
| 84 | Value of value option | | |
| 32738 | 85 | Assignable of value option Unsynchronized.ref; | 
| 27814 | 86 | |
| 87 | ||
| 5825 | 88 | (* datatype token *) | 
| 89 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 90 | datatype kind = | 
| 27814 | 91 | Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | | 
| 92 | Nat | String | AltString | Verbatim | Space | Comment | InternalValue | | |
| 93 | Malformed | Error of string | Sync | EOF; | |
| 5825 | 94 | |
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 95 | datatype T = Token of (Symbol_Pos.text * Position.range) * (kind * string) * slot; | 
| 5825 | 96 | |
| 97 | val str_of_kind = | |
| 7026 | 98 | fn Command => "command" | 
| 99 | | Keyword => "keyword" | |
| 5825 | 100 | | Ident => "identifier" | 
| 101 | | LongIdent => "long identifier" | |
| 102 | | SymIdent => "symbolic identifier" | |
| 103 | | Var => "schematic variable" | |
| 104 | | TypeIdent => "type variable" | |
| 105 | | TypeVar => "schematic type variable" | |
| 106 | | Nat => "number" | |
| 107 | | String => "string" | |
| 17164 
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
 wenzelm parents: 
17069diff
changeset | 108 | | AltString => "back-quoted string" | 
| 5825 | 109 | | Verbatim => "verbatim text" | 
| 7682 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 wenzelm parents: 
7477diff
changeset | 110 | | Space => "white space" | 
| 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 wenzelm parents: 
7477diff
changeset | 111 | | Comment => "comment text" | 
| 27814 | 112 | | InternalValue => "internal value" | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 113 | | Malformed => "malformed symbolic character" | 
| 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 114 | | Error _ => "bad input" | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 115 | | Sync => "sync marker" | 
| 5825 | 116 | | EOF => "end-of-file"; | 
| 117 | ||
| 118 | ||
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 119 | (* position *) | 
| 5825 | 120 | |
| 27814 | 121 | fun position_of (Token ((_, (pos, _)), _, _)) = pos; | 
| 122 | fun end_position_of (Token ((_, (_, pos)), _, _)) = pos; | |
| 27663 | 123 | |
| 5825 | 124 | val pos_of = Position.str_of o position_of; | 
| 125 | ||
| 126 | ||
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 127 | (* control tokens *) | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 128 | |
| 27814 | 129 | fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot);
 | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 130 | val eof = mk_eof Position.none; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 131 | |
| 27814 | 132 | fun is_eof (Token (_, (EOF, _), _)) = true | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 133 | | is_eof _ = false; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 134 | |
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 135 | val not_eof = not o is_eof; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 136 | |
| 27814 | 137 | fun not_sync (Token (_, (Sync, _), _)) = false | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 138 | | not_sync _ = true; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 139 | |
| 27752 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 140 | val stopper = | 
| 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 141 | Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof; | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 142 | |
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 143 | |
| 5825 | 144 | (* kind of token *) | 
| 145 | ||
| 27814 | 146 | fun kind_of (Token (_, (k, _), _)) = k; | 
| 147 | fun is_kind k (Token (_, (k', _), _)) = k = k'; | |
| 5825 | 148 | |
| 27814 | 149 | fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x | 
| 7026 | 150 | | keyword_with _ _ = false; | 
| 5825 | 151 | |
| 27814 | 152 | fun ident_with pred (Token (_, (Ident, x), _)) = pred x | 
| 16029 | 153 | | ident_with _ _ = false; | 
| 154 | ||
| 27814 | 155 | fun is_proper (Token (_, (Space, _), _)) = false | 
| 156 | | is_proper (Token (_, (Comment, _), _)) = false | |
| 5825 | 157 | | is_proper _ = true; | 
| 158 | ||
| 27814 | 159 | fun is_semicolon (Token (_, (Keyword, ";"), _)) = true | 
| 9130 | 160 | | is_semicolon _ = false; | 
| 161 | ||
| 27814 | 162 | fun is_comment (Token (_, (Comment, _), _)) = true | 
| 17069 | 163 | | is_comment _ = false; | 
| 164 | ||
| 27814 | 165 | fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true | 
| 8580 | 166 | | is_begin_ignore _ = false; | 
| 167 | ||
| 27814 | 168 | fun is_end_ignore (Token (_, (Comment, ">"), _)) = true | 
| 8580 | 169 | | is_end_ignore _ = false; | 
| 170 | ||
| 8651 | 171 | |
| 17069 | 172 | (* blanks and newlines -- space tokens obey lines *) | 
| 8651 | 173 | |
| 27814 | 174 | fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) | 
| 17069 | 175 | | is_blank _ = false; | 
| 176 | ||
| 27814 | 177 | fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x | 
| 8651 | 178 | | is_newline _ = false; | 
| 179 | ||
| 5825 | 180 | |
| 14991 | 181 | (* token content *) | 
| 9155 | 182 | |
| 27814 | 183 | fun source_of (Token ((source, (pos, _)), _, _)) = | 
| 184 | YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text source])); | |
| 25642 
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
 wenzelm parents: 
25582diff
changeset | 185 | |
| 27885 | 186 | fun source_position_of (Token ((source, (pos, _)), _, _)) = (source, pos); | 
| 27873 | 187 | |
| 27814 | 188 | fun content_of (Token (_, (_, x), _)) = x; | 
| 27747 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 189 | |
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 190 | |
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 191 | (* unparse *) | 
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 192 | |
| 18547 | 193 | fun escape q = | 
| 194 | implode o map (fn s => if s = q orelse s = "\\" then "\\" ^ s else s) o Symbol.explode; | |
| 195 | ||
| 27814 | 196 | fun unparse (Token (_, (kind, x), _)) = | 
| 14991 | 197 | (case kind of | 
| 18547 | 198 | String => x |> quote o escape "\"" | 
| 199 | | AltString => x |> enclose "`" "`" o escape "`" | |
| 14991 | 200 |   | Verbatim => x |> enclose "{*" "*}"
 | 
| 201 | | Comment => x |> enclose "(*" "*)" | |
| 28034 
33050286e65d
text_of Malformed: explode with spaces -- Output.output/escape ineffective by default;
 wenzelm parents: 
27885diff
changeset | 202 | | Malformed => space_implode " " (explode x) | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 203 | | Sync => "" | 
| 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 204 | | EOF => "" | 
| 14991 | 205 | | _ => x); | 
| 206 | ||
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 207 | fun text_of tok = | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 208 |   if is_semicolon tok then ("terminator", "")
 | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 209 | else | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 210 | let | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 211 | val k = str_of_kind (kind_of tok); | 
| 25642 
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
 wenzelm parents: 
25582diff
changeset | 212 | val s = unparse tok | 
| 27814 | 213 | handle ERROR _ => Symbol.separate_chars (content_of tok); | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 214 | in | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 215 | if s = "" then (k, "") | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 216 | else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "") | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 217 | else (k, s) | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 218 | end; | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 219 | |
| 5825 | 220 | |
| 221 | ||
| 27814 | 222 | (** associated values **) | 
| 223 | ||
| 224 | (* access values *) | |
| 225 | ||
| 226 | fun get_value (Token (_, _, Value v)) = v | |
| 227 | | get_value _ = NONE; | |
| 228 | ||
| 229 | fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v))) | |
| 230 | | map_value _ tok = tok; | |
| 231 | ||
| 232 | ||
| 233 | (* make values *) | |
| 234 | ||
| 235 | fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v)); | |
| 236 | ||
| 237 | val mk_text = mk_value "<text>" o Text; | |
| 238 | val mk_typ = mk_value "<typ>" o Typ; | |
| 239 | val mk_term = mk_value "<term>" o Term; | |
| 240 | val mk_fact = mk_value "<fact>" o Fact; | |
| 241 | val mk_attribute = mk_value "<attribute>" o Attribute; | |
| 242 | ||
| 243 | ||
| 244 | (* static binding *) | |
| 245 | ||
| 246 | (*1st stage: make empty slots assignable*) | |
| 32738 | 247 | fun assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE)) | 
| 27814 | 248 | | assignable tok = tok; | 
| 249 | ||
| 250 | (*2nd stage: assign values as side-effect of scanning*) | |
| 251 | fun assign v (Token (_, _, Assignable r)) = r := v | |
| 252 | | assign _ _ = (); | |
| 253 | ||
| 254 | (*3rd stage: static closure of final values*) | |
| 32738 | 255 | fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) | 
| 27814 | 256 | | closure tok = tok; | 
| 257 | ||
| 258 | ||
| 259 | ||
| 5825 | 260 | (** scanners **) | 
| 261 | ||
| 30573 | 262 | open Basic_Symbol_Pos; | 
| 5825 | 263 | |
| 30573 | 264 | fun !!! msg = Symbol_Pos.!!! ("Outer lexical error: " ^ msg);
 | 
| 5825 | 265 | |
| 266 | ||
| 267 | (* scan symbolic idents *) | |
| 268 | ||
| 20664 | 269 | val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~"); | 
| 5825 | 270 | |
| 8231 | 271 | val scan_symid = | 
| 27769 | 272 | Scan.many1 (is_sym_char o symbol) || | 
| 273 | Scan.one (Symbol.is_symbolic o symbol) >> single; | |
| 5825 | 274 | |
| 8231 | 275 | fun is_symid str = | 
| 276 | (case try Symbol.explode str of | |
| 15531 | 277 | SOME [s] => Symbol.is_symbolic s orelse is_sym_char s | 
| 278 | | SOME ss => forall is_sym_char ss | |
| 8231 | 279 | | _ => false); | 
| 280 | ||
| 27814 | 281 | fun ident_or_symbolic "begin" = false | 
| 282 | | ident_or_symbolic ":" = true | |
| 283 | | ident_or_symbolic "::" = true | |
| 284 | | ident_or_symbolic s = Syntax.is_identifier s orelse is_symid s; | |
| 5825 | 285 | |
| 286 | ||
| 287 | (* scan verbatim text *) | |
| 288 | ||
| 289 | val scan_verb = | |
| 27769 | 290 | $$$ "*" --| Scan.ahead (~$$$ "}") || | 
| 291 | Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single; | |
| 5825 | 292 | |
| 293 | val scan_verbatim = | |
| 30573 | 294 |   (Symbol_Pos.scan_pos --| $$$ "{" --| $$$ "*") -- !!! "missing end of verbatim text"
 | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 295 | (Symbol_Pos.change_prompt | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 296 | ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos))); | 
| 5825 | 297 | |
| 298 | ||
| 299 | (* scan space *) | |
| 300 | ||
| 19305 | 301 | fun is_space s = Symbol.is_blank s andalso s <> "\n"; | 
| 5825 | 302 | |
| 303 | val scan_space = | |
| 27769 | 304 | Scan.many1 (is_space o symbol) @@@ Scan.optional ($$$ "\n") [] || | 
| 305 | Scan.many (is_space o symbol) @@@ $$$ "\n"; | |
| 5825 | 306 | |
| 307 | ||
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 308 | (* scan comment *) | 
| 5825 | 309 | |
| 310 | val scan_comment = | |
| 30573 | 311 | Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body !!! -- Symbol_Pos.scan_pos); | 
| 5825 | 312 | |
| 313 | ||
| 23678 | 314 | (* scan malformed symbols *) | 
| 315 | ||
| 316 | val scan_malformed = | |
| 27769 | 317 | $$$ Symbol.malformed |-- | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 318 | Symbol_Pos.change_prompt (Scan.many (Symbol.is_regular o symbol)) | 
| 27769 | 319 | --| Scan.option ($$$ Symbol.end_malformed); | 
| 27752 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 320 | |
| 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 321 | |
| 27663 | 322 | |
| 27769 | 323 | (** token sources **) | 
| 5825 | 324 | |
| 27769 | 325 | fun source_proper src = src |> Source.filter is_proper; | 
| 5825 | 326 | |
| 23678 | 327 | local | 
| 328 | ||
| 27769 | 329 | fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 330 | |
| 27799 | 331 | fun token k ss = | 
| 30573 | 332 | Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.untabify_content ss), Slot); | 
| 27799 | 333 | |
| 334 | fun token_range k (pos1, (ss, pos2)) = | |
| 30573 | 335 | Token (Symbol_Pos.implode_range pos1 pos2 ss, (k, Symbol_Pos.untabify_content ss), Slot); | 
| 23678 | 336 | |
| 27769 | 337 | fun scan (lex1, lex2) = !!! "bad input" | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 338 | (Symbol_Pos.scan_string >> token_range String || | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 339 | Symbol_Pos.scan_alt_string >> token_range AltString || | 
| 27799 | 340 | scan_verbatim >> token_range Verbatim || | 
| 341 | scan_comment >> token_range Comment || | |
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 342 | scan_space >> token Space || | 
| 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 343 | scan_malformed >> token Malformed || | 
| 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 344 | Scan.one (Symbol.is_sync o symbol) >> (token Sync o single) || | 
| 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 345 | (Scan.max token_leq | 
| 27769 | 346 | (Scan.max token_leq | 
| 347 | (Scan.literal lex2 >> pair Command) | |
| 348 | (Scan.literal lex1 >> pair Keyword)) | |
| 349 | (Syntax.scan_longid >> pair LongIdent || | |
| 350 | Syntax.scan_id >> pair Ident || | |
| 351 | Syntax.scan_var >> pair Var || | |
| 352 | Syntax.scan_tid >> pair TypeIdent || | |
| 353 | Syntax.scan_tvar >> pair TypeVar || | |
| 354 | Syntax.scan_nat >> pair Nat || | |
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 355 | scan_symid >> pair SymIdent) >> uncurry token)); | 
| 27769 | 356 | |
| 357 | fun recover msg = | |
| 358 | Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o symbol) | |
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 359 | >> (single o token (Error msg)); | 
| 23678 | 360 | |
| 361 | in | |
| 5825 | 362 | |
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 363 | fun source' {do_recover} get_lex =
 | 
| 30573 | 364 | Source.source Symbol_Pos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 365 | (Option.map (rpair recover) do_recover); | 
| 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 366 | |
| 5825 | 367 | fun source do_recover get_lex pos src = | 
| 30573 | 368 | Symbol_Pos.source pos src | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 369 | |> source' do_recover get_lex; | 
| 23678 | 370 | |
| 371 | end; | |
| 5825 | 372 | |
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 373 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 374 | (* read_antiq *) | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 375 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 376 | fun read_antiq lex scan (syms, pos) = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 377 | let | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 378 |     fun err msg = cat_error msg ("Malformed antiquotation" ^ Position.str_of pos ^ ":\n" ^
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 379 |       "@{" ^ Symbol_Pos.content syms ^ "}");
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 380 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 381 | val res = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 382 | Source.of_list syms | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 383 |       |> source' {do_recover = NONE} (K (lex, Scan.empty_lexicon))
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 384 | |> source_proper | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 385 | |> Source.source stopper (Scan.error (Scan.bulk scan)) NONE | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 386 | |> Source.exhaust; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 387 | in (case res of [x] => x | _ => err "") handle ERROR msg => err msg end; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 388 | |
| 5825 | 389 | end; |