| author | wenzelm | 
| Wed, 08 Jun 2011 20:58:51 +0200 | |
| changeset 43284 | 04d473e883df | 
| parent 42503 | 27514b6fbe93 | 
| child 43709 | 717e96cf9527 | 
| permissions | -rw-r--r-- | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 1 | (* Title: Pure/Isar/token.ML | 
| 5825 | 2 | Author: Markus Wenzel, TU Muenchen | 
| 3 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 4 | Outer token syntax for Isabelle/Isar. | 
| 5825 | 5 | *) | 
| 6 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 7 | signature TOKEN = | 
| 5825 | 8 | sig | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 9 | datatype kind = | 
| 27814 | 10 | Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | | 
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 11 | Nat | Float | String | AltString | Verbatim | Space | Comment | InternalValue | | 
| 40958 
755f8fe7ced9
eliminated obsolete Token.Malformed -- subsumed by Token.Error;
 wenzelm parents: 
40627diff
changeset | 12 | Error of string | Sync | EOF | 
| 27814 | 13 | datatype value = | 
| 14 | Text of string | Typ of typ | Term of term | Fact of thm list | | |
| 15 | Attribute of morphism -> attribute | |
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 16 | type T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 17 | val str_of_kind: kind -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 18 | val position_of: T -> Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 19 | val end_position_of: T -> Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 20 | val pos_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 21 | val eof: T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 22 | val is_eof: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 23 | val not_eof: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 24 | val not_sync: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 25 | val stopper: T Scan.stopper | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 26 | val kind_of: T -> kind | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 27 | val is_kind: kind -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 28 | val keyword_with: (string -> bool) -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 29 | val ident_with: (string -> bool) -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 30 | val is_proper: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 31 | val is_semicolon: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 32 | val is_comment: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 33 | val is_begin_ignore: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 34 | val is_end_ignore: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 35 | val is_blank: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 36 | val is_newline: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 37 | val source_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 38 | val source_position_of: T -> Symbol_Pos.text * Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 39 | val content_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 40 | val unparse: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 41 | val text_of: T -> string * string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 42 | val get_value: T -> value option | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 43 | val map_value: (value -> value) -> T -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 44 | val mk_text: string -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 45 | val mk_typ: typ -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 46 | val mk_term: term -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 47 | val mk_fact: thm list -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 48 | val mk_attribute: (morphism -> attribute) -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 49 | val assignable: T -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 50 | val assign: value option -> T -> unit | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 51 | val closure: T -> T | 
| 27814 | 52 | val ident_or_symbolic: string -> bool | 
| 30573 | 53 | val !!! : string -> (Symbol_Pos.T list -> 'a) -> Symbol_Pos.T list -> 'a | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 54 | val source_proper: (T, 'a) Source.source -> (T, (T, 'a) Source.source) Source.source | 
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 55 |   val source': {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 56 | (Symbol_Pos.T, 'a) Source.source -> (T, (Symbol_Pos.T, 'a) Source.source) Source.source | 
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 57 |   val source: {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 58 | Position.T -> (Symbol.symbol, 'a) Source.source -> (T, | 
| 30573 | 59 | (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 60 | val read_antiq: Scan.lexicon -> (T list -> 'a * T list) -> Symbol_Pos.T list * Position.T -> 'a | 
| 5825 | 61 | end; | 
| 62 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 63 | structure Token: TOKEN = | 
| 5825 | 64 | struct | 
| 65 | ||
| 66 | (** tokens **) | |
| 67 | ||
| 27814 | 68 | (* token values *) | 
| 69 | ||
| 70 | (*The value slot assigns an (optional) internal value to a token, | |
| 71 | usually as a side-effect of special scanner setup (see also | |
| 72 | args.ML). Note that an assignable ref designates an intermediate | |
| 73 | state of internalization -- it is NOT meant to persist.*) | |
| 74 | ||
| 75 | datatype value = | |
| 76 | Text of string | | |
| 77 | Typ of typ | | |
| 78 | Term of term | | |
| 79 | Fact of thm list | | |
| 80 | Attribute of morphism -> attribute; | |
| 81 | ||
| 82 | datatype slot = | |
| 83 | Slot | | |
| 84 | Value of value option | | |
| 32738 | 85 | Assignable of value option Unsynchronized.ref; | 
| 27814 | 86 | |
| 87 | ||
| 5825 | 88 | (* datatype token *) | 
| 89 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 90 | datatype kind = | 
| 27814 | 91 | Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | | 
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 92 | Nat | Float | String | AltString | Verbatim | Space | Comment | InternalValue | | 
| 40958 
755f8fe7ced9
eliminated obsolete Token.Malformed -- subsumed by Token.Error;
 wenzelm parents: 
40627diff
changeset | 93 | Error of string | Sync | EOF; | 
| 5825 | 94 | |
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 95 | datatype T = Token of (Symbol_Pos.text * Position.range) * (kind * string) * slot; | 
| 5825 | 96 | |
| 97 | val str_of_kind = | |
| 7026 | 98 | fn Command => "command" | 
| 99 | | Keyword => "keyword" | |
| 5825 | 100 | | Ident => "identifier" | 
| 101 | | LongIdent => "long identifier" | |
| 102 | | SymIdent => "symbolic identifier" | |
| 103 | | Var => "schematic variable" | |
| 104 | | TypeIdent => "type variable" | |
| 105 | | TypeVar => "schematic type variable" | |
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 106 | | Nat => "natural number" | 
| 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 107 | | Float => "floating-point number" | 
| 5825 | 108 | | String => "string" | 
| 17164 
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
 wenzelm parents: 
17069diff
changeset | 109 | | AltString => "back-quoted string" | 
| 5825 | 110 | | Verbatim => "verbatim text" | 
| 7682 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 wenzelm parents: 
7477diff
changeset | 111 | | Space => "white space" | 
| 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 wenzelm parents: 
7477diff
changeset | 112 | | Comment => "comment text" | 
| 27814 | 113 | | InternalValue => "internal value" | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 114 | | Error _ => "bad input" | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 115 | | Sync => "sync marker" | 
| 5825 | 116 | | EOF => "end-of-file"; | 
| 117 | ||
| 118 | ||
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 119 | (* position *) | 
| 5825 | 120 | |
| 27814 | 121 | fun position_of (Token ((_, (pos, _)), _, _)) = pos; | 
| 122 | fun end_position_of (Token ((_, (_, pos)), _, _)) = pos; | |
| 27663 | 123 | |
| 5825 | 124 | val pos_of = Position.str_of o position_of; | 
| 125 | ||
| 126 | ||
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 127 | (* control tokens *) | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 128 | |
| 27814 | 129 | fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot);
 | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 130 | val eof = mk_eof Position.none; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 131 | |
| 27814 | 132 | fun is_eof (Token (_, (EOF, _), _)) = true | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 133 | | is_eof _ = false; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 134 | |
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 135 | val not_eof = not o is_eof; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 136 | |
| 27814 | 137 | fun not_sync (Token (_, (Sync, _), _)) = false | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 138 | | not_sync _ = true; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 139 | |
| 27752 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 140 | val stopper = | 
| 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 141 | Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof; | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 142 | |
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 143 | |
| 5825 | 144 | (* kind of token *) | 
| 145 | ||
| 27814 | 146 | fun kind_of (Token (_, (k, _), _)) = k; | 
| 147 | fun is_kind k (Token (_, (k', _), _)) = k = k'; | |
| 5825 | 148 | |
| 27814 | 149 | fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x | 
| 7026 | 150 | | keyword_with _ _ = false; | 
| 5825 | 151 | |
| 27814 | 152 | fun ident_with pred (Token (_, (Ident, x), _)) = pred x | 
| 16029 | 153 | | ident_with _ _ = false; | 
| 154 | ||
| 27814 | 155 | fun is_proper (Token (_, (Space, _), _)) = false | 
| 156 | | is_proper (Token (_, (Comment, _), _)) = false | |
| 5825 | 157 | | is_proper _ = true; | 
| 158 | ||
| 27814 | 159 | fun is_semicolon (Token (_, (Keyword, ";"), _)) = true | 
| 9130 | 160 | | is_semicolon _ = false; | 
| 161 | ||
| 27814 | 162 | fun is_comment (Token (_, (Comment, _), _)) = true | 
| 17069 | 163 | | is_comment _ = false; | 
| 164 | ||
| 27814 | 165 | fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true | 
| 8580 | 166 | | is_begin_ignore _ = false; | 
| 167 | ||
| 27814 | 168 | fun is_end_ignore (Token (_, (Comment, ">"), _)) = true | 
| 8580 | 169 | | is_end_ignore _ = false; | 
| 170 | ||
| 8651 | 171 | |
| 17069 | 172 | (* blanks and newlines -- space tokens obey lines *) | 
| 8651 | 173 | |
| 27814 | 174 | fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) | 
| 17069 | 175 | | is_blank _ = false; | 
| 176 | ||
| 27814 | 177 | fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x | 
| 8651 | 178 | | is_newline _ = false; | 
| 179 | ||
| 5825 | 180 | |
| 14991 | 181 | (* token content *) | 
| 9155 | 182 | |
| 27814 | 183 | fun source_of (Token ((source, (pos, _)), _, _)) = | 
| 38229 | 184 | YXML.string_of (XML.Elem (Markup.token (Position.properties_of pos), [XML.Text source])); | 
| 25642 
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
 wenzelm parents: 
25582diff
changeset | 185 | |
| 27885 | 186 | fun source_position_of (Token ((source, (pos, _)), _, _)) = (source, pos); | 
| 27873 | 187 | |
| 27814 | 188 | fun content_of (Token (_, (_, x), _)) = x; | 
| 27747 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 189 | |
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 190 | |
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 191 | (* unparse *) | 
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 192 | |
| 18547 | 193 | fun escape q = | 
| 194 | implode o map (fn s => if s = q orelse s = "\\" then "\\" ^ s else s) o Symbol.explode; | |
| 195 | ||
| 27814 | 196 | fun unparse (Token (_, (kind, x), _)) = | 
| 14991 | 197 | (case kind of | 
| 18547 | 198 | String => x |> quote o escape "\"" | 
| 199 | | AltString => x |> enclose "`" "`" o escape "`" | |
| 14991 | 200 |   | Verbatim => x |> enclose "{*" "*}"
 | 
| 201 | | Comment => x |> enclose "(*" "*)" | |
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 202 | | Sync => "" | 
| 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 203 | | EOF => "" | 
| 14991 | 204 | | _ => x); | 
| 205 | ||
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 206 | fun text_of tok = | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 207 |   if is_semicolon tok then ("terminator", "")
 | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 208 | else | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 209 | let | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 210 | val k = str_of_kind (kind_of tok); | 
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40290diff
changeset | 211 | val s = unparse tok; | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 212 | in | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 213 | if s = "" then (k, "") | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 214 | else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "") | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 215 | else (k, s) | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 216 | end; | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 217 | |
| 5825 | 218 | |
| 219 | ||
| 27814 | 220 | (** associated values **) | 
| 221 | ||
| 222 | (* access values *) | |
| 223 | ||
| 224 | fun get_value (Token (_, _, Value v)) = v | |
| 225 | | get_value _ = NONE; | |
| 226 | ||
| 227 | fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v))) | |
| 228 | | map_value _ tok = tok; | |
| 229 | ||
| 230 | ||
| 231 | (* make values *) | |
| 232 | ||
| 233 | fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v)); | |
| 234 | ||
| 235 | val mk_text = mk_value "<text>" o Text; | |
| 236 | val mk_typ = mk_value "<typ>" o Typ; | |
| 237 | val mk_term = mk_value "<term>" o Term; | |
| 238 | val mk_fact = mk_value "<fact>" o Fact; | |
| 239 | val mk_attribute = mk_value "<attribute>" o Attribute; | |
| 240 | ||
| 241 | ||
| 242 | (* static binding *) | |
| 243 | ||
| 244 | (*1st stage: make empty slots assignable*) | |
| 32738 | 245 | fun assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE)) | 
| 27814 | 246 | | assignable tok = tok; | 
| 247 | ||
| 248 | (*2nd stage: assign values as side-effect of scanning*) | |
| 249 | fun assign v (Token (_, _, Assignable r)) = r := v | |
| 250 | | assign _ _ = (); | |
| 251 | ||
| 252 | (*3rd stage: static closure of final values*) | |
| 32738 | 253 | fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) | 
| 27814 | 254 | | closure tok = tok; | 
| 255 | ||
| 256 | ||
| 257 | ||
| 5825 | 258 | (** scanners **) | 
| 259 | ||
| 30573 | 260 | open Basic_Symbol_Pos; | 
| 5825 | 261 | |
| 30573 | 262 | fun !!! msg = Symbol_Pos.!!! ("Outer lexical error: " ^ msg);
 | 
| 5825 | 263 | |
| 264 | ||
| 265 | (* scan symbolic idents *) | |
| 266 | ||
| 40627 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 wenzelm parents: 
40531diff
changeset | 267 | val is_sym_char = member (op =) (raw_explode "!#$%&*+-/<=>?@^_|~"); | 
| 5825 | 268 | |
| 8231 | 269 | val scan_symid = | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 270 | Scan.many1 (is_sym_char o Symbol_Pos.symbol) || | 
| 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 271 | Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single; | 
| 5825 | 272 | |
| 8231 | 273 | fun is_symid str = | 
| 274 | (case try Symbol.explode str of | |
| 15531 | 275 | SOME [s] => Symbol.is_symbolic s orelse is_sym_char s | 
| 276 | | SOME ss => forall is_sym_char ss | |
| 8231 | 277 | | _ => false); | 
| 278 | ||
| 27814 | 279 | fun ident_or_symbolic "begin" = false | 
| 280 | | ident_or_symbolic ":" = true | |
| 281 | | ident_or_symbolic "::" = true | |
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 282 | | ident_or_symbolic s = Lexicon.is_identifier s orelse is_symid s; | 
| 5825 | 283 | |
| 284 | ||
| 285 | (* scan verbatim text *) | |
| 286 | ||
| 287 | val scan_verb = | |
| 27769 | 288 | $$$ "*" --| Scan.ahead (~$$$ "}") || | 
| 289 | Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single; | |
| 5825 | 290 | |
| 291 | val scan_verbatim = | |
| 30573 | 292 |   (Symbol_Pos.scan_pos --| $$$ "{" --| $$$ "*") -- !!! "missing end of verbatim text"
 | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 293 | (Symbol_Pos.change_prompt | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 294 | ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos))); | 
| 5825 | 295 | |
| 296 | ||
| 297 | (* scan space *) | |
| 298 | ||
| 19305 | 299 | fun is_space s = Symbol.is_blank s andalso s <> "\n"; | 
| 5825 | 300 | |
| 301 | val scan_space = | |
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 302 | Scan.many1 (is_space o Symbol_Pos.symbol) @@@ Scan.optional ($$$ "\n") [] || | 
| 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 303 | Scan.many (is_space o Symbol_Pos.symbol) @@@ $$$ "\n"; | 
| 5825 | 304 | |
| 305 | ||
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 306 | (* scan comment *) | 
| 5825 | 307 | |
| 308 | val scan_comment = | |
| 30573 | 309 | Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body !!! -- Symbol_Pos.scan_pos); | 
| 5825 | 310 | |
| 311 | ||
| 27663 | 312 | |
| 27769 | 313 | (** token sources **) | 
| 5825 | 314 | |
| 27769 | 315 | fun source_proper src = src |> Source.filter is_proper; | 
| 5825 | 316 | |
| 23678 | 317 | local | 
| 318 | ||
| 27769 | 319 | fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 320 | |
| 27799 | 321 | fun token k ss = | 
| 30573 | 322 | Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.untabify_content ss), Slot); | 
| 27799 | 323 | |
| 324 | fun token_range k (pos1, (ss, pos2)) = | |
| 30573 | 325 | Token (Symbol_Pos.implode_range pos1 pos2 ss, (k, Symbol_Pos.untabify_content ss), Slot); | 
| 23678 | 326 | |
| 27769 | 327 | fun scan (lex1, lex2) = !!! "bad input" | 
| 42503 | 328 | (Symbol_Pos.scan_string_qq >> token_range String || | 
| 329 | Symbol_Pos.scan_string_bq >> token_range AltString || | |
| 27799 | 330 | scan_verbatim >> token_range Verbatim || | 
| 331 | scan_comment >> token_range Comment || | |
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 332 | scan_space >> token Space || | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 333 | Scan.one (Symbol.is_sync o Symbol_Pos.symbol) >> (token Sync o single) || | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 334 | (Scan.max token_leq | 
| 27769 | 335 | (Scan.max token_leq | 
| 336 | (Scan.literal lex2 >> pair Command) | |
| 337 | (Scan.literal lex1 >> pair Keyword)) | |
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 338 | (Lexicon.scan_longid >> pair LongIdent || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 339 | Lexicon.scan_id >> pair Ident || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 340 | Lexicon.scan_var >> pair Var || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 341 | Lexicon.scan_tid >> pair TypeIdent || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 342 | Lexicon.scan_tvar >> pair TypeVar || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 343 | Lexicon.scan_float >> pair Float || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 344 | Lexicon.scan_nat >> pair Nat || | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 345 | scan_symid >> pair SymIdent) >> uncurry token)); | 
| 27769 | 346 | |
| 347 | fun recover msg = | |
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 348 | Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o Symbol_Pos.symbol) | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 349 | >> (single o token (Error msg)); | 
| 23678 | 350 | |
| 351 | in | |
| 5825 | 352 | |
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 353 | fun source' {do_recover} get_lex =
 | 
| 30573 | 354 | Source.source Symbol_Pos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 355 | (Option.map (rpair recover) do_recover); | 
| 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 356 | |
| 5825 | 357 | fun source do_recover get_lex pos src = | 
| 30573 | 358 | Symbol_Pos.source pos src | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 359 | |> source' do_recover get_lex; | 
| 23678 | 360 | |
| 361 | end; | |
| 5825 | 362 | |
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 363 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 364 | (* read_antiq *) | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 365 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 366 | fun read_antiq lex scan (syms, pos) = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 367 | let | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 368 |     fun err msg = cat_error msg ("Malformed antiquotation" ^ Position.str_of pos ^ ":\n" ^
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 369 |       "@{" ^ Symbol_Pos.content syms ^ "}");
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 370 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 371 | val res = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 372 | Source.of_list syms | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 373 |       |> source' {do_recover = NONE} (K (lex, Scan.empty_lexicon))
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 374 | |> source_proper | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 375 | |> Source.source stopper (Scan.error (Scan.bulk scan)) NONE | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 376 | |> Source.exhaust; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 377 | in (case res of [x] => x | _ => err "") handle ERROR msg => err msg end; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 378 | |
| 5825 | 379 | end; |