| author | wenzelm | 
| Tue, 09 Apr 2013 12:29:36 +0200 | |
| changeset 51653 | 97de25c51b2d | 
| parent 51266 | 3007d0bc9cb1 | 
| child 54519 | 5fed81762406 | 
| permissions | -rw-r--r-- | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 1 | (* Title: Pure/Isar/token.ML | 
| 5825 | 2 | Author: Markus Wenzel, TU Muenchen | 
| 3 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 4 | Outer token syntax for Isabelle/Isar. | 
| 5825 | 5 | *) | 
| 6 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 7 | signature TOKEN = | 
| 5825 | 8 | sig | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 9 | datatype kind = | 
| 27814 | 10 | Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | | 
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 11 | Nat | Float | String | AltString | Verbatim | Space | Comment | InternalValue | | 
| 40958 
755f8fe7ced9
eliminated obsolete Token.Malformed -- subsumed by Token.Error;
 wenzelm parents: 
40627diff
changeset | 12 | Error of string | Sync | EOF | 
| 48905 | 13 |   type file = {src_path: Path.T, text: string, pos: Position.T}
 | 
| 27814 | 14 | datatype value = | 
| 15 | Text of string | Typ of typ | Term of term | Fact of thm list | | |
| 48905 | 16 | Attribute of morphism -> attribute | Files of file list | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 17 | type T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 18 | val str_of_kind: kind -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 19 | val position_of: T -> Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 20 | val end_position_of: T -> Position.T | 
| 49866 | 21 | val position_range_of: T list -> Position.range | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 22 | val pos_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 23 | val eof: T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 24 | val is_eof: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 25 | val not_eof: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 26 | val not_sync: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 27 | val stopper: T Scan.stopper | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 28 | val kind_of: T -> kind | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 29 | val is_kind: kind -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 30 | val keyword_with: (string -> bool) -> T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 31 | val ident_with: (string -> bool) -> T -> bool | 
| 46811 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 wenzelm parents: 
45666diff
changeset | 32 | val is_command: T -> bool | 
| 48867 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 33 | val is_name: T -> bool | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 34 | val is_proper: T -> bool | 
| 51266 
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
 wenzelm parents: 
50239diff
changeset | 35 | val is_improper: T -> bool | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 36 | val is_semicolon: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 37 | val is_comment: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 38 | val is_begin_ignore: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 39 | val is_end_ignore: T -> bool | 
| 48749 
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
 wenzelm parents: 
48743diff
changeset | 40 | val is_error: T -> bool | 
| 48771 
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
 wenzelm parents: 
48764diff
changeset | 41 | val is_space: T -> bool | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 42 | val is_blank: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 43 | val is_newline: T -> bool | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 44 | val source_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 45 | val source_position_of: T -> Symbol_Pos.text * Position.T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 46 | val content_of: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 47 | val unparse: T -> string | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 48 | val text_of: T -> string * string | 
| 48905 | 49 | val get_files: T -> file list option | 
| 50 | val put_files: file list -> T -> T | |
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 51 | val get_value: T -> value option | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 52 | val map_value: (value -> value) -> T -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 53 | val mk_text: string -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 54 | val mk_typ: typ -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 55 | val mk_term: term -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 56 | val mk_fact: thm list -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 57 | val mk_attribute: (morphism -> attribute) -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 58 | val assignable: T -> T | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 59 | val assign: value option -> T -> unit | 
| 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 60 | val closure: T -> T | 
| 27814 | 61 | val ident_or_symbolic: string -> bool | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 62 | val source_proper: (T, 'a) Source.source -> (T, (T, 'a) Source.source) Source.source | 
| 48741 | 63 |   val source': {do_recover: bool option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 64 | (Symbol_Pos.T, 'a) Source.source -> (T, (Symbol_Pos.T, 'a) Source.source) Source.source | 
| 48741 | 65 |   val source: {do_recover: bool option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 66 | Position.T -> (Symbol.symbol, 'a) Source.source -> (T, | 
| 30573 | 67 | (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source | 
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 68 | val read_antiq: Scan.lexicon -> (T list -> 'a * T list) -> Symbol_Pos.T list * Position.T -> 'a | 
| 5825 | 69 | end; | 
| 70 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 71 | structure Token: TOKEN = | 
| 5825 | 72 | struct | 
| 73 | ||
| 74 | (** tokens **) | |
| 75 | ||
| 27814 | 76 | (* token values *) | 
| 77 | ||
| 78 | (*The value slot assigns an (optional) internal value to a token, | |
| 79 | usually as a side-effect of special scanner setup (see also | |
| 80 | args.ML). Note that an assignable ref designates an intermediate | |
| 81 | state of internalization -- it is NOT meant to persist.*) | |
| 82 | ||
| 48905 | 83 | type file = {src_path: Path.T, text: string, pos: Position.T};
 | 
| 48867 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 84 | |
| 27814 | 85 | datatype value = | 
| 86 | Text of string | | |
| 87 | Typ of typ | | |
| 88 | Term of term | | |
| 89 | Fact of thm list | | |
| 48867 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 90 | Attribute of morphism -> attribute | | 
| 48905 | 91 | Files of file list; | 
| 27814 | 92 | |
| 93 | datatype slot = | |
| 94 | Slot | | |
| 95 | Value of value option | | |
| 32738 | 96 | Assignable of value option Unsynchronized.ref; | 
| 27814 | 97 | |
| 98 | ||
| 5825 | 99 | (* datatype token *) | 
| 100 | ||
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 101 | datatype kind = | 
| 27814 | 102 | Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | | 
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 103 | Nat | Float | String | AltString | Verbatim | Space | Comment | InternalValue | | 
| 40958 
755f8fe7ced9
eliminated obsolete Token.Malformed -- subsumed by Token.Error;
 wenzelm parents: 
40627diff
changeset | 104 | Error of string | Sync | EOF; | 
| 5825 | 105 | |
| 36959 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 wenzelm parents: 
32738diff
changeset | 106 | datatype T = Token of (Symbol_Pos.text * Position.range) * (kind * string) * slot; | 
| 5825 | 107 | |
| 108 | val str_of_kind = | |
| 7026 | 109 | fn Command => "command" | 
| 110 | | Keyword => "keyword" | |
| 5825 | 111 | | Ident => "identifier" | 
| 112 | | LongIdent => "long identifier" | |
| 113 | | SymIdent => "symbolic identifier" | |
| 114 | | Var => "schematic variable" | |
| 115 | | TypeIdent => "type variable" | |
| 116 | | TypeVar => "schematic type variable" | |
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 117 | | Nat => "natural number" | 
| 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38229diff
changeset | 118 | | Float => "floating-point number" | 
| 5825 | 119 | | String => "string" | 
| 17164 
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
 wenzelm parents: 
17069diff
changeset | 120 | | AltString => "back-quoted string" | 
| 5825 | 121 | | Verbatim => "verbatim text" | 
| 7682 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 wenzelm parents: 
7477diff
changeset | 122 | | Space => "white space" | 
| 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 wenzelm parents: 
7477diff
changeset | 123 | | Comment => "comment text" | 
| 27814 | 124 | | InternalValue => "internal value" | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 125 | | Error _ => "bad input" | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 126 | | Sync => "sync marker" | 
| 48911 
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
 wenzelm parents: 
48905diff
changeset | 127 | | EOF => "end-of-input"; | 
| 5825 | 128 | |
| 129 | ||
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 130 | (* position *) | 
| 5825 | 131 | |
| 27814 | 132 | fun position_of (Token ((_, (pos, _)), _, _)) = pos; | 
| 133 | fun end_position_of (Token ((_, (_, pos)), _, _)) = pos; | |
| 27663 | 134 | |
| 49866 | 135 | fun position_range_of [] = Position.no_range | 
| 136 | | position_range_of toks = (position_of (hd toks), end_position_of (List.last toks)); | |
| 137 | ||
| 48992 | 138 | val pos_of = Position.here o position_of; | 
| 5825 | 139 | |
| 140 | ||
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 141 | (* control tokens *) | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 142 | |
| 27814 | 143 | fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot);
 | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 144 | val eof = mk_eof Position.none; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 145 | |
| 27814 | 146 | fun is_eof (Token (_, (EOF, _), _)) = true | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 147 | | is_eof _ = false; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 148 | |
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 149 | val not_eof = not o is_eof; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 150 | |
| 27814 | 151 | fun not_sync (Token (_, (Sync, _), _)) = false | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 152 | | not_sync _ = true; | 
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 153 | |
| 27752 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 154 | val stopper = | 
| 
ea7d573e565f
removed obsolete range_of (already included in position);
 wenzelm parents: 
27747diff
changeset | 155 | Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof; | 
| 27733 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 156 | |
| 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 wenzelm parents: 
27663diff
changeset | 157 | |
| 5825 | 158 | (* kind of token *) | 
| 159 | ||
| 27814 | 160 | fun kind_of (Token (_, (k, _), _)) = k; | 
| 161 | fun is_kind k (Token (_, (k', _), _)) = k = k'; | |
| 5825 | 162 | |
| 46811 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 wenzelm parents: 
45666diff
changeset | 163 | val is_command = is_kind Command; | 
| 48867 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 164 | val is_name = is_kind Ident orf is_kind SymIdent orf is_kind String orf is_kind Nat; | 
| 46811 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 wenzelm parents: 
45666diff
changeset | 165 | |
| 27814 | 166 | fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x | 
| 7026 | 167 | | keyword_with _ _ = false; | 
| 5825 | 168 | |
| 27814 | 169 | fun ident_with pred (Token (_, (Ident, x), _)) = pred x | 
| 16029 | 170 | | ident_with _ _ = false; | 
| 171 | ||
| 27814 | 172 | fun is_proper (Token (_, (Space, _), _)) = false | 
| 173 | | is_proper (Token (_, (Comment, _), _)) = false | |
| 5825 | 174 | | is_proper _ = true; | 
| 175 | ||
| 51266 
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
 wenzelm parents: 
50239diff
changeset | 176 | val is_improper = not o is_proper; | 
| 
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
 wenzelm parents: 
50239diff
changeset | 177 | |
| 27814 | 178 | fun is_semicolon (Token (_, (Keyword, ";"), _)) = true | 
| 9130 | 179 | | is_semicolon _ = false; | 
| 180 | ||
| 27814 | 181 | fun is_comment (Token (_, (Comment, _), _)) = true | 
| 17069 | 182 | | is_comment _ = false; | 
| 183 | ||
| 27814 | 184 | fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true | 
| 8580 | 185 | | is_begin_ignore _ = false; | 
| 186 | ||
| 27814 | 187 | fun is_end_ignore (Token (_, (Comment, ">"), _)) = true | 
| 8580 | 188 | | is_end_ignore _ = false; | 
| 189 | ||
| 48749 
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
 wenzelm parents: 
48743diff
changeset | 190 | fun is_error (Token (_, (Error _, _), _)) = true | 
| 
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
 wenzelm parents: 
48743diff
changeset | 191 | | is_error _ = false; | 
| 
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
 wenzelm parents: 
48743diff
changeset | 192 | |
| 8651 | 193 | |
| 17069 | 194 | (* blanks and newlines -- space tokens obey lines *) | 
| 8651 | 195 | |
| 48771 
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
 wenzelm parents: 
48764diff
changeset | 196 | fun is_space (Token (_, (Space, _), _)) = true | 
| 
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
 wenzelm parents: 
48764diff
changeset | 197 | | is_space _ = false; | 
| 
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
 wenzelm parents: 
48764diff
changeset | 198 | |
| 27814 | 199 | fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) | 
| 17069 | 200 | | is_blank _ = false; | 
| 201 | ||
| 27814 | 202 | fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x | 
| 8651 | 203 | | is_newline _ = false; | 
| 204 | ||
| 5825 | 205 | |
| 14991 | 206 | (* token content *) | 
| 9155 | 207 | |
| 43731 
70072780e095
inner syntax supports inlined YXML according to Term_XML (particularly useful for producing text under program control);
 wenzelm parents: 
43709diff
changeset | 208 | fun source_of (Token ((source, (pos, _)), (_, x), _)) = | 
| 
70072780e095
inner syntax supports inlined YXML according to Term_XML (particularly useful for producing text under program control);
 wenzelm parents: 
43709diff
changeset | 209 | if YXML.detect x then x | 
| 50201 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 wenzelm parents: 
49866diff
changeset | 210 | else YXML.string_of (XML.Elem (Markup.token (Position.properties_of pos), [XML.Text source])); | 
| 25642 
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
 wenzelm parents: 
25582diff
changeset | 211 | |
| 27885 | 212 | fun source_position_of (Token ((source, (pos, _)), _, _)) = (source, pos); | 
| 27873 | 213 | |
| 27814 | 214 | fun content_of (Token (_, (_, x), _)) = x; | 
| 27747 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 215 | |
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 216 | |
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 217 | (* unparse *) | 
| 
d41abb7bc08a
token: maintain of source, which retains original position information;
 wenzelm parents: 
27733diff
changeset | 218 | |
| 27814 | 219 | fun unparse (Token (_, (kind, x), _)) = | 
| 14991 | 220 | (case kind of | 
| 43773 | 221 | String => Symbol_Pos.quote_string_qq x | 
| 222 | | AltString => Symbol_Pos.quote_string_bq x | |
| 223 |   | Verbatim => enclose "{*" "*}" x
 | |
| 224 | | Comment => enclose "(*" "*)" x | |
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 225 | | Sync => "" | 
| 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 226 | | EOF => "" | 
| 14991 | 227 | | _ => x); | 
| 228 | ||
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 229 | fun text_of tok = | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 230 |   if is_semicolon tok then ("terminator", "")
 | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 231 | else | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 232 | let | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 233 | val k = str_of_kind (kind_of tok); | 
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40290diff
changeset | 234 | val s = unparse tok; | 
| 23788 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 235 | in | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 236 | if s = "" then (k, "") | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 237 | else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "") | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 238 | else (k, s) | 
| 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
23729diff
changeset | 239 | end; | 
| 23729 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 wenzelm parents: 
23721diff
changeset | 240 | |
| 5825 | 241 | |
| 242 | ||
| 27814 | 243 | (** associated values **) | 
| 244 | ||
| 48867 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 245 | (* inlined file content *) | 
| 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 246 | |
| 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 247 | fun get_files (Token (_, _, Value (SOME (Files files)))) = SOME files | 
| 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 248 | | get_files _ = NONE; | 
| 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 249 | |
| 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 250 | fun put_files files (Token (x, y, Slot)) = Token (x, y, Value (SOME (Files files))) | 
| 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 251 | | put_files _ tok = | 
| 48992 | 252 |       raise Fail ("Cannot put inlined files here" ^ Position.here (position_of tok));
 | 
| 48867 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 253 | |
| 
e9beabf045ab
some support for inlining file content into outer syntax token language;
 wenzelm parents: 
48771diff
changeset | 254 | |
| 27814 | 255 | (* access values *) | 
| 256 | ||
| 257 | fun get_value (Token (_, _, Value v)) = v | |
| 258 | | get_value _ = NONE; | |
| 259 | ||
| 260 | fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v))) | |
| 261 | | map_value _ tok = tok; | |
| 262 | ||
| 263 | ||
| 264 | (* make values *) | |
| 265 | ||
| 266 | fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v)); | |
| 267 | ||
| 268 | val mk_text = mk_value "<text>" o Text; | |
| 269 | val mk_typ = mk_value "<typ>" o Typ; | |
| 270 | val mk_term = mk_value "<term>" o Term; | |
| 271 | val mk_fact = mk_value "<fact>" o Fact; | |
| 272 | val mk_attribute = mk_value "<attribute>" o Attribute; | |
| 273 | ||
| 274 | ||
| 275 | (* static binding *) | |
| 276 | ||
| 277 | (*1st stage: make empty slots assignable*) | |
| 32738 | 278 | fun assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE)) | 
| 27814 | 279 | | assignable tok = tok; | 
| 280 | ||
| 281 | (*2nd stage: assign values as side-effect of scanning*) | |
| 282 | fun assign v (Token (_, _, Assignable r)) = r := v | |
| 283 | | assign _ _ = (); | |
| 284 | ||
| 285 | (*3rd stage: static closure of final values*) | |
| 32738 | 286 | fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) | 
| 27814 | 287 | | closure tok = tok; | 
| 288 | ||
| 289 | ||
| 290 | ||
| 5825 | 291 | (** scanners **) | 
| 292 | ||
| 30573 | 293 | open Basic_Symbol_Pos; | 
| 5825 | 294 | |
| 48764 | 295 | val err_prefix = "Outer lexical error: "; | 
| 296 | ||
| 297 | fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg); | |
| 5825 | 298 | |
| 299 | ||
| 300 | (* scan symbolic idents *) | |
| 301 | ||
| 40627 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 wenzelm parents: 
40531diff
changeset | 302 | val is_sym_char = member (op =) (raw_explode "!#$%&*+-/<=>?@^_|~"); | 
| 5825 | 303 | |
| 8231 | 304 | val scan_symid = | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 305 | Scan.many1 (is_sym_char o Symbol_Pos.symbol) || | 
| 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 306 | Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single; | 
| 5825 | 307 | |
| 8231 | 308 | fun is_symid str = | 
| 309 | (case try Symbol.explode str of | |
| 15531 | 310 | SOME [s] => Symbol.is_symbolic s orelse is_sym_char s | 
| 311 | | SOME ss => forall is_sym_char ss | |
| 8231 | 312 | | _ => false); | 
| 313 | ||
| 27814 | 314 | fun ident_or_symbolic "begin" = false | 
| 315 | | ident_or_symbolic ":" = true | |
| 316 | | ident_or_symbolic "::" = true | |
| 50239 | 317 | | ident_or_symbolic s = Symbol_Pos.is_identifier s orelse is_symid s; | 
| 5825 | 318 | |
| 319 | ||
| 320 | (* scan verbatim text *) | |
| 321 | ||
| 322 | val scan_verb = | |
| 27769 | 323 | $$$ "*" --| Scan.ahead (~$$$ "}") || | 
| 324 | Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single; | |
| 5825 | 325 | |
| 326 | val scan_verbatim = | |
| 30573 | 327 |   (Symbol_Pos.scan_pos --| $$$ "{" --| $$$ "*") -- !!! "missing end of verbatim text"
 | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 328 | (Symbol_Pos.change_prompt | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 329 | ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos))); | 
| 5825 | 330 | |
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48741diff
changeset | 331 | val recover_verbatim = | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48741diff
changeset | 332 |   $$$ "{" @@@ $$$ "*" @@@ (Scan.repeat scan_verb >> flat);
 | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48741diff
changeset | 333 | |
| 5825 | 334 | |
| 335 | (* scan space *) | |
| 336 | ||
| 48771 
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
 wenzelm parents: 
48764diff
changeset | 337 | fun space_symbol (s, _) = Symbol.is_blank s andalso s <> "\n"; | 
| 5825 | 338 | |
| 339 | val scan_space = | |
| 48771 
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
 wenzelm parents: 
48764diff
changeset | 340 | Scan.many1 space_symbol @@@ Scan.optional ($$$ "\n") [] || | 
| 
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
 wenzelm parents: 
48764diff
changeset | 341 | Scan.many space_symbol @@@ $$$ "\n"; | 
| 5825 | 342 | |
| 343 | ||
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 344 | (* scan comment *) | 
| 5825 | 345 | |
| 346 | val scan_comment = | |
| 30573 | 347 | Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body !!! -- Symbol_Pos.scan_pos); | 
| 5825 | 348 | |
| 349 | ||
| 27663 | 350 | |
| 27769 | 351 | (** token sources **) | 
| 5825 | 352 | |
| 27769 | 353 | fun source_proper src = src |> Source.filter is_proper; | 
| 5825 | 354 | |
| 23678 | 355 | local | 
| 356 | ||
| 27769 | 357 | fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 358 | |
| 27799 | 359 | fun token k ss = | 
| 43709 
717e96cf9527
discontinued special treatment of hard tabulators;
 wenzelm parents: 
42503diff
changeset | 360 | Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.content ss), Slot); | 
| 27799 | 361 | |
| 362 | fun token_range k (pos1, (ss, pos2)) = | |
| 43709 
717e96cf9527
discontinued special treatment of hard tabulators;
 wenzelm parents: 
42503diff
changeset | 363 | Token (Symbol_Pos.implode_range pos1 pos2 ss, (k, Symbol_Pos.content ss), Slot); | 
| 23678 | 364 | |
| 27769 | 365 | fun scan (lex1, lex2) = !!! "bad input" | 
| 48764 | 366 | (Symbol_Pos.scan_string_qq err_prefix >> token_range String || | 
| 367 | Symbol_Pos.scan_string_bq err_prefix >> token_range AltString || | |
| 27799 | 368 | scan_verbatim >> token_range Verbatim || | 
| 369 | scan_comment >> token_range Comment || | |
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 370 | scan_space >> token Space || | 
| 40525 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 wenzelm parents: 
40523diff
changeset | 371 | Scan.one (Symbol.is_sync o Symbol_Pos.symbol) >> (token Sync o single) || | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 372 | (Scan.max token_leq | 
| 27769 | 373 | (Scan.max token_leq | 
| 374 | (Scan.literal lex2 >> pair Command) | |
| 375 | (Scan.literal lex1 >> pair Keyword)) | |
| 42290 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 376 | (Lexicon.scan_longid >> pair LongIdent || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 377 | Lexicon.scan_id >> pair Ident || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 378 | Lexicon.scan_var >> pair Var || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 379 | Lexicon.scan_tid >> pair TypeIdent || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 380 | Lexicon.scan_tvar >> pair TypeVar || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 381 | Lexicon.scan_float >> pair Float || | 
| 
b1f544c84040
discontinued special treatment of structure Lexicon;
 wenzelm parents: 
40958diff
changeset | 382 | Lexicon.scan_nat >> pair Nat || | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 383 | scan_symid >> pair SymIdent) >> uncurry token)); | 
| 27769 | 384 | |
| 385 | fun recover msg = | |
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48741diff
changeset | 386 | (Symbol_Pos.recover_string_qq || | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48741diff
changeset | 387 | Symbol_Pos.recover_string_bq || | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48741diff
changeset | 388 | recover_verbatim || | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48741diff
changeset | 389 | Symbol_Pos.recover_comment || | 
| 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48741diff
changeset | 390 | Scan.one (Symbol.is_regular o Symbol_Pos.symbol) >> single) | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 391 | >> (single o token (Error msg)); | 
| 23678 | 392 | |
| 393 | in | |
| 5825 | 394 | |
| 27835 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 wenzelm parents: 
27814diff
changeset | 395 | fun source' {do_recover} get_lex =
 | 
| 30573 | 396 | Source.source Symbol_Pos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 397 | (Option.map (rpair recover) do_recover); | 
| 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 398 | |
| 5825 | 399 | fun source do_recover get_lex pos src = | 
| 30573 | 400 | Symbol_Pos.source pos src | 
| 27780 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 wenzelm parents: 
27769diff
changeset | 401 | |> source' do_recover get_lex; | 
| 23678 | 402 | |
| 403 | end; | |
| 5825 | 404 | |
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 405 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 406 | (* read_antiq *) | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 407 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 408 | fun read_antiq lex scan (syms, pos) = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 409 | let | 
| 48992 | 410 |     fun err msg = cat_error msg ("Malformed antiquotation" ^ Position.here pos ^ ":\n" ^
 | 
| 30586 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 411 |       "@{" ^ Symbol_Pos.content syms ^ "}");
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 412 | |
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 413 | val res = | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 414 | Source.of_list syms | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 415 |       |> source' {do_recover = NONE} (K (lex, Scan.empty_lexicon))
 | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 416 | |> source_proper | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 417 | |> Source.source stopper (Scan.error (Scan.bulk scan)) NONE | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 418 | |> Source.exhaust; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 419 | in (case res of [x] => x | _ => err "") handle ERROR msg => err msg end; | 
| 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 wenzelm parents: 
30573diff
changeset | 420 | |
| 5825 | 421 | end; |