src/Pure/Isar/outer_lex.ML
author boehmes
Wed, 12 May 2010 23:53:59 +0200
changeset 36895 a96f9793d9c5
parent 32738 15bb09ca0378
permissions -rw-r--r--
split monolithic Z3 proof reconstruction structure into separate structures, use one set of schematic theorems for all uncertain proof rules (to extend proof reconstruction by missing cases), added several schematic theorems, improved abstraction of goals (abstract all uninterpreted sub-terms, only leave builtin symbols)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Isar/outer_lex.ML
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     2
    Author:     Markus Wenzel, TU Muenchen
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     3
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     4
Outer lexical syntax for Isabelle/Isar.
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     5
*)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     6
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     7
signature OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     8
sig
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     9
  datatype token_kind =
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    10
    Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    11
    Nat | String | AltString | Verbatim | Space | Comment | InternalValue |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    12
    Malformed | Error of string | Sync | EOF
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    13
  datatype value =
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    14
    Text of string | Typ of typ | Term of term | Fact of thm list |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    15
    Attribute of morphism -> attribute
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    16
  type token
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    17
  val str_of_kind: token_kind -> string
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    18
  val position_of: token -> Position.T
27752
ea7d573e565f removed obsolete range_of (already included in position);
wenzelm
parents: 27747
diff changeset
    19
  val end_position_of: token -> Position.T
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    20
  val pos_of: token -> string
27733
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
    21
  val eof: token
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
    22
  val is_eof: token -> bool
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
    23
  val not_eof: token -> bool
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
    24
  val not_sync: token -> bool
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
    25
  val stopper: token Scan.stopper
23721
aa088ef9237c added kind_of;
wenzelm
parents: 23682
diff changeset
    26
  val kind_of: token -> token_kind
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    27
  val is_kind: token_kind -> token -> bool
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    28
  val keyword_with: (string -> bool) -> token -> bool
16029
070ed43b86f8 added ident_with;
wenzelm
parents: 15531
diff changeset
    29
  val ident_with: (string -> bool) -> token -> bool
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    30
  val is_proper: token -> bool
9130
ff8789b49d2e added !!!;
wenzelm
parents: 9051
diff changeset
    31
  val is_semicolon: token -> bool
17069
ee08b2466a09 clarify is_newline vs. is_blank;
wenzelm
parents: 16029
diff changeset
    32
  val is_comment: token -> bool
8580
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
    33
  val is_begin_ignore: token -> bool
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
    34
  val is_end_ignore: token -> bool
17069
ee08b2466a09 clarify is_newline vs. is_blank;
wenzelm
parents: 16029
diff changeset
    35
  val is_blank: token -> bool
8651
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
    36
  val is_newline: token -> bool
27747
d41abb7bc08a token: maintain of source, which retains original position information;
wenzelm
parents: 27733
diff changeset
    37
  val source_of: token -> string
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
    38
  val source_position_of: token -> Symbol_Pos.text * Position.T
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    39
  val content_of: token -> string
14991
26fb63c4acb5 added unparse;
wenzelm
parents: 14981
diff changeset
    40
  val unparse: token -> string
23788
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
    41
  val text_of: token -> string * string
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    42
  val get_value: token -> value option
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    43
  val map_value: (value -> value) -> token -> token
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    44
  val mk_text: string -> token
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    45
  val mk_typ: typ -> token
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    46
  val mk_term: term -> token
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    47
  val mk_fact: thm list -> token
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    48
  val mk_attribute: (morphism -> attribute) -> token
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    49
  val assignable: token -> token
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    50
  val assign: value option -> token -> unit
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    51
  val closure: token -> token
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    52
  val ident_or_symbolic: string -> bool
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
    53
  val !!! : string -> (Symbol_Pos.T list -> 'a) -> Symbol_Pos.T list -> 'a
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
    54
  val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source
27835
ff8b8513965a Symbol.source/OuterLex.source: more explicit do_recover argument;
wenzelm
parents: 27814
diff changeset
    55
  val source': {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
    56
    (Symbol_Pos.T, 'a) Source.source -> (token, (Symbol_Pos.T, 'a) Source.source) Source.source
27835
ff8b8513965a Symbol.source/OuterLex.source: more explicit do_recover argument;
wenzelm
parents: 27814
diff changeset
    57
  val source: {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
    58
    Position.T -> (Symbol.symbol, 'a) Source.source -> (token,
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
    59
      (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source
30586
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
    60
  val read_antiq: Scan.lexicon -> (token list -> 'a * token list) ->
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
    61
    Symbol_Pos.T list * Position.T -> 'a
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    62
end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    63
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    64
structure OuterLex: OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    65
struct
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    66
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    67
(** tokens **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    68
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    69
(* token values *)
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    70
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    71
(*The value slot assigns an (optional) internal value to a token,
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    72
  usually as a side-effect of special scanner setup (see also
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    73
  args.ML).  Note that an assignable ref designates an intermediate
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    74
  state of internalization -- it is NOT meant to persist.*)
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    75
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    76
datatype value =
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    77
  Text of string |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    78
  Typ of typ |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    79
  Term of term |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    80
  Fact of thm list |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    81
  Attribute of morphism -> attribute;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    82
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    83
datatype slot =
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    84
  Slot |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    85
  Value of value option |
32738
15bb09ca0378 explicit indication of Unsynchronized.ref;
wenzelm
parents: 30586
diff changeset
    86
  Assignable of value option Unsynchronized.ref;
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    87
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    88
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    89
(* datatype token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    90
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    91
datatype token_kind =
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    92
  Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    93
  Nat | String | AltString | Verbatim | Space | Comment | InternalValue |
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
    94
  Malformed | Error of string | Sync | EOF;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    95
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
    96
datatype token = Token of (Symbol_Pos.text * Position.range) * (token_kind * string) * slot;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    97
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    98
val str_of_kind =
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    99
 fn Command => "command"
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   100
  | Keyword => "keyword"
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   101
  | Ident => "identifier"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   102
  | LongIdent => "long identifier"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   103
  | SymIdent => "symbolic identifier"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   104
  | Var => "schematic variable"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   105
  | TypeIdent => "type variable"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   106
  | TypeVar => "schematic type variable"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   107
  | Nat => "number"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   108
  | String => "string"
17164
a786e1a1ce02 added AltString token (delimited by ASCII back-quotes);
wenzelm
parents: 17069
diff changeset
   109
  | AltString => "back-quoted string"
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   110
  | Verbatim => "verbatim text"
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   111
  | Space => "white space"
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   112
  | Comment => "comment text"
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   113
  | InternalValue => "internal value"
23729
d1ba656978c5 separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents: 23721
diff changeset
   114
  | Malformed => "malformed symbolic character"
d1ba656978c5 separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents: 23721
diff changeset
   115
  | Error _ => "bad input"
23788
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   116
  | Sync => "sync marker"
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   117
  | EOF => "end-of-file";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   118
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   119
27733
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   120
(* position *)
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   121
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   122
fun position_of (Token ((_, (pos, _)), _, _)) = pos;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   123
fun end_position_of (Token ((_, (_, pos)), _, _)) = pos;
27663
098798321622 maintain token range;
wenzelm
parents: 27358
diff changeset
   124
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   125
val pos_of = Position.str_of o position_of;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   126
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   127
27733
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   128
(* control tokens *)
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   129
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   130
fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot);
27733
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   131
val eof = mk_eof Position.none;
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   132
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   133
fun is_eof (Token (_, (EOF, _), _)) = true
27733
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   134
  | is_eof _ = false;
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   135
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   136
val not_eof = not o is_eof;
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   137
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   138
fun not_sync (Token (_, (Sync, _), _)) = false
27733
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   139
  | not_sync _ = true;
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   140
27752
ea7d573e565f removed obsolete range_of (already included in position);
wenzelm
parents: 27747
diff changeset
   141
val stopper =
ea7d573e565f removed obsolete range_of (already included in position);
wenzelm
parents: 27747
diff changeset
   142
  Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof;
27733
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   143
d3d7038fb7b5 abstract type Scan.stopper, position taken from last input token;
wenzelm
parents: 27663
diff changeset
   144
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   145
(* kind of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   146
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   147
fun kind_of (Token (_, (k, _), _)) = k;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   148
fun is_kind k (Token (_, (k', _), _)) = k = k';
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   149
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   150
fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   151
  | keyword_with _ _ = false;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   152
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   153
fun ident_with pred (Token (_, (Ident, x), _)) = pred x
16029
070ed43b86f8 added ident_with;
wenzelm
parents: 15531
diff changeset
   154
  | ident_with _ _ = false;
070ed43b86f8 added ident_with;
wenzelm
parents: 15531
diff changeset
   155
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   156
fun is_proper (Token (_, (Space, _), _)) = false
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   157
  | is_proper (Token (_, (Comment, _), _)) = false
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   158
  | is_proper _ = true;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   159
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   160
fun is_semicolon (Token (_, (Keyword, ";"), _)) = true
9130
ff8789b49d2e added !!!;
wenzelm
parents: 9051
diff changeset
   161
  | is_semicolon _ = false;
ff8789b49d2e added !!!;
wenzelm
parents: 9051
diff changeset
   162
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   163
fun is_comment (Token (_, (Comment, _), _)) = true
17069
ee08b2466a09 clarify is_newline vs. is_blank;
wenzelm
parents: 16029
diff changeset
   164
  | is_comment _ = false;
ee08b2466a09 clarify is_newline vs. is_blank;
wenzelm
parents: 16029
diff changeset
   165
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   166
fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true
8580
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   167
  | is_begin_ignore _ = false;
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   168
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   169
fun is_end_ignore (Token (_, (Comment, ">"), _)) = true
8580
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   170
  | is_end_ignore _ = false;
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   171
8651
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   172
17069
ee08b2466a09 clarify is_newline vs. is_blank;
wenzelm
parents: 16029
diff changeset
   173
(* blanks and newlines -- space tokens obey lines *)
8651
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   174
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   175
fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x)
17069
ee08b2466a09 clarify is_newline vs. is_blank;
wenzelm
parents: 16029
diff changeset
   176
  | is_blank _ = false;
ee08b2466a09 clarify is_newline vs. is_blank;
wenzelm
parents: 16029
diff changeset
   177
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   178
fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x
8651
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   179
  | is_newline _ = false;
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   180
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   181
14991
26fb63c4acb5 added unparse;
wenzelm
parents: 14981
diff changeset
   182
(* token content *)
9155
adfa40218e06 OuterLex.name_of: include val;
wenzelm
parents: 9130
diff changeset
   183
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   184
fun source_of (Token ((source, (pos, _)), _, _)) =
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   185
  YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text source]));
25642
ebdff0dca2a5 text_of: made even more robust against recurrent errors;
wenzelm
parents: 25582
diff changeset
   186
27885
76b51cd0a37c renamed T.source_of' to T.source_position_of;
wenzelm
parents: 27873
diff changeset
   187
fun source_position_of (Token ((source, (pos, _)), _, _)) = (source, pos);
27873
34d61938e27a added source_of';
wenzelm
parents: 27856
diff changeset
   188
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   189
fun content_of (Token (_, (_, x), _)) = x;
27747
d41abb7bc08a token: maintain of source, which retains original position information;
wenzelm
parents: 27733
diff changeset
   190
d41abb7bc08a token: maintain of source, which retains original position information;
wenzelm
parents: 27733
diff changeset
   191
d41abb7bc08a token: maintain of source, which retains original position information;
wenzelm
parents: 27733
diff changeset
   192
(* unparse *)
d41abb7bc08a token: maintain of source, which retains original position information;
wenzelm
parents: 27733
diff changeset
   193
18547
d1978038b945 unparse String/AltString: escape quotes;
wenzelm
parents: 17164
diff changeset
   194
fun escape q =
d1978038b945 unparse String/AltString: escape quotes;
wenzelm
parents: 17164
diff changeset
   195
  implode o map (fn s => if s = q orelse s = "\\" then "\\" ^ s else s) o Symbol.explode;
d1978038b945 unparse String/AltString: escape quotes;
wenzelm
parents: 17164
diff changeset
   196
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   197
fun unparse (Token (_, (kind, x), _)) =
14991
26fb63c4acb5 added unparse;
wenzelm
parents: 14981
diff changeset
   198
  (case kind of
18547
d1978038b945 unparse String/AltString: escape quotes;
wenzelm
parents: 17164
diff changeset
   199
    String => x |> quote o escape "\""
d1978038b945 unparse String/AltString: escape quotes;
wenzelm
parents: 17164
diff changeset
   200
  | AltString => x |> enclose "`" "`" o escape "`"
14991
26fb63c4acb5 added unparse;
wenzelm
parents: 14981
diff changeset
   201
  | Verbatim => x |> enclose "{*" "*}"
26fb63c4acb5 added unparse;
wenzelm
parents: 14981
diff changeset
   202
  | Comment => x |> enclose "(*" "*)"
28034
33050286e65d text_of Malformed: explode with spaces -- Output.output/escape ineffective by default;
wenzelm
parents: 27885
diff changeset
   203
  | Malformed => space_implode " " (explode x)
23729
d1ba656978c5 separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents: 23721
diff changeset
   204
  | Sync => ""
d1ba656978c5 separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents: 23721
diff changeset
   205
  | EOF => ""
14991
26fb63c4acb5 added unparse;
wenzelm
parents: 14981
diff changeset
   206
  | _ => x);
26fb63c4acb5 added unparse;
wenzelm
parents: 14981
diff changeset
   207
23788
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   208
fun text_of tok =
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   209
  if is_semicolon tok then ("terminator", "")
23729
d1ba656978c5 separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents: 23721
diff changeset
   210
  else
23788
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   211
    let
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   212
      val k = str_of_kind (kind_of tok);
25642
ebdff0dca2a5 text_of: made even more robust against recurrent errors;
wenzelm
parents: 25582
diff changeset
   213
      val s = unparse tok
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   214
        handle ERROR _ => Symbol.separate_chars (content_of tok);
23788
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   215
    in
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   216
      if s = "" then (k, "")
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   217
      else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "")
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   218
      else (k, s)
54ce229dc858 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 23729
diff changeset
   219
    end;
23729
d1ba656978c5 separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents: 23721
diff changeset
   220
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   221
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   222
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   223
(** associated values **)
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   224
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   225
(* access values *)
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   226
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   227
fun get_value (Token (_, _, Value v)) = v
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   228
  | get_value _ = NONE;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   229
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   230
fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v)))
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   231
  | map_value _ tok = tok;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   232
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   233
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   234
(* make values *)
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   235
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   236
fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v));
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   237
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   238
val mk_text = mk_value "<text>" o Text;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   239
val mk_typ = mk_value "<typ>" o Typ;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   240
val mk_term = mk_value "<term>" o Term;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   241
val mk_fact = mk_value "<fact>" o Fact;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   242
val mk_attribute = mk_value "<attribute>" o Attribute;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   243
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   244
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   245
(* static binding *)
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   246
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   247
(*1st stage: make empty slots assignable*)
32738
15bb09ca0378 explicit indication of Unsynchronized.ref;
wenzelm
parents: 30586
diff changeset
   248
fun assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE))
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   249
  | assignable tok = tok;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   250
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   251
(*2nd stage: assign values as side-effect of scanning*)
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   252
fun assign v (Token (_, _, Assignable r)) = r := v
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   253
  | assign _ _ = ();
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   254
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   255
(*3rd stage: static closure of final values*)
32738
15bb09ca0378 explicit indication of Unsynchronized.ref;
wenzelm
parents: 30586
diff changeset
   256
fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v)
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   257
  | closure tok = tok;
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   258
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   259
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   260
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   261
(** scanners **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   262
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
   263
open Basic_Symbol_Pos;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   264
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
   265
fun !!! msg = Symbol_Pos.!!! ("Outer lexical error: " ^ msg);
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   266
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   267
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   268
(* scan symbolic idents *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   269
20664
ffbc5a57191a member (op =);
wenzelm
parents: 20112
diff changeset
   270
val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~");
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   271
8231
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   272
val scan_symid =
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   273
  Scan.many1 (is_sym_char o symbol) ||
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   274
  Scan.one (Symbol.is_symbolic o symbol) >> single;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   275
8231
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   276
fun is_symid str =
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   277
  (case try Symbol.explode str of
15531
08c8dad8e399 Deleted Library.option type.
skalberg
parents: 15224
diff changeset
   278
    SOME [s] => Symbol.is_symbolic s orelse is_sym_char s
08c8dad8e399 Deleted Library.option type.
skalberg
parents: 15224
diff changeset
   279
  | SOME ss => forall is_sym_char ss
8231
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   280
  | _ => false);
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   281
27814
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   282
fun ident_or_symbolic "begin" = false
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   283
  | ident_or_symbolic ":" = true
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   284
  | ident_or_symbolic "::" = true
05a50886dacb unified Args.T with OuterLex.token;
wenzelm
parents: 27799
diff changeset
   285
  | ident_or_symbolic s = Syntax.is_identifier s orelse is_symid s;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   286
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   287
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   288
(* scan verbatim text *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   289
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   290
val scan_verb =
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   291
  $$$ "*" --| Scan.ahead (~$$$ "}") ||
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   292
  Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   293
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   294
val scan_verbatim =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
   295
  (Symbol_Pos.scan_pos --| $$$ "{" --| $$$ "*") -- !!! "missing end of verbatim text"
30586
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   296
    (Symbol_Pos.change_prompt
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   297
      ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos)));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   298
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   299
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   300
(* scan space *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   301
19305
5c16895d548b avoid polymorphic equality;
wenzelm
parents: 18547
diff changeset
   302
fun is_space s = Symbol.is_blank s andalso s <> "\n";
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   303
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   304
val scan_space =
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   305
  Scan.many1 (is_space o symbol) @@@ Scan.optional ($$$ "\n") [] ||
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   306
  Scan.many (is_space o symbol) @@@ $$$ "\n";
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   307
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   308
27780
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   309
(* scan comment *)
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   310
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   311
val scan_comment =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
   312
  Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body !!! -- Symbol_Pos.scan_pos);
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   313
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   314
23678
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   315
(* scan malformed symbols *)
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   316
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   317
val scan_malformed =
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   318
  $$$ Symbol.malformed |--
30586
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   319
    Symbol_Pos.change_prompt (Scan.many (Symbol.is_regular o symbol))
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   320
  --| Scan.option ($$$ Symbol.end_malformed);
27752
ea7d573e565f removed obsolete range_of (already included in position);
wenzelm
parents: 27747
diff changeset
   321
ea7d573e565f removed obsolete range_of (already included in position);
wenzelm
parents: 27747
diff changeset
   322
27663
098798321622 maintain token range;
wenzelm
parents: 27358
diff changeset
   323
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   324
(** token sources **)
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   325
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   326
fun source_proper src = src |> Source.filter is_proper;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   327
23678
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   328
local
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   329
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   330
fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;
27780
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   331
27799
52f07d5292cd tuned SymbolPos interface;
wenzelm
parents: 27780
diff changeset
   332
fun token k ss =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
   333
  Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.untabify_content ss), Slot);
27799
52f07d5292cd tuned SymbolPos interface;
wenzelm
parents: 27780
diff changeset
   334
52f07d5292cd tuned SymbolPos interface;
wenzelm
parents: 27780
diff changeset
   335
fun token_range k (pos1, (ss, pos2)) =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
   336
  Token (Symbol_Pos.implode_range pos1 pos2 ss, (k, Symbol_Pos.untabify_content ss), Slot);
23678
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   337
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   338
fun scan (lex1, lex2) = !!! "bad input"
30586
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   339
  (Symbol_Pos.scan_string >> token_range String ||
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   340
    Symbol_Pos.scan_alt_string >> token_range AltString ||
27799
52f07d5292cd tuned SymbolPos interface;
wenzelm
parents: 27780
diff changeset
   341
    scan_verbatim >> token_range Verbatim ||
52f07d5292cd tuned SymbolPos interface;
wenzelm
parents: 27780
diff changeset
   342
    scan_comment >> token_range Comment ||
27780
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   343
    scan_space >> token Space ||
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   344
    scan_malformed >> token Malformed ||
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   345
    Scan.one (Symbol.is_sync o symbol) >> (token Sync o single) ||
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   346
    (Scan.max token_leq
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   347
      (Scan.max token_leq
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   348
        (Scan.literal lex2 >> pair Command)
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   349
        (Scan.literal lex1 >> pair Keyword))
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   350
      (Syntax.scan_longid >> pair LongIdent ||
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   351
        Syntax.scan_id >> pair Ident ||
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   352
        Syntax.scan_var >> pair Var ||
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   353
        Syntax.scan_tid >> pair TypeIdent ||
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   354
        Syntax.scan_tvar >> pair TypeVar ||
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   355
        Syntax.scan_nat >> pair Nat ||
27780
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   356
        scan_symid >> pair SymIdent) >> uncurry token));
27769
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   357
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   358
fun recover msg =
ad50c38ef842 improved position handling due to SymbolPos.T;
wenzelm
parents: 27752
diff changeset
   359
  Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o symbol)
27780
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   360
  >> (single o token (Error msg));
23678
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   361
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   362
in
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   363
27835
ff8b8513965a Symbol.source/OuterLex.source: more explicit do_recover argument;
wenzelm
parents: 27814
diff changeset
   364
fun source' {do_recover} get_lex =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
   365
  Source.source Symbol_Pos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
27780
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   366
    (Option.map (rpair recover) do_recover);
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   367
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   368
fun source do_recover get_lex pos src =
30573
49899f26fbd1 de-camelized Symbol_Pos;
wenzelm
parents: 29606
diff changeset
   369
  Symbol_Pos.source pos src
27780
7d0910f662f7 more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents: 27769
diff changeset
   370
  |> source' do_recover get_lex;
23678
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   371
f5d315390edc Malformed token: error msg;
wenzelm
parents: 22873
diff changeset
   372
end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   373
30586
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   374
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   375
(* read_antiq *)
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   376
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   377
fun read_antiq lex scan (syms, pos) =
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   378
  let
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   379
    fun err msg = cat_error msg ("Malformed antiquotation" ^ Position.str_of pos ^ ":\n" ^
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   380
      "@{" ^ Symbol_Pos.content syms ^ "}");
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   381
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   382
    val res =
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   383
      Source.of_list syms
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   384
      |> source' {do_recover = NONE} (K (lex, Scan.empty_lexicon))
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   385
      |> source_proper
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   386
      |> Source.source stopper (Scan.error (Scan.bulk scan)) NONE
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   387
      |> Source.exhaust;
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   388
  in (case res of [x] => x | _ => err "") handle ERROR msg => err msg end;
9674f64a0702 moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents: 30573
diff changeset
   389
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   390
end;