src/Pure/Isar/outer_lex.ML
author wenzelm
Tue, 07 Sep 1999 17:21:44 +0200
changeset 7506 08a88d4ebd54
parent 7477 c7caea1ce78c
child 7682 46de8064c93c
permissions -rw-r--r--
Method.refine_no_facts;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Isar/outer_lex.ML
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     2
    ID:         $Id$
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     3
    Author:     Markus Wenzel, TU Muenchen
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     4
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     5
Outer lexical syntax for Isabelle/Isar.
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     6
*)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     7
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     8
signature OUTER_LEX =
sig
  (*kinds of tokens*)
  datatype token_kind =
      Command | Keyword
    | Ident | LongIdent | SymIdent
    | Var | TypeIdent | TypeVar
    | Nat | String | Verbatim
    | Ignore | Sync | EOF
  (*abstract tokens*)
  type token
  val str_of_kind: token_kind -> string
  (*end-of-input and sync recognition*)
  val stopper: token * (token -> bool)
  val not_sync: token -> bool
  val not_eof: token -> bool
  (*token inspection*)
  val position_of: token -> Position.T
  val pos_of: token -> string
  val is_kind: token_kind -> token -> bool
  val keyword_with: (string -> bool) -> token -> bool
  val name_of: token -> string
  val is_proper: token -> bool
  val val_of: token -> string
  val is_sid: string -> bool
  (*scanning: the pair of lexicons is (keywords, commands)*)
  val scan:
    (Scan.lexicon * Scan.lexicon) ->
      Position.T * Symbol.symbol list ->
        token * (Position.T * Symbol.symbol list)
  val source:
    bool ->
      (unit -> (Scan.lexicon * Scan.lexicon)) ->
        Position.T ->
          (Symbol.symbol, 'a) Source.source ->
            (token, (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source)
              Source.source
end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    32
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    33
structure OuterLex: OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    34
struct
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    35
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    36
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    37
(** tokens **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    38
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    39
(* datatype token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    40
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    41
(*classification of outer syntax tokens; str_of_kind gives the printable name of each*)
datatype token_kind =
    Command | Keyword
  | Ident | LongIdent | SymIdent
  | Var | TypeIdent | TypeVar
  | Nat | String | Verbatim
  | Ignore | Sync | EOF;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    44
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    45
(*a token: source position paired with its kind and its string value*)
datatype token = Token of Position.T * (token_kind * string);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    46
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    47
(*printable name of a token kind, as used by name_of*)
fun str_of_kind kind =
  (case kind of
    Command => "command"
  | Keyword => "keyword"
  | Ident => "identifier"
  | LongIdent => "long identifier"
  | SymIdent => "symbolic identifier"
  | Var => "schematic variable"
  | TypeIdent => "type variable"
  | TypeVar => "schematic type variable"
  | Nat => "number"
  | String => "string"
  | Verbatim => "verbatim text"
  | Ignore => "ignored text"
  | Sync => "sync marker"
  | EOF => "end-of-file");
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    62
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    63
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    64
(* sync token *)
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    65
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    66
(*true for every token except one of kind Sync*)
fun not_sync (Token (_, (kind, _))) =
  (case kind of
    Sync => false
  | _ => true);
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    68
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    69
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    70
(* eof token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    71
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    72
(*the end-of-file token: kind EOF, no position, empty string value*)
val eof = Token (Position.none, (EOF, ""));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    73
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    74
(*recognizes tokens of kind EOF, whatever their position or value*)
fun is_eof (Token (_, (kind, _))) =
  (case kind of
    EOF => true
  | _ => false);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    76
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    77
(*stopper pair: the EOF token together with the predicate that recognizes it*)
val stopper = (eof, is_eof);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    78
(*negation of is_eof*)
fun not_eof tok = not (is_eof tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    79
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    80
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    81
(* get position *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    82
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    83
(*position component stored in a token*)
val position_of = fn Token (p, _) => p;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    84
(*token position rendered as a string via Position.str_of*)
fun pos_of tok = Position.str_of (position_of tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    85
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    86
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    87
(* kind of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    88
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    89
(*does the token have exactly the given kind?*)
fun is_kind kind (Token (_, (kind', _))) = (kind = kind');
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    90
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    91
(*apply pred to the value of a Keyword token; any other kind of token yields false*)
fun keyword_with pred (Token (_, (kind, text))) =
  (case kind of
    Keyword => pred text
  | _ => false);
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    93
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    94
(*printable name of the kind of a token*)
val name_of = fn Token (_, (kind, _)) => str_of_kind kind;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    95
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    96
(*proper tokens are all those not of kind Ignore*)
fun is_proper (Token (_, (kind, _))) =
  (case kind of
    Ignore => false
  | _ => true);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    98
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    99
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   100
(* value of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   101
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   102
(*string value stored in a token*)
val val_of = fn Token (_, (_, text)) => text;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   103
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   104
(*order tokens by string comparison of their values; positions and kinds are ignored*)
fun token_leq (Token (_, (_, text1)), Token (_, (_, text2))) =
  let val ordered = text1 <= text2
  in ordered end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   105
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   106
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   107
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   108
(** scanners **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   109
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   110
(*run a scanner under the prompt "# " (passed to Scan.prompt)*)
fun change_prompt scanner = Scan.prompt "# " scanner;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   111
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   112
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   113
(* diagnostics *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   114
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   115
(*build a lexical error message: fixed prefix, printed position, then msg
  applied to the remaining symbols; the second component of the argument pair
  is ignored*)
fun lex_err msg ((pos, cs), _) =
  let val location = Position.str_of pos
  in "Outer lexical error" ^ location ^ ": " ^ msg cs end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   116
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   117
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   118
(* line numbering *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   119
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   120
(*run a scanner on the symbols and, if it succeeds, replace the position state by
  Position.inc of the old one*)
fun incr_line scanner =
  Scan.depend (fn line =>
    let val next_line = Position.inc line
    in scanner >> (fn result => (next_line, result)) end);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   121
(*run a scanner on the symbols only, via Scan.lift; counterpart of incr_line
  for input that does not advance the position*)
fun keep_line scanner = Scan.lift scanner;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   122
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   123
(*scan a single blank symbol; a newline additionally increments the position*)
fun scan_blank input =
  let
    fun newline xs = incr_line ($$ "\n") xs;
    fun other_blank xs = keep_line (Scan.one Symbol.is_blank) xs;
  in (newline || other_blank) input end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   126
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   127
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   128
(* scan symbolic idents *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   129
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   130
(*the characters admitted in symbolic identifiers, see is_sym_char*)
val sym_chars = explode "!#$%&*+-/:<=>?@^_`|~";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   131
(*membership test against sym_chars*)
fun is_sym_char sym = exists (fn c => c = sym) sym_chars;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   132
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   133
(*scan a non-empty run of symbolic characters and join it into one string*)
fun scan_symid cs = (Scan.any1 is_sym_char >> implode) cs;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   134
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   135
(*a symbolic identifier is a non-empty string consisting of symbolic characters only*)
fun is_symid "" = false
  | is_symid str = forall is_sym_char (Symbol.explode str);
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   136
(*accepts symbolic identifiers as well as whatever Syntax.is_identifier accepts*)
fun is_sid str = is_symid str orelse Syntax.is_identifier str;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   137
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   138
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   139
(* scan strings *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   140
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   141
(*scan one element of a string body; alternatives are tried in this order:
    - a blank symbol, returned as Symbol.space (newlines increment the position,
      see scan_blank);
    - a backslash followed by any symbol other than sync/eof, returning that
      symbol -- an escaped newline goes through incr_line so that the position
      stays in step with the input, previously it was consumed via keep_line
      and the line count was lost;
    - any symbol that is neither backslash, double quote, sync nor eof*)
val scan_str =
  scan_blank >> K Symbol.space ||
  keep_line ($$ "\\") |--
    (incr_line ($$ "\n") ||
     keep_line (Scan.one (Symbol.not_sync andf Symbol.not_eof))) ||
  keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf
    Symbol.not_sync andf Symbol.not_eof));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   146
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   147
(*scan a string enclosed in double quotes and return its body; once the opening
  quote has been seen, a missing closing quote is reported through lex_err*)
fun scan_string input =
  let
    fun quote_mark xs = keep_line ($$ "\"") xs;
    fun contents xs = (Scan.repeat scan_str >> implode) xs;
  in
    (quote_mark |--
      !! (lex_err (K "missing quote at end of string"))
        (change_prompt (contents --| quote_mark))) input
  end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   151
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   152
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   153
(* scan verbatim text *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   154
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   155
(*scan one symbol of verbatim text: a blank, a star not followed by a closing
  brace, or any symbol other than star, sync and eof*)
fun scan_verb input =
  let
    fun lone_star xs =
      keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal "}"))) xs;
    fun plain xs =
      keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof)) xs;
  in (scan_blank || lone_star || plain) input end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   159
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   160
(*scan verbatim text enclosed in {* and *} and return its body; once the opening
  delimiter has been seen, a missing end is reported through lex_err*)
fun scan_verbatim input =
  let
    fun opening xs = keep_line ($$ "{" -- $$ "*") xs;
    fun closing xs = keep_line ($$ "*" -- $$ "}") xs;
    fun contents xs = (Scan.repeat scan_verb >> implode) xs;
  in
    (opening |--
      !! (lex_err (K "missing end of verbatim text"))
        (change_prompt (contents --| closing))) input
  end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   164
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   165
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   166
(* scan space *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   167
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   168
(*blank symbols other than newline*)
fun is_space sym = Symbol.is_blank sym andalso "\n" <> sym;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   169
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   170
(*scan white space up to and including at most one newline: either at least one
  space with an optional newline, or optional spaces with a mandatory newline;
  a newline increments the position*)
fun scan_space input =
  let
    fun newline xs = incr_line ($$ "\n") xs;
    fun spaces_first xs =
      (keep_line (Scan.any1 is_space) |-- Scan.optional newline "") xs;
    fun newline_required xs =
      (keep_line (Scan.any is_space) |-- newline) xs;
  in (spaces_first || newline_required) input end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   173
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   174
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   175
(* scan nested comments *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   176
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   177
(*scan one element of a comment body; an integer nesting depth is threaded as
  additional state in front of the position.  Alternatives, in order: a blank;
  a nested comment opener (depth + 1); a comment closer, only while depth is
  non-zero (depth - 1); a star not followed by a closing parenthesis; any
  symbol other than star, sync and eof*)
fun scan_cmt input =
  let
    fun nested_open xs =
      Scan.depend (fn depth => keep_line ($$ "(" ^^ $$ "*") >> pair (depth + 1)) xs;
    fun nested_close xs =
      Scan.depend
        (fn 0 => Scan.fail
          | depth => keep_line ($$ "*" ^^ $$ ")") >> pair (depth - 1)) xs;
    fun lone_star xs =
      Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) xs;
    fun plain xs =
      Scan.lift
        (keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof))) xs;
  in
    (Scan.lift scan_blank || nested_open || nested_close || lone_star || plain) input
  end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   183
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   184
(*scan a complete, possibly nested, comment and return the empty string; the
  body is scanned by scan_cmt starting at nesting depth 0, and a missing end is
  reported through lex_err once the opener has been seen*)
fun scan_comment input =
  let
    fun opening xs = keep_line ($$ "(" -- $$ "*") xs;
    fun closing xs = keep_line ($$ "*" -- $$ ")") xs;
    fun contents xs = Scan.pass 0 (Scan.repeat scan_cmt) xs;
  in
    (opening |--
      !! (lex_err (K "missing end of comment"))
        (change_prompt (contents |-- closing >> K ""))) input
  end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   189
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   190
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   191
(* scan token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   192
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   193
(*scan a single token from (pos, cs).  lex1 is the lexicon of keywords, lex2 the
  lexicon of commands.  Every token produced carries the position pos at which
  scanning started.  Strings, verbatim text, white space, comments and sync
  symbols are tried first; otherwise the candidates from the two lexicons and
  from the identifier/variable/number scanners are combined with Scan.max under
  token_leq.  Input that matches nothing is reported through lex_err as
  "bad input"*)
fun scan (lex1, lex2) (pos, cs) =
  let
    fun mk kind text = Token (pos, (kind, text));
    fun mk_ignore _ = mk Ignore "";
    fun mk_sync _ = mk Sync Symbol.sync;

    (*literals from the two lexicons*)
    fun keyword xs = (Scan.literal lex1 >> (mk Keyword o implode)) xs;
    fun command xs = (Scan.literal lex2 >> (mk Command o implode)) xs;
    fun literal xs = Scan.max token_leq keyword command xs;

    (*identifiers, variables, numbers*)
    fun name xs =
     (Syntax.scan_longid >> mk LongIdent ||
      Syntax.scan_id >> mk Ident ||
      Syntax.scan_var >> mk Var ||
      Syntax.scan_tid >> mk TypeIdent ||
      Syntax.scan_tvar >> mk TypeVar ||
      Syntax.scan_nat >> mk Nat ||
      scan_symid >> mk SymIdent) xs;

    fun scanner xs =
     (scan_string >> mk String ||
      scan_verbatim >> mk Verbatim ||
      scan_space >> mk_ignore ||
      scan_comment >> mk_ignore ||
      keep_line (Scan.one Symbol.is_sync >> mk_sync) ||
      keep_line (Scan.max token_leq literal name)) xs;
  in
    !! (lex_err (fn rest => "bad input " ^ quote (Symbol.beginning rest))) scanner (pos, cs)
  end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   217
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   218
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   219
(* source of (proper) tokens *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   220
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   221
(*junk symbols: not blank, not sync, not eof*)
fun is_junk sym =
  not (Symbol.is_blank sym) andalso Symbol.not_sync sym andalso Symbol.not_eof sym;
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   222
(*recovery scanner: consume a non-empty run of junk symbols, position unchanged*)
val recover = fn input => keep_line (Scan.any1 is_junk) input;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   223
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   224
(*turn a source of symbols into a source of proper tokens.  The lexicons are
  fetched through get_lex () each time a token is scanned; the recover scanner
  is passed to Source.source' only if do_recover is set; tokens of kind Ignore
  are filtered out of the result*)
fun source do_recover get_lex pos src =
  let
    val scan_tokens = Scan.bulk (fn xs => scan (get_lex ()) xs);
    val recovery = if do_recover then Some recover else None;
    val all_tokens = Source.source' pos Symbol.stopper scan_tokens recovery src;
  in Source.filter is_proper all_tokens end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   228
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   229
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   230
end;