src/Pure/Isar/outer_lex.ML
author wenzelm
Thu, 07 Oct 1999 12:36:53 +0200
changeset 7775 26898fbd19ca
parent 7682 46de8064c93c
child 7902 10fd5d922c97
permissions -rw-r--r--
verbatim / verb markup commands;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Isar/outer_lex.ML
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     2
    ID:         $Id$
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     3
    Author:     Markus Wenzel, TU Muenchen
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     4
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     5
Outer lexical syntax for Isabelle/Isar.
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     6
*)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     7
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     8
(*Interface of the outer lexer: token kinds, the abstract token type with
  its basic inspection operations, and the scanners that turn symbols into
  tokens.*)
signature OUTER_LEX =
sig
  (*classification of tokens*)
  datatype token_kind =
    Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
    Nat | String | Verbatim | Space | Comment | Sync | EOF
  type token

  (*printable name of a token kind*)
  val str_of_kind: token_kind -> string

  (*end-of-file / sync handling*)
  val stopper: token * (token -> bool)
  val not_sync: token -> bool
  val not_eof: token -> bool

  (*token inspection*)
  val position_of: token -> Position.T
  val pos_of: token -> string
  val is_kind: token_kind -> token -> bool
  val keyword_with: (string -> bool) -> token -> bool
  val name_of: token -> string
  val is_proper: token -> bool
  val val_of: token -> string

  (*symbolic or ordinary identifier*)
  val is_sid: string -> bool

  (*scanning: single token, and token source on top of a symbol source*)
  val scan: (Scan.lexicon * Scan.lexicon) ->
    Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
  val source: bool -> (unit -> (Scan.lexicon * Scan.lexicon)) ->
    Position.T -> (Symbol.symbol, 'a) Source.source ->
    (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    32
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    33
structure OuterLex: OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    34
struct
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    35
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    36
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    37
(** tokens **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    38
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    39
(* datatype token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    40
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    41
(*Kinds of outer tokens.  Space and Comment are the kinds rejected by
  is_proper; Sync and EOF are control markers rather than source text.*)
datatype token_kind =
    Command
  | Keyword
  | Ident
  | LongIdent
  | SymIdent
  | Var
  | TypeIdent
  | TypeVar
  | Nat
  | String
  | Verbatim
  | Space
  | Comment
  | Sync
  | EOF;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    44
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    45
(*A token: source position paired with its kind and its string value.*)
datatype token = Token of Position.T * (token_kind * string);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    46
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    47
(*Human-readable name of a token kind.*)
fun str_of_kind Command = "command"
  | str_of_kind Keyword = "keyword"
  | str_of_kind Ident = "identifier"
  | str_of_kind LongIdent = "long identifier"
  | str_of_kind SymIdent = "symbolic identifier"
  | str_of_kind Var = "schematic variable"
  | str_of_kind TypeIdent = "type variable"
  | str_of_kind TypeVar = "schematic type variable"
  | str_of_kind Nat = "number"
  | str_of_kind String = "string"
  | str_of_kind Verbatim = "verbatim text"
  | str_of_kind Space = "white space"
  | str_of_kind Comment = "comment text"
  | str_of_kind Sync = "sync marker"
  | str_of_kind EOF = "end-of-file";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    63
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    64
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    65
(* sync token *)
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    66
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    67
(*True for every token except those of kind Sync.*)
fun not_sync (Token (_, (kind, _))) = (kind <> Sync);
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    69
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    70
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    71
(* eof token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    72
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    73
(*The end-of-file token: kind EOF, empty string value, no position.*)
val eof = Token (Position.none, (EOF, ""));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    74
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    75
(*Recognize tokens of kind EOF.*)
fun is_eof (Token (_, (kind, _))) = (kind = EOF);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    77
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    78
(*The EOF token paired with its recognizer.  Presumably in the form that the
  Scan/Source combinators expect as a stopper -- their definitions are not
  visible here.*)
val stopper = (eof, is_eof);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    79
(*Negation of is_eof.*)
fun not_eof tok = not (is_eof tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    80
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    81
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    82
(* get position *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    83
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    84
(*Source position stored in a token.*)
fun position_of (Token (position, _)) = position;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    85
(*Position of a token, rendered as a string via Position.str_of.*)
fun pos_of tok = Position.str_of (position_of tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    86
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    87
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    88
(* kind of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    89
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    90
(*Check whether a token has the given kind.*)
fun is_kind kind (Token (_, (kind', _))) = (kind = kind');
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    91
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    92
(*True iff the token is a Keyword whose string value satisfies pred;
  tokens of any other kind yield false without consulting pred.*)
fun keyword_with pred tok =
  (case tok of
    Token (_, (Keyword, text)) => pred text
  | _ => false);
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    94
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    95
(*Printable name of a token's kind (see str_of_kind).*)
fun name_of (Token (_, (kind, _))) = str_of_kind kind;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    96
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
    97
(*Proper tokens are all tokens other than white space and comments.*)
fun is_proper (Token (_, (kind, _))) =
  (case kind of
    Space => false
  | Comment => false
  | _ => true);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   100
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   101
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   102
(* value of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   103
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   104
(*String value stored in a token.*)
fun val_of (Token (_, (_, text))) = text;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   105
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   106
(*Order tokens by their string values only (kind and position are ignored);
  passed to Scan.max in scan.*)
fun token_leq (Token (_, (_, text1)), Token (_, (_, text2))) = text1 <= text2;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   107
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   108
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   109
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   110
(** scanners **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   111
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   112
(*Run a scanner under the prompt "# "; used for the bodies of strings,
  verbatim text and comments.*)
fun change_prompt scanner = Scan.prompt "# " scanner;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   113
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   114
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   115
(* diagnostics *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   116
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   117
(*Compose a lexical error message: fixed prefix, the current position, and
  the text produced by msg from the remaining symbols.  The second component
  of the argument is ignored.*)
fun lex_err msg ((pos, syms), _) =
  "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg syms;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   118
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   119
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   120
(* line numbering *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   121
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   122
(*Run scan and replace the position state by Position.inc of it.*)
fun incr_line scan =
  Scan.depend (fn pos =>
    let val next_pos = Position.inc pos
    in scan >> pair next_pos end);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   123
(*Counterpart of incr_line for scanners that do not consume a line break:
  simply Scan.lift, which presumably passes the position state through
  unchanged (Scan is not visible here).*)
val keep_line = Scan.lift;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   124
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   125
(*Scan one blank symbol; a newline additionally advances the position.*)
val scan_blank =
  (incr_line ($$ "\n")) ||
  (keep_line (Scan.one Symbol.is_blank));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   128
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   129
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   130
(* scan symbolic idents *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   131
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   132
(*Characters admitted in symbolic identifiers (see is_sym_char).*)
val sym_chars = explode "!#$%&*+-/:<=>?@^_`|~";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   133
(*Membership test for sym_chars.*)
fun is_sym_char sym = sym mem sym_chars;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   134
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   135
(*Scan a symbolic identifier: a run of symbolic characters (Scan.any1,
  presumably one or more), joined into a single string.*)
val scan_symid = Scan.any1 is_sym_char >> implode;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   136
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   137
(*A symbolic identifier is a non-empty string consisting solely of symbolic
  characters.*)
fun is_symid "" = false
  | is_symid s = forall is_sym_char (Symbol.explode s);
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   138
(*Symbolic identifier or ordinary identifier (Syntax.is_identifier).*)
fun is_sid s = is_symid s orelse Syntax.is_identifier s;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   139
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   140
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   141
(* scan strings *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   142
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   143
(*Scan one symbol of string content.

  - A blank is scanned by scan_blank, so newlines advance the position.
  - A backslash escapes the following symbol: the backslash is dropped and
    the symbol is taken literally (sync and eof symbols cannot be escaped).
  - Anything else is accepted unless it is a backslash, a double quote, or
    a sync / eof symbol.

  The escaped symbol is first offered to scan_blank as well.  Previously a
  newline directly after a backslash was consumed under keep_line, so the
  line counter was not incremented and the positions of all later tokens
  were off by one line.  The scanned text is unchanged.*)
val scan_str =
  scan_blank ||
  keep_line ($$ "\\") |--
    (scan_blank || keep_line (Scan.one (Symbol.not_sync andf Symbol.not_eof))) ||
  keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf
    Symbol.not_sync andf Symbol.not_eof));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   148
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   149
(*Scan a string token: opening double quote, any number of scan_str items
  (joined into one string), closing double quote.  The quotes are not part
  of the result.  Once the opening quote has been seen, failure is reported
  as a lexical error.*)
val scan_string =
  keep_line ($$ "\"") |--
    !! (lex_err (K "missing quote at end of string"))
      (change_prompt
        ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\"")));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   153
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   154
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   155
(* scan verbatim text *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   156
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   157
(*Scan one symbol of verbatim text: a blank (with line counting), a "*"
  that is not followed by "}", or any other symbol except "*", sync, eof.*)
val scan_verb =
  scan_blank ||
  keep_line
    ($$ "*" --| Scan.ahead (Scan.one (not_equal "}"))) ||
  keep_line
    (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   161
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   162
(*Scan verbatim text delimited by "{*" and "*}"; the delimiters are not part
  of the result.  Once the opening delimiter has been seen, failure is
  reported as a lexical error.*)
val scan_verbatim =
  keep_line ($$ "{" -- $$ "*") |--
    !! (lex_err (K "missing end of verbatim text"))
      (change_prompt
        ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}")));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   166
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   167
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   168
(* scan space *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   169
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   170
(*Blank symbols other than newline.*)
fun is_space sym = Symbol.is_blank sym andalso sym <> "\n";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   171
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   172
(*Scan a chunk of white space: non-newline blanks with an optional trailing
  newline, or a newline with optional leading blanks.  A newline advances
  the position.  The result is the scanned text.*)
val scan_space =
  (keep_line (Scan.any1 is_space) -- Scan.optional (incr_line ($$ "\n")) "" ||
    keep_line (Scan.any is_space) -- incr_line ($$ "\n"))
  >> (fn (blanks, newline) => implode blanks ^ newline);
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   175
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   176
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   177
(* scan nested comments *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   178
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   179
(*Scan one item inside a comment, with the nesting depth as scanner state:
    - a blank (with line counting);
    - a nested opening bracket, which increases the depth;
    - a closing bracket, which decreases the depth -- but fails at depth 0,
      leaving the final closing bracket to scan_comment;
    - a "*" that is not followed by ")";
    - any other symbol except "*", sync, eof.*)
val scan_cmt =
  Scan.lift scan_blank ||
  Scan.depend (fn depth =>
    keep_line ($$ "(" ^^ $$ "*") >> pair (depth + 1)) ||
  Scan.depend
    (fn 0 => Scan.fail
      | depth => keep_line ($$ "*" ^^ $$ ")") >> pair (depth - 1)) ||
  Scan.lift
    (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) ||
  Scan.lift
    (keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof)));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   185
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   186
(*Scan a (possibly nested) comment; the result is the text between the
  outermost brackets.  The body is scanned by scan_cmt starting at nesting
  depth 0.  Once the opening bracket has been seen, failure is reported as a
  lexical error.*)
val scan_comment =
  keep_line ($$ "(" -- $$ "*") |--
    !! (lex_err (K "missing end of comment"))
      (change_prompt
        (Scan.pass 0 (Scan.repeat scan_cmt >> implode)
          --| keep_line ($$ "*" -- $$ ")")));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   191
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   192
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   193
(* scan token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   194
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   195
(*Scan a single token from the symbols cs.  The token is stamped with pos,
  i.e. the position at which scanning starts.  Literals of lex1 become
  Keyword tokens, those of lex2 Command tokens.  If no alternative applies,
  a "bad input" lexical error is produced.*)
fun scan (lex1, lex2) (pos, cs) =
  let
    fun mk_token kind text = Token (pos, (kind, text));
    fun mk_sync _ = mk_token Sync Symbol.sync;

    (*keywords vs. commands, arbitrated by Scan.max wrt. token_leq*)
    val literals =
      Scan.max token_leq
        (Scan.literal lex1 >> (mk_token Keyword o implode))
        (Scan.literal lex2 >> (mk_token Command o implode));

    (*identifiers, variables, numbers*)
    val names =
      Syntax.scan_longid >> mk_token LongIdent ||
      Syntax.scan_id >> mk_token Ident ||
      Syntax.scan_var >> mk_token Var ||
      Syntax.scan_tid >> mk_token TypeIdent ||
      Syntax.scan_tvar >> mk_token TypeVar ||
      Syntax.scan_nat >> mk_token Nat ||
      scan_symid >> mk_token SymIdent;

    val scanner =
      scan_string >> mk_token String ||
      scan_verbatim >> mk_token Verbatim ||
      scan_space >> mk_token Space ||
      scan_comment >> mk_token Comment ||
      keep_line (Scan.one Symbol.is_sync >> mk_sync) ||
      keep_line (Scan.max token_leq literals names);

    fun bad_input rest = "bad input " ^ quote (Symbol.beginning rest);
  in !! (lex_err bad_input) scanner (pos, cs) end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   218
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   219
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   220
(* source of tokens (Space and Comment tokens are included; filter with is_proper) *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   221
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   222
(*Symbols skipped during error recovery: anything that is neither blank nor
  a sync / eof marker.*)
fun is_junk sym =
  not (Symbol.is_blank sym) andalso Symbol.not_sync sym andalso Symbol.not_eof sym;
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   223
(*Recovery scanner: consume a run of junk symbols (see is_junk), leaving the
  position unchanged.*)
fun recover input = keep_line (Scan.any1 is_junk) input;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   224
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   225
(*Token source on top of the symbol source src, starting at position pos.
  The lexicons are fetched via get_lex () each time a token is scanned.  If
  do_recover is set, the recover scanner is passed to Source.source' as
  recovery function.*)
fun source do_recover get_lex pos src =
  let
    fun scan_token xs = scan (get_lex ()) xs;
    val recovery = if do_recover then Some recover else None;
  in
    Source.source' pos Symbol.stopper (Scan.bulk scan_token) recovery src
  end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   228
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   229
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   230
end;