src/Pure/Isar/outer_lex.ML
author wenzelm
Wed, 07 Jun 2000 14:19:48 +0200
changeset 9051 887a15590f0e
parent 8807 0046be1769f9
child 9130 ff8789b49d2e
permissions -rw-r--r--
string syntax: allow \\ \" \\n only;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Isar/outer_lex.ML
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     2
    ID:         $Id$
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     3
    Author:     Markus Wenzel, TU Muenchen
8807
wenzelm
parents: 8663
diff changeset
     4
    License:    GPL (GNU GENERAL PUBLIC LICENSE)
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     5
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     6
Outer lexical syntax for Isabelle/Isar.
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     7
*)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     8
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     9
signature OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    10
sig
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    11
  datatype token_kind =
7477
c7caea1ce78c removed text vars;
wenzelm
parents: 7026
diff changeset
    12
    Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
    13
    Nat | String | Verbatim | Space | Comment | Sync | EOF
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    14
  type token
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    15
  val str_of_kind: token_kind -> string
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    16
  val stopper: token * (token -> bool)
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    17
  val not_sync: token -> bool
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    18
  val not_eof: token -> bool
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    19
  val position_of: token -> Position.T
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    20
  val pos_of: token -> string
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    21
  val is_kind: token_kind -> token -> bool
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    22
  val keyword_with: (string -> bool) -> token -> bool
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    23
  val name_of: token -> string
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    24
  val is_proper: token -> bool
8580
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
    25
  val is_begin_ignore: token -> bool
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
    26
  val is_end_ignore: token -> bool
8651
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
    27
  val is_newline: token -> bool
7902
10fd5d922c97 added is_indent;
wenzelm
parents: 7682
diff changeset
    28
  val is_indent: token -> bool
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    29
  val val_of: token -> string
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
    30
  val is_sid: string -> bool
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    31
  val scan: (Scan.lexicon * Scan.lexicon) ->
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    32
    Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    33
  val source: bool -> (unit -> (Scan.lexicon * Scan.lexicon)) ->
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    34
    Position.T -> (Symbol.symbol, 'a) Source.source ->
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
    35
    (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    36
end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    37
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    38
structure OuterLex: OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    39
struct
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    40
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    41
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    42
(** tokens **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    43
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    44
(* datatype token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    45
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    46
(*classification of outer-syntax tokens; one constructor per kind that the
  scanner in this structure can produce (plus EOF for the stopper token)*)
datatype token_kind =
7477
c7caea1ce78c removed text vars;
wenzelm
parents: 7026
diff changeset
    47
  Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
    48
  Nat | String | Verbatim | Space | Comment | Sync | EOF;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    49
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    50
(*a token: its source position paired with its kind and its text*)
datatype token = Token of Position.T * (token_kind * string);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    51
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    52
val str_of_kind =
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    53
 fn Command => "command"
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    54
  | Keyword => "keyword"
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    55
  | Ident => "identifier"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    56
  | LongIdent => "long identifier"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    57
  | SymIdent => "symbolic identifier"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    58
  | Var => "schematic variable"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    59
  | TypeIdent => "type variable"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    60
  | TypeVar => "schematic type variable"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    61
  | Nat => "number"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    62
  | String => "string"
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    63
  | Verbatim => "verbatim text"
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
    64
  | Space => "white space"
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
    65
  | Comment => "comment text"
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    66
  | Sync => "sync marker"
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    67
  | EOF => "end-of-file";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    68
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    69
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    70
(* sync token *)
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    71
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    72
(*false exactly for tokens of kind Sync, true for every other token*)
fun not_sync (Token (_, (Sync, _))) = false
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    73
  | not_sync _ = true;
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    74
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    75
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    76
(* eof token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    77
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    78
(*the end-of-file token: kind EOF, empty text, position Position.none*)
val eof = Token (Position.none, (EOF, ""));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    79
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    80
(*true exactly for tokens of kind EOF; position and text are ignored*)
fun is_eof (Token (_, (EOF, _))) = true
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    81
  | is_eof _ = false;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    82
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    83
(*the eof token paired with the predicate that recognizes it;
  presumably consumed by Source/Scan as the end-of-input marker -- confirm against callers*)
val stopper = (eof, is_eof);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    84
(*negation of is_eof: true for every token whose kind is not EOF*)
fun not_eof tok = not (is_eof tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    85
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    86
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    87
(* get position *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    88
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    89
(*the source position stored in a token*)
fun position_of tok =
  (case tok of Token (p, _) => p);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    90
(*a token's position rendered as a string by Position.str_of*)
fun pos_of tok = Position.str_of (position_of tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    91
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    92
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    93
(* kind of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    94
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    95
(*does the token carry exactly the given kind?*)
fun is_kind wanted (Token (_, (actual, _))) = (wanted = actual);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    96
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    97
(*apply pred to the text of a Keyword token; tokens of any other kind
  (including Command) yield false without calling pred*)
fun keyword_with pred (Token (_, (Keyword, x))) = pred x
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    98
  | keyword_with _ _ = false;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    99
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   100
(*printable name of a token's kind, as given by str_of_kind*)
fun name_of tok =
  (case tok of Token (_, (kind, _)) => str_of_kind kind);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   101
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   102
(*a token is proper unless its kind is Space or Comment*)
fun is_proper (Token (_, (Space, _))) = false
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   103
  | is_proper (Token (_, (Comment, _))) = false
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   104
  | is_proper _ = true;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   105
8580
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   106
(*true exactly for a Comment token whose text is the single character "<"*)
fun is_begin_ignore (Token (_, (Comment, "<"))) = true
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   107
  | is_begin_ignore _ = false;
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   108
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   109
(*true exactly for a Comment token whose text is the single character ">"*)
fun is_end_ignore (Token (_, (Comment, ">"))) = true
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   110
  | is_end_ignore _ = false;
e79ee31d3936 added is_begin/end_ignore;
wenzelm
parents: 8231
diff changeset
   111
8651
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   112
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   113
(* newline and indentations (note that space tokens obey lines) *)
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   114
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   115
(*true exactly for a Space token whose whole text is a single newline*)
fun is_newline (Token (_, (Space, "\n"))) = true
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   116
  | is_newline _ = false;
f095f3b8181a added is_newline;
wenzelm
parents: 8580
diff changeset
   117
7902
10fd5d922c97 added is_indent;
wenzelm
parents: 7682
diff changeset
   118
(*true for a Space token with non-empty text whose last character is not a
  newline; false for all other tokens.  The n > 0 guard keeps the
  String.substring call from being applied to an empty string.*)
fun is_indent (Token (_, (Space, s))) =
10fd5d922c97 added is_indent;
wenzelm
parents: 7682
diff changeset
   119
      let val n = size s in n > 0 andalso String.substring (s, n - 1, 1) <> "\n" end
10fd5d922c97 added is_indent;
wenzelm
parents: 7682
diff changeset
   120
  | is_indent _ = false;
10fd5d922c97 added is_indent;
wenzelm
parents: 7682
diff changeset
   121
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   122
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   123
(* value of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   124
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   125
(*the text carried by a token*)
fun val_of tok =
  (case tok of Token (_, (_, text)) => text);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   126
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   127
(*order two tokens by comparing their texts with <= on strings;
  positions and kinds play no part*)
fun token_leq (tok, tok') = val_of tok <= val_of tok';
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   128
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   129
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   130
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   131
(** scanners **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   132
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   133
(*run the given scanner under Scan.prompt with the prompt string "# "*)
val change_prompt = fn scanner => Scan.prompt "# " scanner;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   134
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   135
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   136
(* diagnostics *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   137
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   138
(*build the text of a lexical error: a fixed prefix, the printed position,
  and the message that msg computes from the remaining symbols cs;
  the second component of the argument pair is ignored*)
fun lex_err msg ((pos, cs), _) =
  let
    val location = Position.str_of pos;
    val details = msg cs;
  in "Outer lexical error" ^ location ^ ": " ^ details end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   139
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   140
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   141
(* line numbering *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   142
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   143
(*state-dependent scanner: run the given scanner and replace the position
  state by Position.inc of the incoming one*)
fun incr_line scanner =
  Scan.depend (fn line => scanner >> pair (Position.inc line));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   144
(*counterpart of incr_line: lift a scanner with Scan.lift, i.e. without
  touching the position state*)
fun keep_line scanner = Scan.lift scanner;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   145
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   146
(*scan one blank symbol: a newline goes through incr_line (so the position is
  advanced with Position.inc), any other symbol accepted by Symbol.is_blank
  goes through keep_line (position unchanged).  The newline case is tried first.*)
val scan_blank =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   147
  incr_line ($$ "\n") ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   148
  keep_line (Scan.one Symbol.is_blank);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   149
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   150
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   151
(* scan symbolic idents *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   152
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   153
(*the characters accepted by is_sym_char, exploded into a list for membership tests*)
val sym_chars = explode "!#$%&*+-/:<=>?@^_`|~";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   154
(*is the given symbol one of the entries of sym_chars?*)
val is_sym_char = fn sym => sym mem sym_chars;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   155
8231
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   156
(*scan a symbolic identifier: either a run of symbols satisfying is_sym_char
  (joined with implode; Scan.any1 presumably demands at least one -- confirm
  in Scan), or otherwise a single symbol satisfying Symbol.is_symbolic*)
val scan_symid =
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   157
  Scan.any1 is_sym_char >> implode ||
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   158
  Scan.one Symbol.is_symbolic;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   159
8231
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   160
(*string-level test for symbolic identifiers.
  The string is split with Symbol.explode under try, so a failure of the
  explode falls into the last case and yields false.
  - exactly one symbol: accepted if Symbol.is_symbolic or is_sym_char holds;
  - any other list of symbols: accepted if every symbol satisfies is_sym_char.
  NOTE(review): if Symbol.explode returns the empty list for "", the second
  case makes the empty string count as a symbolic identifier (forall over an
  empty list) -- confirm whether that is intended.*)
fun is_symid str =
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   161
  (case try Symbol.explode str of
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   162
    Some [s] => Symbol.is_symbolic s orelse is_sym_char s
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   163
  | Some ss => forall is_sym_char ss
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   164
  | _ => false);
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   165
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   166
(*a string is accepted if it is a symbolic identifier (is_symid) or, failing
  that, an identifier according to Syntax.is_identifier*)
fun is_sid str = is_symid str orelse Syntax.is_identifier str;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   167
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   168
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   169
(* scan strings *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   170
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   171
(*scan one element of a string body.  Alternatives, in order:
  - a blank symbol (scan_blank, which also tracks newlines);
  - a backslash, whose result is dropped, followed by a blank, a double quote
    or another backslash; anything else after the backslash is reported
    through lex_err as "bad escape character in string";
  - any single symbol other than backslash, double quote, the sync symbol and
    the eof symbol.*)
val scan_str =
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   172
  scan_blank ||
9051
887a15590f0e string syntax: allow \\ \" \\n only;
wenzelm
parents: 8807
diff changeset
   173
  keep_line ($$ "\\") |-- !! (lex_err (K "bad escape character in string"))
887a15590f0e string syntax: allow \\ \" \\n only;
wenzelm
parents: 8807
diff changeset
   174
      (scan_blank || keep_line ($$ "\"" || $$ "\\")) ||
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   175
  keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   176
    Symbol.not_sync andf Symbol.not_eof));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   177
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   178
(*scan a quoted string: an opening double quote, then repeated scan_str
  elements joined with implode, then a closing double quote.  The body is
  scanned under change_prompt; once the opening quote has been seen, failure
  is reported through lex_err as "missing quote at end of string".
  The quote results are presumably discarded by the one-sided sequencing
  combinators, leaving only the body text -- confirm in Scan.*)
val scan_string =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   179
  keep_line ($$ "\"") |--
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   180
    !! (lex_err (K "missing quote at end of string"))
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   181
      (change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\"")));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   182
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   183
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   184
(* scan verbatim text *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   185
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   186
(*scan one element of a verbatim body.  Alternatives, in order:
  - a blank symbol (scan_blank, which also tracks newlines);
  - a star, provided the lookahead symbol is not a closing brace;
  - any single symbol other than a star, the sync symbol and the eof symbol.*)
val scan_verb =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   187
  scan_blank ||
6743
5d50225637c8 changed {| |} verbatim syntax to {* *} in order to simplify ProofGeneral setup;
wenzelm
parents: 5876
diff changeset
   188
  keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal "}"))) ||
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   189
  keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   190
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   191
(*scan verbatim text: an opening brace followed by a star, then repeated
  scan_verb elements joined with implode, then a star followed by a closing
  brace.  The body is scanned under change_prompt; once the opening delimiter
  has been seen, failure is reported through lex_err as
  "missing end of verbatim text".*)
val scan_verbatim =
6743
5d50225637c8 changed {| |} verbatim syntax to {* *} in order to simplify ProofGeneral setup;
wenzelm
parents: 5876
diff changeset
   192
  keep_line ($$ "{" -- $$ "*") |--
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   193
    !! (lex_err (K "missing end of verbatim text"))
6743
5d50225637c8 changed {| |} verbatim syntax to {* *} in order to simplify ProofGeneral setup;
wenzelm
parents: 5876
diff changeset
   194
      (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}")));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   195
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   196
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   197
(* scan space *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   198
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   199
(*a blank symbol (per Symbol.is_blank) that is not the newline*)
fun is_space sym = Symbol.is_blank sym andalso sym <> "\n";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   200
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   201
(*scan one stretch of white space and return it as a single string.
  Two shapes are accepted: a run of is_space symbols with an optional trailing
  newline, or a (possibly empty) run of is_space symbols with a mandatory
  trailing newline.  In both shapes a newline can only occur as the last
  character, and it is consumed through incr_line so the position advances.
  Scan.any1 / Scan.any are presumably "one or more" / "zero or more" --
  confirm in Scan.*)
val scan_space =
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   202
  (keep_line (Scan.any1 is_space) -- Scan.optional (incr_line ($$ "\n")) "" ||
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   203
    keep_line (Scan.any is_space) -- incr_line ($$ "\n")) >> (fn (cs, c) => implode cs ^ c);
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   204
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   205
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   206
(* scan nested comments *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   207
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   208
(*scan one element of a comment body while tracking an integer nesting depth
  as extra scanner state.  Alternatives, in order:
  - a blank symbol (scan_blank, lifted over the depth state);
  - an opening comment delimiter, which increases the depth by one;
  - a closing comment delimiter, which decreases the depth by one; at depth 0
    this alternative fails, leaving the final delimiter to the caller;
  - a star, provided the lookahead symbol is not a closing parenthesis;
  - any single symbol other than a star, the sync symbol and the eof symbol.*)
val scan_cmt =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   209
  Scan.lift scan_blank ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   210
  Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   211
  Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   212
  Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) ||
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   213
  Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_sync andf Symbol.not_eof)));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   214
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   215
(*scan a possibly nested comment: the opening delimiter, then repeated
  scan_cmt elements joined with implode and run with an initial nesting depth
  of 0 (Scan.pass 0), then the closing delimiter.  The body is scanned under
  change_prompt; once the opening delimiter has been seen, failure is reported
  through lex_err as "missing end of comment".*)
val scan_comment =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   216
  keep_line ($$ "(" -- $$ "*") |--
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   217
    !! (lex_err (K "missing end of comment"))
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   218
      (change_prompt
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   219
        (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| keep_line ($$ "*" -- $$ ")")));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   220
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   221
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   222
(* scan token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   223
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   224
(*Scan a single token from the input (pos, cs).
  lex1 is the lexicon whose literals become Keyword tokens, lex2 the one whose
  literals become Command tokens.  Every token built here is stamped with pos,
  the position at which this scan started.
  Alternatives are tried in this order: string, verbatim text, white space,
  comment, sync marker, and finally a Scan.max choice among keyword/command
  literals and the identifier-like forms (long identifier, identifier,
  schematic variable, type variable, schematic type variable, number,
  symbolic identifier).
  NOTE(review): Scan.max token_leq presumably keeps the alternative whose
  token text is greater under token_leq (string comparison) -- confirm in Scan.
  If no alternative applies, lex_err reports "bad input" followed by the
  quoted beginning of the remaining symbols.*)
fun scan (lex1, lex2) (pos, cs) =
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   225
  let
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   226
    fun token k x = Token (pos, (k, x));
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   227
    fun sync _ = token Sync Symbol.sync;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   228
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   229
    val scanner =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   230
      scan_string >> token String ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   231
      scan_verbatim >> token Verbatim ||
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   232
      scan_space >> token Space ||
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   233
      scan_comment >> token Comment ||
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   234
      keep_line (Scan.one Symbol.is_sync >> sync) ||
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   235
      keep_line (Scan.max token_leq
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   236
        (Scan.max token_leq
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   237
          (Scan.literal lex1 >> (token Keyword o implode))
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   238
          (Scan.literal lex2 >> (token Command o implode)))
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   239
        (Syntax.scan_longid >> token LongIdent ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   240
          Syntax.scan_id >> token Ident ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   241
          Syntax.scan_var >> token Var ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   242
          Syntax.scan_tid >> token TypeIdent ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   243
          Syntax.scan_tvar >> token TypeVar ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   244
          Syntax.scan_nat >> token Nat ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   245
          scan_symid >> token SymIdent));
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   246
  in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs) end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   247
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   248
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   249
(* source of (proper) tokens *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   250
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   251
(*junk symbol for error recovery: not blank, not the sync symbol, not eof*)
fun is_junk sym =
  not (Symbol.is_blank sym) andalso Symbol.not_sync sym andalso Symbol.not_eof sym;
8663
38d7ec8ef683 tuned recover;
wenzelm
parents: 8651
diff changeset
   252
(*recovery scanner: consume a run of is_junk symbols without touching the
  position state*)
fun recover input =
  let val skip_junk = Scan.any is_junk
  in keep_line skip_junk input end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   253
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   254
(*turn a source of symbols into a source of tokens via Source.source'.
  - do_recover: when true the recover scanner is passed as Some recover,
    otherwise None is passed;
  - get_lex: called anew for every invocation of the token scanner, so the
    pair of lexicons may change between tokens;
  - pos: initial position handed to Source.source';
  - src: the underlying symbol source, terminated according to Symbol.stopper.
  Tokens are produced by scan, wrapped in Scan.bulk.*)
fun source do_recover get_lex pos src =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   255
  Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
   256
    (if do_recover then Some recover else None) src;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   257
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   258
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   259
end;