src/Pure/Isar/outer_lex.ML
author wenzelm
Mon, 16 Nov 1998 10:58:18 +0100
changeset 5876 273056b673ec
parent 5825 24e4b1780d33
child 6743 5d50225637c8
permissions -rw-r--r--
replaced is_symid by is_sid;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Isar/outer_lex.ML
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     2
    ID:         $Id$
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     3
    Author:     Markus Wenzel, TU Muenchen
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     4
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     5
Outer lexical syntax for Isabelle/Isar.
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     6
*)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     7
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     8
(* Interface of the outer lexer: the token kinds, the abstract token type
   with its accessors and predicates, the is_sid check on plain strings,
   and the scan / source entry points. *)
signature OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     9
sig
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    10
  (* kinds of tokens; the constructors are visible to clients *)
  datatype token_kind =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    11
    Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    12
    String | Verbatim | Ignore | EOF
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    13
  (* tokens are abstract: only the operations below can inspect them *)
  type token
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    14
  val str_of_kind: token_kind -> string
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    15
  (* end-of-file token paired with the predicate that recognizes it *)
  val stopper: token * (token -> bool)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    16
  val not_eof: token -> bool
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    17
  val position_of: token -> Position.T
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    18
  val pos_of: token -> string
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    19
  val is_kind: token_kind -> token -> bool
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    20
  val keyword_pred: (string -> bool) -> token -> bool
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    21
  val name_of: token -> string
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    22
  val is_proper: token -> bool
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    23
  val val_of: token -> string
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
    24
  val is_sid: string -> bool
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    25
  (* scan one token: takes and returns the position / remaining symbols *)
  val scan: Scan.lexicon ->
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    26
    Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    27
  (* token source over a symbol source; the bool enables error recovery *)
  val source: bool -> (unit -> Scan.lexicon) -> Position.T -> (Symbol.symbol, 'a) Source.source ->
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    28
    (token, (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    29
end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    30
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    31
structure OuterLex: OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    32
struct
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    33
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    34
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    35
(** tokens **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    36
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    37
(* datatype token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    38
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    39
(* Kinds of outer-syntax tokens.  Ignore is the kind rejected by is_proper
   (scan produces it for white space and comments); EOF is the kind of the
   eof stopper token. *)
datatype token_kind =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    40
  Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    41
  String | Verbatim | Ignore | EOF;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    42
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    43
(* A token is a position together with its kind and its string value. *)
datatype token = Token of Position.T * (token_kind * string);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    44
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    45
(* Printable name of a token kind; name_of applies it to the kind of a token. *)
fun str_of_kind Keyword = "keyword"
  | str_of_kind Ident = "identifier"
  | str_of_kind LongIdent = "long identifier"
  | str_of_kind SymIdent = "symbolic identifier"
  | str_of_kind Var = "schematic variable"
  | str_of_kind TextVar = "text variable"
  | str_of_kind TypeIdent = "type variable"
  | str_of_kind TypeVar = "schematic type variable"
  | str_of_kind Nat = "number"
  | str_of_kind String = "string"
  | str_of_kind Verbatim = "verbatim text"
  | str_of_kind Ignore = "ignored text"
  | str_of_kind EOF = "end-of-file";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    59
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    60
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    61
(* eof token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    62
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    63
(* The end-of-file token: kind EOF, empty value, no position. *)
val eof = Token (Position.none, (EOF, ""));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    64
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    65
(* True exactly for tokens of kind EOF, whatever their position or value. *)
fun is_eof tok =
  (case tok of
    Token (_, (EOF, _)) => true
  | _ => false);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    67
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    68
(* The eof token paired with the predicate that recognizes it. *)
val stopper = (eof, is_eof);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    69
(* Negation of is_eof: true for every token whose kind is not EOF. *)
fun not_eof tok = not (is_eof tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    70
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    71
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    72
(* get position *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    73
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    74
(* Position component stored in a token. *)
fun position_of tok = (case tok of Token (pos, _) => pos);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    75
(* Position of a token rendered as a string by Position.str_of. *)
fun pos_of tok = Position.str_of (position_of tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    76
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    77
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    78
(* kind of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    79
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    80
(* Does the token have exactly the given kind? *)
fun is_kind kind tok =
  (case tok of Token (_, (kind', _)) => kind = kind');
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    81
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    82
(* Applies pred to the value of a Keyword token; false for any other kind. *)
fun keyword_pred pred tok =
  (case tok of
    Token (_, (Keyword, x)) => pred x
  | _ => false);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    84
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    85
(* Printable name of the kind of a token (see str_of_kind). *)
fun name_of tok =
  (case tok of Token (_, (kind, _)) => str_of_kind kind);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    86
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    87
(* A token is proper unless its kind is Ignore. *)
fun is_proper tok =
  (case tok of
    Token (_, (Ignore, _)) => false
  | _ => true);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    89
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    90
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    91
(* value of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    92
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    93
(* String value stored in a token. *)
fun val_of tok =
  (case tok of Token (_, (_, text)) => text);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    94
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    95
(* Orders two tokens by their string values only (kind and position are
   ignored); handed to Scan.max inside scan. *)
fun token_leq (Token (_, (_, s)), Token (_, (_, s'))) = s <= s';
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    96
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    97
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    98
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    99
(** scanners **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   100
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   101
(* Wraps a scanner with Scan.prompt, using the hash-space prompt string;
   applied to the bodies of strings, verbatim text and comments. *)
fun change_prompt scanner = Scan.prompt "# " scanner;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   102
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   103
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   104
(* diagnostics *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   105
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   106
(* Builds a lexical error message: fixed prefix, printed position, then the
   text computed by msg from the remaining symbols cs.  The second component
   of the argument pair is ignored. *)
fun lex_err msg ((pos, cs), _) =
  let
    val where_at = Position.str_of pos;
  in
    "Outer lexical error" ^ where_at ^ ": " ^ msg cs
  end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   107
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   108
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   109
(* line numbering *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   110
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   111
(* Runs the given scanner and pairs its result with Position.inc applied to
   the current position, via Scan.depend. *)
fun incr_line scanner =
  Scan.depend (fn here => scanner >> pair (Position.inc here));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   112
(* Alias for Scan.lift: used for scanners that do not touch the position
   (counterpart of incr_line). *)
val keep_line = Scan.lift;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   113
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   114
(* Scans a single blank symbol: a newline goes through incr_line (position
   is advanced with Position.inc), any other symbol accepted by
   Symbol.is_blank goes through keep_line. *)
val scan_blank =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   115
  incr_line ($$ "\n") ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   116
  keep_line (Scan.one Symbol.is_blank);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   117
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   118
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   119
(* scan symbolic idents *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   120
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   121
(* The characters that may occur in a symbolic identifier, as a list. *)
val sym_chars = explode "!#$%&*+-/:<=>?@^_`|~";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   122
(* Membership test against sym_chars. *)
fun is_sym_char sym = sym mem sym_chars;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   123
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   124
(* Scans a non-empty run of symbolic characters (Scan.any1 with is_sym_char)
   and implodes it into one string. *)
val scan_symid = Scan.any1 is_sym_char >> implode;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   125
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   126
(* A symbolic identifier is a non-empty string all of whose symbols
   (as split by Symbol.explode) satisfy is_sym_char. *)
fun is_symid "" = false
  | is_symid str = forall is_sym_char (Symbol.explode str);
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   127
(* Combines is_symid and Syntax.is_identifier with orf -- presumably the
   pointwise disjunction of the two predicates; confirm against the library
   definition of orf. *)
val is_sid = is_symid orf Syntax.is_identifier;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   128
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   129
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   130
(* scan strings *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   131
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   132
(* Scans one item of string content.  Three alternatives, tried in order:
   a blank (replaced by Symbol.space); a backslash followed by any non-eof
   symbol (presumably only the symbol after the backslash is kept -- confirm
   the result convention of the combinator used); or any symbol that is
   neither a backslash, nor a double quote, nor end of input. *)
val scan_str =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   133
  scan_blank >> K Symbol.space ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   134
  keep_line ($$ "\\" |-- Scan.one Symbol.not_eof) ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   135
  keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf Symbol.not_eof));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   136
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   137
(* Scans a string token: an opening double quote, then repeated scan_str
   items imploded into one string, then the closing double quote.  The part
   after the opening quote runs under change_prompt and is guarded by the
   lex_err message below (presumably raised when that part fails -- confirm
   against the definition of the guarding combinator). *)
val scan_string =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   138
  keep_line ($$ "\"") |--
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   139
    !! (lex_err (K "missing quote at end of string"))
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   140
      (change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\"")));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   141
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   142
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   143
(* scan verbatim text *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   144
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   145
(* Scans one item of verbatim text.  Three alternatives, tried in order:
   a blank; a vertical bar whose following symbol (inspected with Scan.ahead,
   i.e. as lookahead) is not a closing brace; or any symbol that is neither
   a vertical bar nor end of input. *)
val scan_verb =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   146
  scan_blank ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   147
  keep_line ($$ "|" --| Scan.ahead (Scan.one (not_equal "}"))) ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   148
  keep_line (Scan.one (not_equal "|" andf Symbol.not_eof));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   149
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   150
(* Scans a verbatim token: opening brace and bar, then repeated scan_verb
   items imploded into one string, then closing bar and brace.  The part
   after the opening delimiter runs under change_prompt and is guarded by
   the lex_err message below. *)
val scan_verbatim =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   151
  keep_line ($$ "{" -- $$ "|") |--
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   152
    !! (lex_err (K "missing end of verbatim text"))
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   153
      (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "|" -- $$ "}")));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   154
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   155
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   156
(* scan space *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   157
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   158
(* Predicate built from Symbol.is_blank and inequality to the newline
   symbol, i.e. blanks other than newline (assuming andf is pointwise
   conjunction -- confirm against the library). *)
val is_space = Symbol.is_blank andf not_equal "\n";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   159
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   160
(* Scans white space containing at most one newline.  First alternative:
   one or more is_space symbols, optionally followed by a newline (default
   result is the empty string).  Second alternative: zero or more is_space
   symbols followed by a mandatory newline.  A newline always goes through
   incr_line. *)
val scan_space =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   161
  keep_line (Scan.any1 is_space) |-- Scan.optional (incr_line ($$ "\n")) "" ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   162
  keep_line (Scan.any is_space) |-- incr_line ($$ "\n");
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   163
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   164
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   165
(* scan nested comments *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   166
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   167
(* Scans one item inside a comment.  The scanner carries a nesting depth d
   (an integer, threaded with Scan.depend) in addition to the position used
   by the lifted scanners.  Alternatives, tried in order:
     - a blank (scan_blank);
     - an opening comment delimiter, continuing with depth d + 1;
     - a closing comment delimiter, continuing with depth d - 1, but only
       when d is not 0 (at depth 0 this alternative fails);
     - a star whose following symbol (lookahead) is not a closing parenthesis;
     - any symbol that is neither a star nor end of input. *)
val scan_cmt =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   168
  Scan.lift scan_blank ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   169
  Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   170
  Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   171
  Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   172
  Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_eof)));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   173
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   174
(* Scans a complete, possibly nested comment and yields the empty string.
   After the opening delimiter, scan_cmt is repeated with the depth state
   started at 0 (Scan.pass 0); the outermost closing delimiter is consumed
   here, outside the repetition, because scan_cmt does not accept a closing
   delimiter at depth 0.  The body runs under change_prompt and is guarded
   by the lex_err message below. *)
val scan_comment =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   175
  keep_line ($$ "(" -- $$ "*") |--
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   176
    !! (lex_err (K "missing end of comment"))
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   177
      (change_prompt
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   178
        (Scan.pass 0 (Scan.repeat scan_cmt) |-- keep_line ($$ "*" -- $$ ")") >> K ""));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   179
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   180
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   181
(* scan token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   182
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   183
(* Scans a single token from the input (pos, cs), using lex as the keyword
   lexicon.  Every token built here carries pos, the position at which this
   call started.  Strings, verbatim text, white space and comments are tried
   first; otherwise Scan.max token_leq selects between a keyword literal
   from lex and the remaining lexical classes (presumably the candidate with
   the greater string value wins -- confirm against Scan.max).  The whole
   scanner is guarded by a lex_err message that quotes the beginning of the
   remaining input. *)
fun scan lex (pos, cs) =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   184
  let
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   185
    (* token constructor fixed to the starting position *)
    fun token k x = Token (pos, (k, x));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   186
    (* discards the scanned text: Ignore token with empty value *)
    fun ignore _ = token Ignore "";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   187
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   188
    val scanner =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   189
      scan_string >> token String ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   190
      scan_verbatim >> token Verbatim ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   191
      scan_space >> ignore ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   192
      scan_comment >> ignore ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   193
      (* none of the scanners below changes the position *)
      keep_line (Scan.max token_leq
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   194
        (Scan.literal lex >> (token Keyword o implode))
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   195
        (Syntax.scan_longid >> token LongIdent ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   196
          Syntax.scan_id >> token Ident ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   197
          Syntax.scan_var >> token Var ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   198
          $$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> token TextVar ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   199
          Syntax.scan_tid >> token TypeIdent ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   200
          Syntax.scan_tvar >> token TypeVar ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   201
          Syntax.scan_nat >> token Nat ||
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   202
          scan_symid >> token SymIdent));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   203
  in
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   204
    !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   205
  end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   206
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   207
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   208
(* source of (proper) tokens *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   209
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   210
(* Recovery scanner handed to Source.source' by source: consumes a non-empty
   run of symbols that are neither blank nor end of input, leaving the
   position unchanged. *)
fun recover input =
  keep_line (Scan.any1 ((not o Symbol.is_blank) andf Symbol.not_eof)) input;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   211
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   212
(* Builds a source of proper tokens on top of the symbol source src, starting
   at position pos.  Each application of the scanner calls get_lex () anew,
   so the lexicon is looked up at scan time rather than fixed once.  When
   do_recover is true, recover is supplied as the optional recovery scanner;
   otherwise None is passed.  Tokens of kind Ignore are removed afterwards
   by filtering with is_proper. *)
fun source do_recover get_lex pos src =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   213
  Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   214
    (if do_recover then Some recover else None) src
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   215
  |> Source.filter is_proper;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   216
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   217
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   218
end;