src/Pure/Isar/outer_lex.ML
author wenzelm
Mon, 06 Mar 2000 21:10:27 +0100
changeset 8351 1b8ac0f48233
parent 8231 fa93309ff27e
child 8580 e79ee31d3936
permissions -rw-r--r--
added simple_args; added 'tactic' method;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Isar/outer_lex.ML
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     2
    ID:         $Id$
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     3
    Author:     Markus Wenzel, TU Muenchen
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     4
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     5
Outer lexical syntax for Isabelle/Isar.
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     6
*)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     7
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
     8
signature OUTER_LEX =
sig
  (*classification of tokens*)
  datatype token_kind =
      Command
    | Keyword
    | Ident
    | LongIdent
    | SymIdent
    | Var
    | TypeIdent
    | TypeVar
    | Nat
    | String
    | Verbatim
    | Space
    | Comment
    | Sync
    | EOF

  (*abstract tokens*)
  type token

  (*printable name of a token kind*)
  val str_of_kind: token_kind -> string

  (*end-of-input handling and sync markers*)
  val stopper: token * (token -> bool)
  val not_sync: token -> bool
  val not_eof: token -> bool

  (*source position of a token, as value and as string*)
  val position_of: token -> Position.T
  val pos_of: token -> string

  (*inspecting kind and content of tokens*)
  val is_kind: token_kind -> token -> bool
  val keyword_with: (string -> bool) -> token -> bool
  val name_of: token -> string
  val is_proper: token -> bool
  val is_indent: token -> bool
  val val_of: token -> string

  (*test on plain strings: symbolic identifier or ordinary identifier*)
  val is_sid: string -> bool

  (*scan a single token; the pair of lexicons is used for keywords and commands*)
  val scan: (Scan.lexicon * Scan.lexicon) ->
    Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)

  (*token source on top of a symbol source; the flag enables error recovery*)
  val source: bool -> (unit -> (Scan.lexicon * Scan.lexicon)) ->
    Position.T -> (Symbol.symbol, 'a) Source.source ->
    (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    33
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    34
structure OuterLex: OUTER_LEX =
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    35
struct
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    36
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    37
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    38
(** tokens **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    39
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    40
(* datatype token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    41
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    42
(*kinds of outer syntax tokens; printable names are given by str_of_kind*)
datatype token_kind =
    Command
  | Keyword
  | Ident
  | LongIdent
  | SymIdent
  | Var
  | TypeIdent
  | TypeVar
  | Nat
  | String
  | Verbatim
  | Space
  | Comment
  | Sync
  | EOF;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    45
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    46
(*a token: source position paired with (kind, text)*)
datatype token = Token of Position.T * (token_kind * string);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    47
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    48
(*printable name of a token kind*)
fun str_of_kind Command = "command"
  | str_of_kind Keyword = "keyword"
  | str_of_kind Ident = "identifier"
  | str_of_kind LongIdent = "long identifier"
  | str_of_kind SymIdent = "symbolic identifier"
  | str_of_kind Var = "schematic variable"
  | str_of_kind TypeIdent = "type variable"
  | str_of_kind TypeVar = "schematic type variable"
  | str_of_kind Nat = "number"
  | str_of_kind String = "string"
  | str_of_kind Verbatim = "verbatim text"
  | str_of_kind Space = "white space"
  | str_of_kind Comment = "comment text"
  | str_of_kind Sync = "sync marker"
  | str_of_kind EOF = "end-of-file";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    64
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    65
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    66
(* sync token *)
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    67
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    68
(*true for every token except those of kind Sync*)
fun not_sync (Token (_, (kind, _))) =
  (case kind of
    Sync => false
  | _ => true);
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    70
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
    71
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    72
(* eof token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    73
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    74
(*the end-of-file token: kind EOF, empty text, no position*)
val eof = Token (Position.none, (EOF, ""));
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    75
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    76
(*recognize tokens of kind EOF, regardless of position and text*)
fun is_eof (Token (_, (kind, _))) =
  (case kind of
    EOF => true
  | _ => false);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    78
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    79
(*stopper pair: the eof token together with the test that recognizes it*)
val stopper = (eof, is_eof);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    80
(*negation of is_eof*)
fun not_eof tok = not (is_eof tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    81
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    82
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    83
(* get position *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    84
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    85
(*the position component stored in a token*)
fun position_of (Token (position, _)) = position;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    86
(*token position rendered as string via Position.str_of*)
fun pos_of tok = Position.str_of (position_of tok);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    87
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    88
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    89
(* kind of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    90
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    91
(*does the token have the given kind?*)
fun is_kind kind (Token (_, (kind', _))) = (kind = kind');
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    92
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
    93
(*apply the predicate to the text of a Keyword token; false for all other kinds*)
fun keyword_with pred (Token (_, (kind, text))) =
  (case kind of
    Keyword => pred text
  | _ => false);
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    95
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    96
(*printable name of the kind of a token*)
fun name_of (Token (_, (kind, _))) = str_of_kind kind;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
    97
7682
46de8064c93c added Space, Comment token kinds (keep actual text);
wenzelm
parents: 7477
diff changeset
    98
(*proper tokens are all those that are neither white space nor comments*)
fun is_proper (Token (_, (kind, _))) =
  (case kind of
    Space => false
  | Comment => false
  | _ => true);
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   101
7902
10fd5d922c97 added is_indent;
wenzelm
parents: 7682
diff changeset
   102
(*indentation: a non-empty Space token whose text does not end with newline;
  all other tokens (including empty Space tokens) are rejected*)
fun is_indent (Token (_, (Space, ""))) = false
  | is_indent (Token (_, (Space, text))) =
      String.substring (text, size text - 1, 1) <> "\n"
  | is_indent _ = false;
10fd5d922c97 added is_indent;
wenzelm
parents: 7682
diff changeset
   106
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   107
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   108
(* value of token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   109
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   110
(*the text component stored in a token*)
fun val_of (Token (_, (_, text))) = text;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   111
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   112
(*order tokens by string comparison of their text; kind and position are ignored*)
fun token_leq (tok, tok') = val_of tok <= val_of tok';
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   113
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   114
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   115
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   116
(** scanners **)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   117
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   118
(*wrap a scanner with Scan.prompt, using "# " as prompt string*)
fun change_prompt scanner = Scan.prompt "# " scanner;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   119
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   120
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   121
(* diagnostics *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   122
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   123
(*compose an error message from the current position and the remaining symbols;
  the second component of the argument pair is ignored*)
fun lex_err msg ((pos, syms), _) =
  let val loc = Position.str_of pos
  in "Outer lexical error" ^ loc ^ ": " ^ msg syms end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   124
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   125
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   126
(* line numbering *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   127
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   128
(*run a plain scanner on the symbols, replacing the position state by Position.inc of it*)
fun incr_line scanner =
  let fun step pos = scanner >> pair (Position.inc pos)
  in Scan.depend step end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   129
(*run a plain scanner via Scan.lift, i.e. without touching the position state*)
fun keep_line scanner = Scan.lift scanner;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   130
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   131
(*scan a single blank symbol; a newline is tried first and goes through incr_line*)
val scan_blank =
  let
    val newline = incr_line ($$ "\n");
    val other_blank = keep_line (Scan.one Symbol.is_blank);
  in newline || other_blank end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   134
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   135
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   136
(* scan symbolic idents *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   137
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   138
(*the characters that may form multi-character symbolic identifiers*)
val sym_chars = explode "!#$%&*+-/:<=>?@^_`|~";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   139
(*membership test for sym_chars*)
fun is_sym_char sym = sym mem sym_chars;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   140
8231
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   141
(*symbolic identifier: a non-empty run of sym_chars (tried first),
  or else a single symbol accepted by Symbol.is_symbolic*)
val scan_symid =
  let
    val char_run = Scan.any1 is_sym_char >> implode;
    val single_symbol = Scan.one Symbol.is_symbolic;
  in char_run || single_symbol end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   144
8231
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   145
(*check a plain string for symbolic identifier form; strings that Symbol.explode
  rejects are not identifiers.  NOTE(review): the empty string is accepted, since
  "forall" holds vacuously on the empty list -- unlike scan_symid, which demands
  at least one symbol*)
fun is_symid str =
  let
    fun check [sym] = Symbol.is_symbolic sym orelse is_sym_char sym
      | check syms = forall is_sym_char syms;
  in
    (case try Symbol.explode str of
      Some syms => check syms
    | None => false)
  end;
fa93309ff27e symid: include single symbolic char;
wenzelm
parents: 7902
diff changeset
   150
5876
273056b673ec replaced is_symid by is_sid;
wenzelm
parents: 5825
diff changeset
   151
(*symbolic identifier or identifier according to Syntax.is_identifier*)
fun is_sid str = is_symid str orelse Syntax.is_identifier str;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   152
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   153
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   154
(* scan strings *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   155
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   156
(*Scan one item of string content.  Alternatives, in this order:
    - a single blank (a newline goes through incr_line);
    - a backslash escape, which yields just the escaped symbol;
    - any other symbol except backslash, double quote, sync and eof.
  An escaped newline has to go through incr_line as well: previously it was
  consumed under keep_line, so the position state was not advanced and every
  position after a backslash-newline within a string was off by one line.*)
val scan_str =
  scan_blank ||
  keep_line ($$ "\\") |--
    (incr_line ($$ "\n") ||
      keep_line (Scan.one (Symbol.not_sync andf Symbol.not_eof))) ||
  keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf
    Symbol.not_sync andf Symbol.not_eof));
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   161
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   162
(*quoted string: opening double quote, any number of scan_str items, closing
  double quote; the result is the content without the quotes.  After the opening
  quote has been read, failure is reported through lex_err*)
val scan_string =
  let
    val quote_mark = keep_line ($$ "\"");
    val content = Scan.repeat scan_str >> implode;
  in
    quote_mark |--
      !! (lex_err (K "missing quote at end of string"))
        (change_prompt (content --| quote_mark))
  end;
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   166
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   167
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   168
(* scan verbatim text *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   169
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   170
(*Scan one item of verbatim text.  Alternatives, in this order:
    - a single blank;
    - a star, provided the symbol after it exists and is not "}" (lookahead only);
    - any symbol other than star, sync and eof*)
val scan_verb =
  let
    val lone_star = keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal "}")));
    val ordinary =
      keep_line (Scan.one (fn sym =>
        sym <> "*" andalso Symbol.not_sync sym andalso Symbol.not_eof sym));
  in scan_blank || lone_star || ordinary end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   174
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   175
(*verbatim text: opening "{" "*", any number of scan_verb items, closing "*" "}";
  the result is the content without the delimiters.  After the opening delimiter
  has been read, failure is reported through lex_err*)
val scan_verbatim =
  let
    val opening = keep_line ($$ "{" -- $$ "*");
    val closing = keep_line ($$ "*" -- $$ "}");
    val content = Scan.repeat scan_verb >> implode;
  in
    opening |--
      !! (lex_err (K "missing end of verbatim text"))
        (change_prompt (content --| closing))
  end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   179
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   180
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   181
(* scan space *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   182
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   183
(*blank symbols other than newline*)
fun is_space sym = Symbol.is_blank sym andalso sym <> "\n";
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   184
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   185
(*white space within a single line: either a non-empty run of spaces optionally
  followed by one newline, or a possibly empty run of spaces followed by one
  newline; the result is the scanned text, so at most one newline, at the end*)
val scan_space =
  let
    val newline = incr_line ($$ "\n");
    val spaces_first = keep_line (Scan.any1 is_space) -- Scan.optional newline "";
    val newline_required = keep_line (Scan.any is_space) -- newline;
    fun join (spaces, nl) = implode spaces ^ nl;
  in (spaces_first || newline_required) >> join end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   188
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   189
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   190
(* scan nested comments *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   191
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   192
(*Scan one item inside a comment, with the nesting depth as additional state.
  Alternatives, in this order:
    - a single blank;
    - an opening comment bracket, which increases the depth;
    - a closing comment bracket, which decreases the depth; this fails at depth 0,
      leaving the outermost closing bracket to scan_comment;
    - a star, provided the symbol after it exists and is not ")" (lookahead only);
    - any symbol other than star, sync and eof*)
val scan_cmt =
  let
    val blank = Scan.lift scan_blank;
    val nested_open =
      Scan.depend (fn depth => keep_line ($$ "(" ^^ $$ "*") >> pair (depth + 1));
    val nested_close =
      Scan.depend
        (fn 0 => Scan.fail
          | depth => keep_line ($$ "*" ^^ $$ ")") >> pair (depth - 1));
    val lone_star =
      Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")"))));
    val ordinary =
      Scan.lift (keep_line (Scan.one (fn sym =>
        sym <> "*" andalso Symbol.not_sync sym andalso Symbol.not_eof sym)));
  in blank || nested_open || nested_close || lone_star || ordinary end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   198
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   199
(*nested comment: opening bracket, any number of scan_cmt items starting at
  depth 0, closing bracket; the result is the text in between, including the
  brackets of inner comments.  After the opening bracket has been read, failure
  is reported through lex_err*)
val scan_comment =
  let
    val opening = keep_line ($$ "(" -- $$ "*");
    val closing = keep_line ($$ "*" -- $$ ")");
    val content = Scan.pass 0 (Scan.repeat scan_cmt >> implode);
  in
    opening |--
      !! (lex_err (K "missing end of comment"))
        (change_prompt (content --| closing))
  end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   204
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   205
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   206
(* scan token *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   207
7026
69724548fad1 separate command tokens;
wenzelm
parents: 6859
diff changeset
   208
(*Scan a single token from (pos, cs); every token produced carries the initial
  position pos.  Strings, verbatim text, white space, comments and sync markers
  are tried first, in this order.  Otherwise the result is chosen by Scan.max
  wrt. token_leq among: a literal from lex1 (Keyword), a literal from lex2
  (Command), and the first successful identifier-like scanner.  Failure of the
  whole scanner is reported as "bad input" through lex_err*)
fun scan (lex1, lex2) (pos, cs) =
  let
    fun mk kind text = Token (pos, (kind, text));
    fun mk_sync _ = mk Sync Symbol.sync;

    val keyword = Scan.literal lex1 >> (mk Keyword o implode);
    val command = Scan.literal lex2 >> (mk Command o implode);

    val ident_like =
      Syntax.scan_longid >> mk LongIdent ||
      Syntax.scan_id >> mk Ident ||
      Syntax.scan_var >> mk Var ||
      Syntax.scan_tid >> mk TypeIdent ||
      Syntax.scan_tvar >> mk TypeVar ||
      Syntax.scan_nat >> mk Nat ||
      scan_symid >> mk SymIdent;

    val literal = Scan.max token_leq keyword command;
    val word = keep_line (Scan.max token_leq literal ident_like);

    val scanner =
      scan_string >> mk String ||
      scan_verbatim >> mk Verbatim ||
      scan_space >> mk Space ||
      scan_comment >> mk Comment ||
      keep_line (Scan.one Symbol.is_sync >> mk_sync) ||
      word;

    fun bad_input syms = "bad input " ^ quote (Symbol.beginning syms);
  in !! (lex_err bad_input) scanner (pos, cs) end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   231
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   232
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   233
(* source of (proper) tokens *)
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   234
6859
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   235
(*junk symbols: anything that is neither blank, nor sync, nor eof*)
fun is_junk sym =
  not (Symbol.is_blank sym) andalso Symbol.not_sync sym andalso Symbol.not_eof sym;
2b3db2b6c129 sync token;
wenzelm
parents: 6743
diff changeset
   236
(*recovery scanner: consume a non-empty run of junk symbols, position unchanged*)
fun recover input = keep_line (Scan.any1 is_junk) input;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   237
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   238
(*token source on top of a symbol source, starting at position pos.  The
  lexicons are fetched by get_lex () afresh for each token scanned; the recover
  scanner is passed on only if do_recover is set*)
fun source do_recover get_lex pos src =
  let
    fun scan_token input = scan (get_lex ()) input;
    val recovery = if do_recover then Some recover else None;
  in Source.source' pos Symbol.stopper (Scan.bulk scan_token) recovery src end;
5825
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   241
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   242
24e4b1780d33 Outer lexical syntax for Isabelle/Isar.
wenzelm
parents:
diff changeset
   243
end;