src/Pure/Thy/thy_scan.ML
author berghofe
Thu, 07 Oct 1999 15:40:32 +0200
changeset 7786 cf9d07ad62af
parent 6667 58b9785f8534
child 11819 9283b3c11234
permissions -rw-r--r--
Replaced update_new by update.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
     1
(*  Title:	Pure/Thy/thy_scan.ML
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
     2
    ID:		$Id$
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
     3
    Author:	Markus Wenzel, TU Muenchen
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
     4
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
     5
Lexer for the outer Isabelle syntax.
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
     6
*)
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
     7
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
     8
signature THY_SCAN =
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
     9
sig
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    10
  datatype token_kind =
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    11
    Keyword | Ident | LongIdent | Var | TypeVar | Nat | String | Verbatim | Ignore | EOF
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    12
  val name_of_kind: token_kind -> string
6207
58e9f980bd4f tokenize: get exploded args;
wenzelm
parents: 5910
diff changeset
    13
  val tokenize: Scan.lexicon -> string list -> (token_kind * string * int) list
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    14
end;
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    15
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    16
structure ThyScan: THY_SCAN =
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    17
struct
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    18
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    19
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    20
(** token kinds **)
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    21
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    22
datatype token_kind =
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    23
  Keyword | Ident | LongIdent | Var | TypeVar | Nat | String | Verbatim | Ignore | EOF;
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    24
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    25
fun name_of_kind Keyword = "keyword"
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    26
  | name_of_kind Ident = "identifier"
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    27
  | name_of_kind LongIdent = "long identifier"
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    28
  | name_of_kind Var = "schematic variable"
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    29
  | name_of_kind TypeVar = "type variable"
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    30
  | name_of_kind Nat = "natural number"
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    31
  | name_of_kind String = "string"
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    32
  | name_of_kind Verbatim = "verbatim text"
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    33
  | name_of_kind Ignore = "ignore"
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    34
  | name_of_kind EOF = "end-of-file";
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    35
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    36
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    37
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    38
(** scanners **)
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    39
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    40
(* diagnostics *)
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    41
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    42
fun lex_err None msg = "Outer lexical error: " ^ msg
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    43
  | lex_err (Some n) msg = "Outer lexical error on line " ^ string_of_int n ^ ": " ^ msg;
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    44
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    45
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    46
(* line numbering *)
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    47
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    48
val incr = apsome (fn n:int => n + 1);
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    49
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    50
fun incr_line scan = Scan.depend (fn n => scan >> pair (incr n));
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    51
val keep_line = Scan.lift;
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    52
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    53
val scan_blank =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    54
  incr_line ($$ "\n") ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    55
  keep_line (Scan.one Symbol.is_blank);
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    56
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    57
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    58
(* scan ML-style strings *)
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    59
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    60
val scan_ctrl =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    61
  $$ "^" ^^ Scan.one (fn c => Symbol.is_ascii c andalso ord c >= 64 andalso ord c <= 95);
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    62
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    63
val scan_dig = Scan.one Symbol.is_digit;
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    64
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    65
val scan_esc =
4921
74bc10921f7d adapted to new Scan.fail_with / Scan.!!;
wenzelm
parents: 4705
diff changeset
    66
  keep_line ($$ "\\") ^^ !! (fn ((n, _), _) => lex_err n "bad escape sequence in string")
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    67
    (keep_line ($$ "n" || $$ "t" || $$ "\"" || $$ "\\" ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    68
      scan_ctrl || scan_dig ^^ scan_dig ^^ scan_dig) ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    69
      (Scan.repeat1 scan_blank >> implode) ^^ keep_line ($$ "\\"));
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    70
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    71
val scan_str =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    72
  scan_esc ||
5910
151ee1a5c09c Symbol.space;
wenzelm
parents: 5112
diff changeset
    73
  scan_blank >> K Symbol.space ||
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    74
  keep_line (Scan.one (not_equal "\"" andf Symbol.not_eof));
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    75
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    76
val scan_string =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    77
  keep_line ($$ "\"") ^^
4921
74bc10921f7d adapted to new Scan.fail_with / Scan.!!;
wenzelm
parents: 4705
diff changeset
    78
  !! (fn ((n, _), _) => lex_err n "missing quote at end of string")
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    79
    ((Scan.repeat scan_str >> implode) ^^ keep_line ($$ "\""));
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    80
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    81
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    82
(* scan verbatim text *)
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    83
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    84
val scan_verb =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    85
  scan_blank ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    86
  keep_line ($$ "|" --| Scan.ahead (Scan.one (not_equal "}"))) ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    87
  keep_line (Scan.one (not_equal "|" andf Symbol.not_eof));
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    88
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    89
val scan_verbatim =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    90
  keep_line ($$ "{" -- $$ "|") |--
4921
74bc10921f7d adapted to new Scan.fail_with / Scan.!!;
wenzelm
parents: 4705
diff changeset
    91
  !! (fn ((n, _), _) => lex_err n "missing end of verbatim text")
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    92
    ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "|" -- $$ "}"));
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    93
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    94
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    95
(* scan nested comments *)
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
    96
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    97
val scan_cmt =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    98
  Scan.lift scan_blank ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
    99
  Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   100
  Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   101
  Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   102
  Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_eof)));
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   103
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   104
val scan_comment =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   105
  keep_line ($$ "(" -- $$ "*") |--
4921
74bc10921f7d adapted to new Scan.fail_with / Scan.!!;
wenzelm
parents: 4705
diff changeset
   106
  !! (fn ((n, _), _) => lex_err n "missing end of comment")
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   107
    (Scan.pass 0 (Scan.repeat scan_cmt) |-- keep_line ($$ "*" -- $$ ")") >> K "");
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   108
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   109
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   110
(* scan token *)
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   111
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   112
fun token k None x = (k, x, 0)
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   113
  | token k (Some n) x = (k, x, n);
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   114
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   115
fun scan_tok lex (n, cs) =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   116
 (scan_string >> token String n ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   117
  scan_verbatim >> token Verbatim n ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   118
  Scan.repeat1 scan_blank >> (token Ignore n o implode) ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   119
  scan_comment >> token Ignore n ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   120
  keep_line (Scan.max (fn ((_, x, _), (_, x', _)) => x <= x')
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   121
    (Scan.literal lex >> (token Keyword n o implode))
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   122
    (Syntax.scan_longid >> token LongIdent n ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   123
      Syntax.scan_id >> token Ident n ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   124
      Syntax.scan_var >> token Var n ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   125
      Syntax.scan_tid >> token TypeVar n ||
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   126
      Syntax.scan_nat >> token Nat n))) (n, cs);
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   127
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   128
val scan_rest = Scan.any Symbol.not_eof >> implode;
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   129
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   130
fun scan_token lex x =
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   131
  (case scan_tok lex x of
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   132
    ((Keyword, "ML", n), x') => (keep_line scan_rest >> token Verbatim (Some n)) x'
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   133
  | tok_x' => tok_x');
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   134
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   135
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   136
(* tokenize *)
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   137
6207
58e9f980bd4f tokenize: get exploded args;
wenzelm
parents: 5910
diff changeset
   138
fun tokenize lex chs =
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   139
  let
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   140
    val scan_toks = Scan.repeat (scan_token lex);
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   141
    val ignore = fn (Ignore, _, _) => true | _ => false;
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   142
  in
4938
c8bbbf3c59fa Symbol.stopper;
wenzelm
parents: 4921
diff changeset
   143
    (case Scan.error (Scan.finite' Symbol.stopper scan_toks) (Some 1, chs) of
4705
f71017706887 adapted to new scanner, baroque chars;
wenzelm
parents: 3831
diff changeset
   144
      (toks, (n, [])) => filter_out ignore toks @ [token EOF n "end-of-file"]
5112
9e74cf11e4a4 Symbol.beginning;
wenzelm
parents: 4938
diff changeset
   145
    | (_, (n, cs)) => error (lex_err n ("Bad input " ^ quote (Symbol.beginning cs))))
388
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   146
  end;
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   147
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   148
dcf6c0774075 (replaces Thy/scan.ML)
wenzelm
parents:
diff changeset
   149
end;