Outer lexical syntax for Isabelle/Isar.
authorwenzelm
Mon, 09 Nov 1998 15:32:58 +0100
changeset 5825 24e4b1780d33
parent 5824 91113aa09371
child 5826 977f789566b7
Outer lexical syntax for Isabelle/Isar.
src/Pure/Isar/outer_lex.ML
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Isar/outer_lex.ML	Mon Nov 09 15:32:58 1998 +0100
@@ -0,0 +1,217 @@
+(*  Title:      Pure/Isar/outer_lex.ML
+    ID:         $Id$
+    Author:     Markus Wenzel, TU Muenchen
+
+Outer lexical syntax for Isabelle/Isar.
+*)
+
+signature OUTER_LEX =
+sig
+  datatype token_kind =
+    Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
+    String | Verbatim | Ignore | EOF
+  type token
+  val str_of_kind: token_kind -> string
+  val stopper: token * (token -> bool)
+  val not_eof: token -> bool
+  val position_of: token -> Position.T
+  val pos_of: token -> string
+  val is_kind: token_kind -> token -> bool
+  val keyword_pred: (string -> bool) -> token -> bool
+  val name_of: token -> string
+  val is_proper: token -> bool
+  val val_of: token -> string
+  val is_symid: string -> bool
+  val scan: Scan.lexicon ->
+    Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
+  val source: bool -> (unit -> Scan.lexicon) -> Position.T -> (Symbol.symbol, 'a) Source.source ->
+    (token, (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source
+end;
+
+structure OuterLex: OUTER_LEX =
+struct
+
+
+(** tokens **)
+
+(* datatype token *)
+
+datatype token_kind =
+  Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat |
+  String | Verbatim | Ignore | EOF;
+
+datatype token = Token of Position.T * (token_kind * string);
+
+val str_of_kind =
+ fn Keyword => "keyword"
+  | Ident => "identifier"
+  | LongIdent => "long identifier"
+  | SymIdent => "symbolic identifier"
+  | Var => "schematic variable"
+  | TextVar => "text variable"
+  | TypeIdent => "type variable"
+  | TypeVar => "schematic type variable"
+  | Nat => "number"
+  | String => "string"
+  | Verbatim => "verbatim text"
+  | Ignore => "ignored text"
+  | EOF => "end-of-file";
+
+
+(* eof token *)
+
+val eof = Token (Position.none, (EOF, ""));
+
+fun is_eof (Token (_, (EOF, _))) = true
+  | is_eof _ = false;
+
+val stopper = (eof, is_eof);
+val not_eof = not o is_eof;
+
+
+(* get position *)
+
+fun position_of (Token (pos, _)) = pos;
+val pos_of = Position.str_of o position_of;
+
+
+(* kind of token *)
+
+fun is_kind k (Token (_, (k', _))) = k = k';
+
+fun keyword_pred pred (Token (_, (Keyword, x))) = pred x
+  | keyword_pred _ _ = false;
+
+fun name_of (Token (_, (k, _))) = str_of_kind k;
+
+fun is_proper (Token (_, (Ignore, _))) = false
+  | is_proper _ = true;
+
+
+(* value of token *)
+
+fun val_of (Token (_, (_, x))) = x;
+
+fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
+
+
+
+(** scanners **)
+
+fun change_prompt scan = Scan.prompt "# " scan;
+
+
+(* diagnostics *)
+
+fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs;
+
+
+(* line numbering *)
+
+fun incr_line scan = Scan.depend (fn pos => scan >> pair (Position.inc pos));
+val keep_line = Scan.lift;
+
+val scan_blank =
+  incr_line ($$ "\n") ||
+  keep_line (Scan.one Symbol.is_blank);
+
+
+(* scan symbolic idents *)
+
+val sym_chars = explode "!#$%&*+-/:<=>?@^_`|~";
+fun is_sym_char s = s mem sym_chars;
+
+fun is_symid s = s <> "" andalso forall is_sym_char (Symbol.explode s);
+
+val scan_symid = Scan.any1 is_sym_char >> implode;
+
+
+(* scan strings *)
+
+val scan_str =
+  scan_blank >> K Symbol.space ||
+  keep_line ($$ "\\" |-- Scan.one Symbol.not_eof) ||
+  keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf Symbol.not_eof));
+
+val scan_string =
+  keep_line ($$ "\"") |--
+    !! (lex_err (K "missing quote at end of string"))
+      (change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\"")));
+
+
+(* scan verbatim text *)
+
+val scan_verb =
+  scan_blank ||
+  keep_line ($$ "|" --| Scan.ahead (Scan.one (not_equal "}"))) ||
+  keep_line (Scan.one (not_equal "|" andf Symbol.not_eof));
+
+val scan_verbatim =
+  keep_line ($$ "{" -- $$ "|") |--
+    !! (lex_err (K "missing end of verbatim text"))
+      (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "|" -- $$ "}")));
+
+
+(* scan space *)
+
+val is_space = Symbol.is_blank andf not_equal "\n";
+
+val scan_space =
+  keep_line (Scan.any1 is_space) |-- Scan.optional (incr_line ($$ "\n")) "" ||
+  keep_line (Scan.any is_space) |-- incr_line ($$ "\n");
+
+
+(* scan nested comments *)
+
+val scan_cmt =
+  Scan.lift scan_blank ||
+  Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
+  Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
+  Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) ||
+  Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_eof)));
+
+val scan_comment =
+  keep_line ($$ "(" -- $$ "*") |--
+    !! (lex_err (K "missing end of comment"))
+      (change_prompt
+        (Scan.pass 0 (Scan.repeat scan_cmt) |-- keep_line ($$ "*" -- $$ ")") >> K ""));
+
+
+(* scan token *)
+
+fun scan lex (pos, cs) =
+  let
+    fun token k x = Token (pos, (k, x));
+    fun ignore _ = token Ignore "";
+
+    val scanner =
+      scan_string >> token String ||
+      scan_verbatim >> token Verbatim ||
+      scan_space >> ignore ||
+      scan_comment >> ignore ||
+      keep_line (Scan.max token_leq
+        (Scan.literal lex >> (token Keyword o implode))
+        (Syntax.scan_longid >> token LongIdent ||
+          Syntax.scan_id >> token Ident ||
+          Syntax.scan_var >> token Var ||
+          $$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> token TextVar ||
+          Syntax.scan_tid >> token TypeIdent ||
+          Syntax.scan_tvar >> token TypeVar ||
+          Syntax.scan_nat >> token Nat ||
+          scan_symid >> token SymIdent));
+  in
+    !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs)
+  end;
+
+
+(* source of (proper) tokens *)
+
+fun recover xs = keep_line (Scan.any1 ((not o Symbol.is_blank) andf Symbol.not_eof)) xs;
+
+fun source do_recover get_lex pos src =
+  Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
+    (if do_recover then Some recover else None) src
+  |> Source.filter is_proper;
+
+
+end;