(replaces Thy/scan.ML)
authorwenzelm
Thu, 19 May 1994 16:25:03 +0200
changeset 388 dcf6c0774075
parent 387 69f4356d915d
child 389 85105a7fb668
(replaces Thy/scan.ML) scanner now based on combinators of structure Scanner; improved handling of keywords; now supports long.ids, (* (* nested *) comments *), {| verbatim text |};
src/Pure/Thy/thy_scan.ML
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Thy/thy_scan.ML	Thu May 19 16:25:03 1994 +0200
@@ -0,0 +1,138 @@
+(*  Title:      Pure/Thy/thy_scan.ML
+    ID:         $Id$
+    Author:     Markus Wenzel, TU Muenchen
+
+The scanner for theory files.
+*)
+
+signature THY_SCAN =
+sig
+  datatype token_kind =
+    Keyword | Ident | LongIdent | TypeVar | Nat | String | Verbatim | EOF
+  val name_of_kind: token_kind -> string
+  type lexicon
+  val make_lexicon: string list -> lexicon
+  val tokenize: lexicon -> string -> (token_kind * string * int) list
+end;
+
+
+functor ThyScanFun(Scanner: SCANNER): THY_SCAN =
+struct
+
+open Scanner;
+
+
+(** token kinds **)
+
+datatype token_kind =
+  Keyword | Ident | LongIdent | TypeVar | Nat | String | Verbatim | EOF;
+
+fun name_of_kind Keyword = "keyword"
+  | name_of_kind Ident = "identifier"
+  | name_of_kind LongIdent = "long identifier"
+  | name_of_kind TypeVar = "type variable"
+  | name_of_kind Nat = "natural number"
+  | name_of_kind String = "string"
+  | name_of_kind Verbatim = "verbatim text"
+  | name_of_kind EOF = "end-of-file";
+
+
+
+(** scanners **)
+
+fun lex_err line msg =
+  error ("Lexical error on line " ^ string_of_int line ^ ": " ^ msg);
+
+
+(* misc utilities *)
+
+fun count_lines cs =
+  foldl (fn (sum, c) => if c = "\n" then sum + 1 else sum) (0, cs);
+
+fun inc_line n s = n + count_lines (explode s);
+
+fun cons_fst c (cs, x, y) = (c :: cs, x, y);
+
+
+(* scan strings *)
+
+val scan_ctrl = $$ "^" ^^ scan_one (fn c => ord c >= 64 andalso ord c <= 95);
+
+val scan_dig = scan_one is_digit;
+
+val scan_blanks1 = scan_any1 is_blank >> implode;
+
+val scan_esc =
+  $$ "n" || $$ "t" || $$ "\"" || $$ "\\" ||
+  scan_ctrl || scan_dig ^^ scan_dig ^^ scan_dig ||
+  scan_blanks1 ^^ $$ "\\";
+
+
+fun string ("\"" :: cs) n = (["\""], cs, n)
+  | string ("\\" :: cs) n =
+      (case optional scan_esc cs of
+        (None, _) => lex_err n "bad escape sequence in string"
+      | (Some s, cs') => cons_fst ("\\" ^ s) (string cs' (inc_line n s)))
+  | string (c :: cs) n = cons_fst c (string cs n)
+  | string [] n = lex_err n "missing quote at end of string";
+
+
+(* scan verbatim text *)
+
+fun verbatim ("|" :: "}" :: cs) n = ([], cs, n)
+  | verbatim (c :: cs) n = cons_fst c (verbatim cs (inc_line n c))
+  | verbatim [] n = lex_err n "missing end of verbatim text";
+
+
+(* scan nested comments *)
+
+fun comment ("*" :: ")" :: cs) n 1 = (cs, n)
+  | comment ("*" :: ")" :: cs) n d = comment cs n (d - 1)
+  | comment ("(" :: "*" :: cs) n d = comment cs n (d + 1)
+  | comment (c :: cs) n d = comment cs (inc_line n c) d
+  | comment [] n _ = lex_err n "missing end of comment";
+
+
+
+(** tokenize **)
+
+val scan_longid = scan_id ^^ (repeat1 ($$ "." ^^ scan_id) >> implode);
+
+fun scan_word lex =
+  max_of
+    (scan_literal lex >> pair Keyword)
+    (scan_longid >> pair LongIdent ||
+      scan_id >> pair Ident ||
+      scan_tid >> pair TypeVar ||
+      scan_nat >> pair Nat);
+
+fun add_tok toks kind n (cs, cs', n') =
+  ((kind, implode cs, n) :: toks, cs', n');
+
+val take_line = implode o fst o take_prefix (not_equal "\n");
+
+
+fun tokenize lex str =
+  let
+    fun scan (toks, [], n) = rev ((EOF, "end-of-file", n) :: toks)
+      | scan (toks, "\"" :: cs, n) =
+          scan (add_tok toks String n (cons_fst "\"" (string cs n)))
+      | scan (toks, "{" :: "|" :: cs, n) =
+          scan (add_tok toks Verbatim n (verbatim cs n))
+      | scan (toks, "(" :: "*" :: cs, n) =
+          scan ((fn (cs', n') => (toks, cs', n')) (comment cs n 1))
+      | scan (toks, chs as c :: cs, n) =
+          if is_blank c then scan (toks, cs, inc_line n c)
+          else
+            (case scan_word lex chs of
+              (None, _) => lex_err n (quote (take_line chs))
+            | (Some (Keyword, "ML"), chs') => scan ((Verbatim, implode chs', n)
+                :: (Keyword, "ML", n) :: toks, [], n + count_lines chs')
+            | (Some (tk, s), chs') => scan ((tk, s, n) :: toks, chs', n));
+  in
+    scan ([], explode str, 1)
+  end;
+
+
+end;
+