src/Pure/Isar/outer_lex.ML
changeset 27769 ad50c38ef842
parent 27752 ea7d573e565f
child 27780 7d0910f662f7
--- a/src/Pure/Isar/outer_lex.ML	Thu Aug 07 13:45:03 2008 +0200
+++ b/src/Pure/Isar/outer_lex.ML	Thu Aug 07 13:45:05 2008 +0200
@@ -36,22 +36,17 @@
   val unparse: token -> string
   val text_of: token -> string * string
   val is_sid: string -> bool
-  val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b
-  val scan_quoted: Position.T * Symbol.symbol list ->
-    Symbol.symbol list * (Position.T * Symbol.symbol list)
-  val scan: (Scan.lexicon * Scan.lexicon) ->
-    Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
-  val source: bool option -> (unit -> Scan.lexicon * Scan.lexicon) ->
-    Position.T -> (Symbol.symbol, 'a) Source.source ->
-    (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
-  val source_proper: (token, 'a) Source.source ->
-    (token, (token, 'a) Source.source) Source.source
+  val !!! : string -> (SymbolPos.T list -> 'a) -> SymbolPos.T list -> 'a
+  val scan_quoted: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list
+  val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source
+  val source: bool Option.option -> (unit -> Scan.lexicon * Scan.lexicon) ->
+    Position.T -> (Symbol.symbol, 'a) Source.source -> (token,
+      (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source
 end;
 
 structure OuterLex: OUTER_LEX =
 struct
 
-
 (** tokens **)
 
 (* datatype token *)
@@ -60,7 +55,7 @@
   Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | Nat |
   String | AltString | Verbatim | Space | Comment | Malformed | Error of string | Sync | EOF;
 
-datatype token = Token of (Position.range * string) * (token_kind * string);
+datatype token = Token of (string * Position.range) * (token_kind * string);
 
 val str_of_kind =
  fn Command => "command"
@@ -85,15 +80,15 @@
 
 (* position *)
 
-fun position_of (Token (((pos, _), _), _)) = pos;
-fun end_position_of (Token (((_, pos), _), _)) = pos;
+fun position_of (Token ((_, (pos, _)), _)) = pos;
+fun end_position_of (Token ((_, (_, pos)), _)) = pos;
 
 val pos_of = Position.str_of o position_of;
 
 
 (* control tokens *)
 
-fun mk_eof pos = Token (((pos, Position.none), ""), (EOF, ""));
+fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""));
 val eof = mk_eof Position.none;
 
 fun is_eof (Token (_, (EOF, _))) = true
@@ -111,7 +106,6 @@
 (* kind of token *)
 
 fun kind_of (Token (_, (k, _))) = k;
-
 fun is_kind k (Token (_, (k', _))) = k = k';
 
 fun keyword_with pred (Token (_, (Keyword, x))) = pred x
@@ -150,9 +144,8 @@
 
 fun val_of (Token (_, (_, x))) = x;
 
-fun source_of (Token ((range, src), _)) =
-  XML.Elem (Markup.tokenN, Position.properties_of (Position.encode_range range), [XML.Text src])
-  |> YXML.string_of;
+fun source_of (Token ((src, (pos, _)), _)) =
+  YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text src]));
 
 
 (* unparse *)
@@ -188,13 +181,11 @@
 
 (** scanners **)
 
-fun change_prompt scan = Scan.prompt "# " scan;
-
+open BasicSymbolPos;
 
-(* diagnostics *)
+fun !!! msg = SymbolPos.!!! ("Outer lexical error: " ^ msg);
 
-fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs;
-fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
+fun change_prompt scan = Scan.prompt "# " scan;
 
 
 (* scan symbolic idents *)
@@ -202,8 +193,8 @@
 val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~");
 
 val scan_symid =
-  Scan.many1 is_sym_char ||
-  Scan.one Symbol.is_symbolic >> single;
+  Scan.many1 (is_sym_char o symbol) ||
+  Scan.one (Symbol.is_symbolic o symbol) >> single;
 
 fun is_symid str =
   (case try Symbol.explode str of
@@ -222,31 +213,27 @@
 local
 
 val char_code =
-  Scan.one Symbol.is_ascii_digit --
-  Scan.one Symbol.is_ascii_digit --
-  Scan.one Symbol.is_ascii_digit :|--
-  (fn ((a, b), c) =>
+  Scan.one (Symbol.is_ascii_digit o symbol) --
+  Scan.one (Symbol.is_ascii_digit o symbol) --
+  Scan.one (Symbol.is_ascii_digit o symbol) :|--
+  (fn (((a, pos), (b, _)), (c, _)) =>
     let val (n, _) = Library.read_int [a, b, c]
-    in if n <= 255 then Scan.succeed [chr n, Symbol.DEL, Symbol.DEL] else Scan.fail end);
+    in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end);
 
 fun scan_str q =
-  Scan.lift ($$ "\\") |-- !!! "bad escape character in string"
-    (Scan.lift (($$ q || $$ "\\") >> single || char_code) >> cons Symbol.DEL) ||
-  Scan.lift (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s) >> single);
+  $$$ "\\" |-- !!! "bad escape character in string" ($$$ q || $$$ "\\" || char_code) ||
+  Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.is_regular s) >> single;
 
 fun scan_strs q =
-  Scan.lift ($$ q) |-- !!! "missing quote at end of string"
-    (change_prompt (Scan.repeat (scan_str q) --| Scan.lift ($$ q)))
-  >> (fn body => Symbol.DEL :: flat body @ [Symbol.DEL]);
+  $$$ q |-- !!! "missing quote at end of string"
+    (change_prompt (Scan.repeat (scan_str q) --| $$$ q)) >> flat;
 
 in
 
 val scan_string = scan_strs "\"";
 val scan_alt_string = scan_strs "`";
 
-val scan_quoted = Scan.depend (fn pos =>
-  Scan.trace (Scan.pass pos (scan_string || scan_alt_string))
-    >> (fn (_, syms) => (Position.advance syms pos, syms)));
+val scan_quoted = Scan.trace (scan_string || scan_alt_string) >> #2;
 
 end;
 
@@ -254,13 +241,12 @@
 (* scan verbatim text *)
 
 val scan_verb =
-  $$ "*" --| Scan.ahead (~$$ "}") ||
-  Scan.one (fn s => s <> "*" andalso Symbol.is_regular s);
+  $$$ "*" --| Scan.ahead (~$$$ "}") ||
+  Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single;
 
 val scan_verbatim =
-  Scan.lift ($$ "{" |-- $$ "*") |-- !!! "missing end of verbatim text"
-    (Scan.lift (change_prompt (Scan.repeat scan_verb --| $$ "*" --| $$ "}")))
-  >> (fn body => Symbol.DEL :: Symbol.DEL :: body @ [Symbol.DEL, Symbol.DEL]);
+  $$$ "{" |-- $$$ "*" |-- !!! "missing end of verbatim text"
+    (change_prompt (Scan.repeat scan_verb --| $$$ "*" --| $$$ "}")) >> flat;
 
 
 (* scan space *)
@@ -268,90 +254,72 @@
 fun is_space s = Symbol.is_blank s andalso s <> "\n";
 
 val scan_space =
-  (Scan.many1 is_space @@@ Scan.optional ($$ "\n" >> single) [] ||
-    Scan.many is_space @@@ ($$ "\n" >> single));
+  Scan.many1 (is_space o symbol) @@@ Scan.optional ($$$ "\n") [] ||
+  Scan.many (is_space o symbol) @@@ $$$ "\n";
 
 
 (* scan nested comments *)
 
 val scan_cmt =
-  Scan.depend (fn d => $$ "(" -- $$ "*" >> (fn (a, b) => (d + 1, [a, b]))) ||
-  Scan.depend (fn 0 => Scan.fail | d => $$ "*" -- $$ ")" >> (fn (a, b) => (d - 1, [a, b]))) ||
-  Scan.lift ($$ "*" --| Scan.ahead (~$$ ")") >> single) ||
-  Scan.lift (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s) >> single);
+  Scan.depend (fn d => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) ||
+  Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) ||
+  Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||
+  Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single);
 
 val scan_comment =
-  Scan.lift ($$ "(" |-- $$ "*") |--
-    !!! "missing end of comment"
-      (Scan.lift (change_prompt (Scan.pass 0 (Scan.repeat scan_cmt) --| $$ "*" --| $$ ")")))
-  >> (fn body => Symbol.DEL :: Symbol.DEL :: flat body @ [Symbol.DEL, Symbol.DEL]);
+  $$$ "(" |-- $$$ "*" |-- !!! "missing end of comment"
+    (change_prompt (Scan.pass 0 (Scan.repeat scan_cmt >> flat) --| $$$ "*" --| $$$ ")"));
 
 
 (* scan malformed symbols *)
 
 val scan_malformed =
-  $$ Symbol.malformed |--
-    change_prompt (Scan.many Symbol.is_regular)
-  --| Scan.option ($$ Symbol.end_malformed);
-
-
-(* scan token *)
-
-fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;
-
-fun advance_range syms pos =
-  let val pos' = Position.advance syms pos
-  in (Position.encode_range (pos, pos'), pos') end;
-
-val clean_value = implode o filter_out (fn s => s = Symbol.DEL);
+  $$$ Symbol.malformed |--
+    change_prompt (Scan.many (Symbol.is_regular o symbol))
+  --| Scan.option ($$$ Symbol.end_malformed);
 
 
-fun scan (lex1, lex2) =
-  let
-    val scanner = Scan.depend (fn pos => Scan.pass pos
-      (scan_string >> pair String ||
-        scan_alt_string >> pair AltString ||
-        scan_verbatim >> pair Verbatim ||
-        scan_comment >> pair Comment ||
-        Scan.lift scan_space >> pair Space ||
-        Scan.lift scan_malformed >> pair Malformed ||
-        Scan.lift (Scan.one Symbol.is_sync >> K (Sync, [Symbol.sync])) ||
-        Scan.lift ((Scan.max token_leq
-          (Scan.max token_leq
-            (Scan.literal lex2 >> pair Command)
-            (Scan.literal lex1 >> pair Keyword))
-          (Syntax.scan_longid >> pair LongIdent ||
-            Syntax.scan_id >> pair Ident ||
-            Syntax.scan_var >> pair Var ||
-            Syntax.scan_tid >> pair TypeIdent ||
-            Syntax.scan_tvar >> pair TypeVar ||
-            Syntax.scan_nat >> pair Nat ||
-            scan_symid >> pair SymIdent)))) >>
-      (fn (k, syms) => 
-        let val (range_pos, pos') = advance_range syms pos
-        in (pos', Token (((range_pos, pos'), implode syms), (k, clean_value syms))) end));
 
-  in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
+(** token sources **)
 
-
-(* token sources *)
+fun source_proper src = src |> Source.filter is_proper;
 
 local
 
-val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
+fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;
+fun token (k, ss) = Token (SymbolPos.implode ss, (k, implode (map symbol ss)));
 
-fun recover msg = Scan.depend (fn pos => Scan.many is_junk >> (fn syms =>
-  let val (range_pos, pos') = advance_range syms pos
-  in (pos', [Token (((range_pos, pos'), implode syms), (Error msg, clean_value syms))]) end));
+fun scan (lex1, lex2) = !!! "bad input"
+  (scan_string >> pair String ||
+    scan_alt_string >> pair AltString ||
+    scan_verbatim >> pair Verbatim ||
+    SymbolPos.scan_comment_body !!! >> pair Comment ||
+    scan_space >> pair Space ||
+    scan_malformed >> pair Malformed ||
+    Scan.one (Symbol.is_sync o symbol) >> (fn s => (Sync, [s])) ||
+    ((Scan.max token_leq
+      (Scan.max token_leq
+        (Scan.literal lex2 >> pair Command)
+        (Scan.literal lex1 >> pair Keyword))
+      (Syntax.scan_longid >> pair LongIdent ||
+        Syntax.scan_id >> pair Ident ||
+        Syntax.scan_var >> pair Var ||
+        Syntax.scan_tid >> pair TypeIdent ||
+        Syntax.scan_tvar >> pair TypeVar ||
+        Syntax.scan_nat >> pair Nat ||
+        scan_symid >> pair SymIdent)))) >> token;
+
+fun recover msg =
+  Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o symbol)
+  >> (fn ss => [token (Error msg, ss)]);
 
 in
 
 fun source do_recover get_lex pos src =
-  Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
-    (Option.map (rpair recover) do_recover) src;
+  SymbolPos.source pos src
+  |> Source.source SymbolPos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
+    (Option.map (rpair recover) do_recover);
 
 end;
 
-fun source_proper src = src |> Source.filter is_proper;
-
 end;