added count/counted: improved position handling for token syntax;
authorwenzelm
Mon, 28 Jan 2008 22:27:24 +0100
changeset 26004 2abb3005660f
parent 26003 7a8aee8353cf
child 26005 431ab3907291
added count/counted: improved position handling for token syntax; removed obsolete incr_line/keep_line/scan_blank; string tokens no longer admit escaped white space, which was an accidental (undocumented) feature;
src/Pure/Isar/outer_lex.ML
--- a/src/Pure/Isar/outer_lex.ML	Mon Jan 28 22:27:23 2008 +0100
+++ b/src/Pure/Isar/outer_lex.ML	Mon Jan 28 22:27:24 2008 +0100
@@ -33,10 +33,10 @@
   val text_of: token -> string * string
   val is_sid: string -> bool
   val !!! : string -> (Position.T * 'a -> 'b) -> Position.T * 'a -> 'b
-  val incr_line: ('a -> 'b * 'c) -> Position.T * 'a -> 'b * (Position.T * 'c)
-  val keep_line: ('a -> 'b * 'c) -> Position.T * 'a -> 'b * (Position.T * 'c)
-  val scan_blank: Position.T * Symbol.symbol list
-    -> Symbol.symbol * (Position.T * Symbol.symbol list)
+  val count: (Symbol.symbol list -> Symbol.symbol * Symbol.symbol list) ->
+    Position.T * Symbol.symbol list -> Symbol.symbol * (Position.T * Symbol.symbol list)
+  val counted: (Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list) ->
+    Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list)
   val scan_string: Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list)
   val scan: (Scan.lexicon * Scan.lexicon) ->
     Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
@@ -186,14 +186,19 @@
 fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
 
 
-(* line numbering *)
+(* position *)
+
+local
+
+fun map_position f (scan: Symbol.symbol list -> 'a * Symbol.symbol list)  =
+  Scan.depend (fn (pos: Position.T) => scan >> (fn x => (f x pos, x)));
 
-fun incr_line scan = Scan.depend (fn pos => scan >> pair (Position.inc pos));
-val keep_line = Scan.lift;
+in
 
-val scan_blank =
-  incr_line ($$ "\n") ||
-  keep_line (Scan.one Symbol.is_blank);
+fun count scan = map_position Position.advance scan;
+fun counted scan = map_position (fold Position.advance) scan >> implode;
+
+end;
 
 
 (* scan symbolic idents *)
@@ -201,8 +206,8 @@
 val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~");
 
 val scan_symid =
-  Scan.many1 is_sym_char >> implode ||
-  Scan.one Symbol.is_symbolic;
+  Scan.many1 is_sym_char ||
+  Scan.one Symbol.is_symbolic >> single;
 
 fun is_symid str =
   (case try Symbol.explode str of
@@ -221,22 +226,21 @@
 local
 
 val char_code =
-  Scan.one Symbol.is_ascii_digit --
-  Scan.one Symbol.is_ascii_digit --
-  Scan.one Symbol.is_ascii_digit :|-- (fn ((a, b), c) =>
+  count (Scan.one Symbol.is_ascii_digit) --
+  count (Scan.one Symbol.is_ascii_digit) --
+  count (Scan.one Symbol.is_ascii_digit) :|--
+  (fn ((a, b), c) =>
     let val (n, _) = Library.read_int [a, b, c]
     in if n <= 255 then Scan.succeed (chr n) else Scan.fail end);
 
 fun scan_str q =
-  scan_blank ||
-  keep_line ($$ "\\") |-- !!! "bad escape character in string"
-      (scan_blank || keep_line ($$ q || $$ "\\" || char_code)) ||
-  keep_line (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s));
+  count ($$ "\\") |-- !!! "bad escape character in string" (count ($$ q || $$ "\\") || char_code) ||
+  count (Scan.one (fn s => s <> q andalso s <> "\\" andalso Symbol.is_regular s));
 
 fun scan_strs q =
-  keep_line ($$ q) |--
+  count ($$ q) |--
     !!! "missing quote at end of string"
-      (change_prompt ((Scan.repeat (scan_str q) >> implode) --| keep_line ($$ q)));
+      (change_prompt ((Scan.repeat (scan_str q) >> implode) --| count ($$ q)));
 
 in
 
@@ -249,14 +253,13 @@
 (* scan verbatim text *)
 
 val scan_verb =
-  scan_blank ||
-  keep_line ($$ "*" --| Scan.ahead (~$$ "}")) ||
-  keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s));
+  count ($$ "*" --| Scan.ahead (~$$ "}")) ||
+  count (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s));
 
 val scan_verbatim =
-  keep_line ($$ "{" -- $$ "*") |--
+  count ($$ "{") |-- count ($$ "*") |--
     !!! "missing end of verbatim text"
-      (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "*" -- $$ "}")));
+      (change_prompt ((Scan.repeat scan_verb >> implode) --| count ($$ "*") --| count ($$ "}")));
 
 
 (* scan space *)
@@ -264,42 +267,31 @@
 fun is_space s = Symbol.is_blank s andalso s <> "\n";
 
 val scan_space =
-  (keep_line (Scan.many1 is_space) -- Scan.optional (incr_line ($$ "\n")) "" ||
-    keep_line (Scan.many is_space) -- incr_line ($$ "\n")) >> (fn (cs, c) => implode cs ^ c);
+  (Scan.many1 is_space @@@ Scan.optional ($$ "\n" >> single) [] ||
+    Scan.many is_space @@@ ($$ "\n" >> single));
 
 
 (* scan nested comments *)
 
 val scan_cmt =
-  Scan.lift scan_blank ||
-  Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
-  Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
-  Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) ||
-  Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
+  Scan.depend (fn d => count ($$ "(") ^^ count ($$ "*") >> pair (d + 1)) ||
+  Scan.depend (fn 0 => Scan.fail | d => count ($$ "*") ^^ count ($$ ")") >> pair (d - 1)) ||
+  Scan.lift (count ($$ "*" --| Scan.ahead (~$$ ")"))) ||
+  Scan.lift (count (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
 
 val scan_comment =
-  keep_line ($$ "(" -- $$ "*") |--
+  count ($$ "(") |-- count ($$ "*") |--
     !!! "missing end of comment"
       (change_prompt
-        (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| keep_line ($$ "*" -- $$ ")")));
+        (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| count ($$ "*") --| count ($$ ")")));
 
 
 (* scan malformed symbols *)
 
-local
-
-val scan_mal =
-  scan_blank ||
-  keep_line (Scan.one Symbol.is_regular);
-
-in
-
 val scan_malformed =
-  keep_line ($$ Symbol.malformed) |--
-    change_prompt (Scan.repeat scan_mal >> implode)
-      --| keep_line (Scan.option ($$ Symbol.end_malformed));
-
-end;
+  $$ Symbol.malformed |--
+    change_prompt (Scan.many Symbol.is_regular)
+  --| Scan.option ($$ Symbol.end_malformed);
 
 
 (* scan token *)
@@ -314,21 +306,21 @@
         scan_string >> token String ||
         scan_alt_string >> token AltString ||
         scan_verbatim >> token Verbatim ||
-        scan_space >> token Space ||
         scan_comment >> token Comment ||
-        scan_malformed >> token Malformed ||
-        keep_line (Scan.one Symbol.is_sync >> sync) ||
-        keep_line (Scan.max token_leq
+        counted scan_space >> token Space ||
+        counted scan_malformed >> token Malformed ||
+        Scan.lift (Scan.one Symbol.is_sync >> sync) ||
+        (Scan.max token_leq
           (Scan.max token_leq
-            (Scan.literal lex1 >> (token Keyword o implode))
-            (Scan.literal lex2 >> (token Command o implode)))
-          (Syntax.scan_longid >> token LongIdent ||
-            Syntax.scan_id >> token Ident ||
-            Syntax.scan_var >> token Var ||
-            Syntax.scan_tid >> token TypeIdent ||
-            Syntax.scan_tvar >> token TypeVar ||
-            Syntax.scan_nat >> token Nat ||
-            scan_symid >> token SymIdent))
+            (counted (Scan.literal lex1) >> token Keyword)
+            (counted (Scan.literal lex2) >> token Command))
+          (counted Syntax.scan_longid >> token LongIdent ||
+            counted Syntax.scan_id >> token Ident ||
+            counted Syntax.scan_var >> token Var ||
+            counted Syntax.scan_tid >> token TypeIdent ||
+            counted Syntax.scan_tvar >> token TypeVar ||
+            counted Syntax.scan_nat >> token Nat ||
+            counted scan_symid >> token SymIdent))
       end);
   in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
 
@@ -340,7 +332,7 @@
 val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
 
 fun recover msg = Scan.state :|-- (fn pos =>
-  keep_line (Scan.many is_junk) >> (fn xs => [Token (pos, (Error msg, implode xs))]));
+  counted (Scan.many is_junk) >> (fn s => [Token (pos, (Error msg, s))]));
 
 in