proper range for Antiq tokens;
authorwenzelm
Fri, 26 Sep 2014 15:10:02 +0200
changeset 58465 bd06c6479748
parent 58464 5e7fc9974aba
child 58466 dde28333367f
proper range for Antiq tokens; more detailed parse trees; report sub-expressions;
src/Pure/Tools/rail.ML
--- a/src/Pure/Tools/rail.ML	Fri Sep 26 15:05:11 2014 +0200
+++ b/src/Pure/Tools/rail.ML	Fri Sep 26 15:10:02 2014 +0200
@@ -36,6 +36,11 @@
 fun pos_of (Token ((pos, _), _)) = pos;
 fun end_pos_of (Token ((_, pos), _)) = pos;
 
+fun range_of (toks as tok :: _) =
+      let val pos' = end_pos_of (List.last toks)
+      in Position.range (pos_of tok) pos' end
+  | range_of [] = Position.no_range;
+
 fun kind_of (Token (_, (k, _))) = k;
 fun content_of (Token (_, (_, x))) = x;
 
@@ -80,6 +85,9 @@
 
 fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))];
 
+fun antiq_token (antiq as (ss, {range, ...})) =
+  [Token (range, (Antiq antiq, Symbol_Pos.content ss))];
+
 val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol);
 
 val scan_keyword =
@@ -89,7 +97,7 @@
 
 val scan_token =
   scan_space >> K [] ||
-  Antiquote.scan_antiq >> (fn antiq as (ss, _) => token (Antiq antiq) ss) ||
+  Antiquote.scan_antiq >> antiq_token ||
   scan_keyword >> (token Keyword o single) ||
   Lexicon.scan_id >> token Ident ||
   Symbol_Pos.scan_string_q err_prefix >> (fn (pos1, (ss, pos2)) =>
@@ -110,6 +118,8 @@
 
 (** parsing **)
 
+(* parser combinators *)
+
 fun !!! scan =
   let
     val prefix = "Rail syntax error";
@@ -140,6 +150,78 @@
 
 val antiq = Scan.some (fn tok => (case kind_of tok of Antiq a => SOME a | _ => NONE));
 
+fun RANGE scan = Scan.trace scan >> apsnd range_of;
+fun RANGE_APP scan = RANGE scan >> (fn (f, r) => f r);
+
+
+(* parse trees *)
+
+datatype trees =
+  CAT of tree list * Position.range
+and tree =
+  BAR of trees list * Position.range |
+  STAR of (trees * trees) * Position.range |
+  PLUS of (trees * trees) * Position.range |
+  MAYBE of tree * Position.range |
+  NEWLINE of Position.range |
+  NONTERMINAL of string * Position.range |
+  TERMINAL of (bool * string) * Position.range |
+  ANTIQUOTE of (bool * Antiquote.antiq) * Position.range;
+
+fun reports_of_tree ctxt =
+  if Context_Position.is_visible ctxt then
+    let
+      fun reports r =
+        if r = Position.no_range then []
+        else [(Position.set_range r, Markup.expression)];
+      fun trees (CAT (ts, r)) = reports r @ maps tree ts
+      and tree (BAR (Ts, r)) = reports r @ maps trees Ts
+        | tree (STAR ((T1, T2), r)) = reports r @ trees T1 @ trees T2
+        | tree (PLUS ((T1, T2), r)) = reports r @ trees T1 @ trees T2
+        | tree (MAYBE (t, r)) = reports r @ tree t
+        | tree (NEWLINE r) = reports r
+        | tree (NONTERMINAL (_, r)) = reports r
+        | tree (TERMINAL (_, r)) = reports r
+        | tree (ANTIQUOTE (_, r)) = reports r;
+    in distinct (op =) o tree end
+  else K [];
+
+local
+
+val at_mode = Scan.option ($$$ "@") >> (fn NONE => false | _ => true);
+
+fun body x = (RANGE (enum1 "|" body1) >> BAR) x
+and body0 x = (RANGE (enum "|" body1) >> BAR) x
+and body1 x =
+ (RANGE_APP (body2 :|-- (fn a =>
+   $$$ "*" |-- !!! body4e >> (fn b => fn r => CAT ([STAR ((a, b), r)], r)) ||
+   $$$ "+" |-- !!! body4e >> (fn b => fn r => CAT ([PLUS ((a, b), r)], r)) ||
+   Scan.succeed (K a)))) x
+and body2 x = (RANGE (Scan.repeat1 body3) >> CAT) x
+and body3 x =
+ (RANGE_APP (body4 :|-- (fn a =>
+   $$$ "?" >> K (curry MAYBE a) ||
+   Scan.succeed (K a)))) x
+and body4 x =
+ ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
+  RANGE_APP
+   ($$$ "\<newline>" >> K NEWLINE ||
+    ident >> curry NONTERMINAL ||
+    at_mode -- string >> curry TERMINAL ||
+    at_mode -- antiq >> curry ANTIQUOTE)) x
+and body4e x =
+  (RANGE (Scan.option body4) >> (fn (a, r) => CAT (the_list a, r))) x;
+
+val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;
+val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text "");
+val rules = enum1 ";" (Scan.option rule) >> map_filter I;
+
+in
+
+fun parse_rules toks =
+  #1 (Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof)))) toks);
+
+end;
 
 
 (** rail expressions **)
@@ -156,68 +238,58 @@
   Terminal of bool * string |
   Antiquote of bool * Antiquote.antiq;
 
-fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails))
-and reverse (Bar cats) = Bar (map reverse_cat cats)
-  | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2)
-  | reverse x = x;
+fun is_newline (Newline _) = true | is_newline _ = false;
+
+
+(* prepare *)
+
+local
 
 fun cat rails = Cat (0, rails);
 
 val empty = cat [];
 fun is_empty (Cat (_, [])) = true | is_empty _ = false;
 
-fun is_newline (Newline _) = true | is_newline _ = false;
-
 fun bar [Cat (_, [rail])] = rail
   | bar cats = Bar cats;
 
-fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2);
+fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails))
+and reverse (Bar cats) = Bar (map reverse_cat cats)
+  | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2)
+  | reverse x = x;
+
+fun plus (cat1, cat2) = Plus (cat1, reverse_cat cat2);
+
+in
 
-fun star cat1 cat2 =
-  if is_empty cat2 then plus empty cat1
-  else bar [empty, cat [plus cat1 cat2]];
+fun prepare_trees (CAT (ts, _)) = Cat (0, map prepare_tree ts)
+and prepare_tree (BAR (Ts, _)) = bar (map prepare_trees Ts)
+  | prepare_tree (STAR (Ts, _)) =
+      let val (cat1, cat2) = pairself prepare_trees Ts in
+        if is_empty cat2 then plus (empty, cat1)
+        else bar [empty, cat [plus (cat1, cat2)]]
+      end
+  | prepare_tree (PLUS (Ts, _)) = plus (pairself prepare_trees Ts)
+  | prepare_tree (MAYBE (t, _)) = bar [empty, cat [prepare_tree t]]
+  | prepare_tree (NEWLINE _) = Newline 0
+  | prepare_tree (NONTERMINAL (a, _)) = Nonterminal a
+  | prepare_tree (TERMINAL (a, _)) = Terminal a
+  | prepare_tree (ANTIQUOTE (a, _)) = Antiquote a;
 
-fun maybe rail = bar [empty, cat [rail]];
+end;
 
 
 (* read *)
 
-local
-
-val at_mode = Scan.option ($$$ "@") >> (fn NONE => false | _ => true);
-
-fun body x = (enum1 "|" body1 >> bar) x
-and body0 x = (enum "|" body1 >> bar) x
-and body1 x =
- (body2 :|-- (fn a =>
-   $$$ "*" |-- !!! body4e >> (cat o single o star a) ||
-   $$$ "+" |-- !!! body4e >> (cat o single o plus a) ||
-   Scan.succeed a)) x
-and body2 x = (Scan.repeat1 body3 >> cat) x
-and body3 x = (body4 :|-- (fn a => $$$ "?" >> K (maybe a) || Scan.succeed a)) x
-and body4 x =
- ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
-  $$$ "\<newline>" >> K (Newline 0) ||
-  ident >> Nonterminal ||
-  at_mode -- string >> Terminal ||
-  at_mode -- antiq >> Antiquote) x
-and body4e x = (Scan.option body4 >> (cat o the_list)) x;
-
-val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;
-val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text "");
-val rules = enum1 ";" (Scan.option rule) >> map_filter I;
-
-in
-
 fun read ctxt (source: Symbol_Pos.source) =
   let
     val {text, pos, ...} = source;
     val _ = Context_Position.report ctxt pos Markup.language_rail;
     val toks = tokenize (Symbol_Pos.explode (text, pos));
     val _ = Context_Position.reports ctxt (maps reports_of_token toks);
-  in #1 (Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof)))) toks) end;
-
-end;
+    val rules = parse_rules toks;
+    val _ = Position.reports (maps (reports_of_tree ctxt o #2) rules);
+  in map (apsnd prepare_tree) rules end;
 
 
 (* latex output *)