clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
authorwenzelm
Sun, 04 Mar 2012 16:02:14 +0100
changeset 46811 03a2dc9e0624
parent 46810 a910e12fca85
child 46812 3d55ef732cd7
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2); simplified signatures;
src/Pure/Isar/token.ML
src/Pure/ProofGeneral/pgip_parser.ML
src/Pure/Thy/latex.ML
src/Pure/Thy/thy_load.ML
src/Pure/Thy/thy_output.ML
src/Pure/Thy/thy_syntax.ML
src/Pure/Thy/thy_syntax.scala
--- a/src/Pure/Isar/token.ML	Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Isar/token.ML	Sun Mar 04 16:02:14 2012 +0100
@@ -28,6 +28,7 @@
   val is_kind: kind -> T -> bool
   val keyword_with: (string -> bool) -> T -> bool
   val ident_with: (string -> bool) -> T -> bool
+  val is_command: T -> bool
   val is_proper: T -> bool
   val is_semicolon: T -> bool
   val is_comment: T -> bool
@@ -153,6 +154,8 @@
 fun kind_of (Token (_, (k, _), _)) = k;
 fun is_kind k (Token (_, (k', _), _)) = k = k';
 
+val is_command = is_kind Command;
+
 fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x
   | keyword_with _ _ = false;
 
--- a/src/Pure/ProofGeneral/pgip_parser.ML	Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/ProofGeneral/pgip_parser.ML	Sun Mar 04 16:02:14 2012 +0100
@@ -103,6 +103,8 @@
 
 
 fun pgip_parser pos str =
-  maps parse_span (Thy_Syntax.parse_spans (Keyword.get_lexicons ()) pos str);
+  Thy_Syntax.parse_tokens (Keyword.get_lexicons ()) pos str
+  |> Thy_Syntax.parse_spans
+  |> maps parse_span;
 
 end;
--- a/src/Pure/Thy/latex.ML	Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/latex.ML	Sun Mar 04 16:02:14 2012 +0100
@@ -130,7 +130,7 @@
 fun output_basic tok =
   let val s = Token.content_of tok in
     if invisible_token tok then ""
-    else if Token.is_kind Token.Command tok then
+    else if Token.is_command tok then
       "\\isacommand{" ^ output_syms s ^ "}"
     else if Token.is_kind Token.Keyword tok andalso Lexicon.is_ascii_identifier s then
       "\\isakeyword{" ^ output_syms s ^ "}"
--- a/src/Pure/Thy/thy_load.ML	Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/thy_load.ML	Sun Mar 04 16:02:14 2012 +0100
@@ -176,11 +176,10 @@
       begin_theory dir name imports uses parents
       |> Present.begin_theory update_time dir uses;
 
-    val toks = Source.exhausted (Thy_Syntax.token_source lexs pos (Source.of_string text));
-    val spans = Source.exhaust (Thy_Syntax.span_source toks);
+    val toks = Thy_Syntax.parse_tokens lexs pos text;
+    val spans = Thy_Syntax.parse_spans toks;
     val elements =
-      Source.exhaust (Thy_Syntax.element_source (Source.of_list spans))
-      |> maps (Outer_Syntax.read_element outer_syntax init);
+      maps (Outer_Syntax.read_element outer_syntax init) (Thy_Syntax.parse_elements spans);
 
     val _ = Present.theory_source name
       (fn () => HTML.html_mode (implode o map Thy_Syntax.present_span) spans);
--- a/src/Pure/Thy/thy_output.ML	Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/thy_output.ML	Sun Mar 04 16:02:14 2012 +0100
@@ -33,7 +33,7 @@
   val eval_antiq: Scan.lexicon -> Toplevel.state -> Symbol_Pos.T list * Position.range -> string
   val eval_antiquote: Scan.lexicon -> Toplevel.state -> Symbol_Pos.text * Position.T -> string
   val present_thy: Scan.lexicon -> (string -> string list) -> (markup -> string -> bool) ->
-    (Toplevel.transition * Toplevel.state) list -> (Token.T, 'a) Source.source -> Buffer.T
+    (Toplevel.transition * Toplevel.state) list -> Token.T list -> Buffer.T
   val pretty_text: Proof.context -> string -> Pretty.T
   val pretty_term: Proof.context -> term -> Pretty.T
   val pretty_thm: Proof.context -> thm -> Pretty.T
@@ -366,7 +366,7 @@
 
 in
 
-fun present_thy lex default_tags is_markup command_results src =
+fun present_thy lex default_tags is_markup command_results toks =
   let
     (* tokens *)
 
@@ -375,8 +375,7 @@
 
     fun markup mark mk flag = Scan.peek (fn d =>
       improper |--
-        Parse.position
-          (Scan.one (Token.is_kind Token.Command andf is_markup mark o Token.content_of)) --
+        Parse.position (Scan.one (Token.is_command andf is_markup mark o Token.content_of)) --
       Scan.repeat tag --
       Parse.!!!! ((improper -- locale -- improper) |-- Parse.doc_source --| improper_end)
       >> (fn (((tok, pos), tags), txt) =>
@@ -384,7 +383,7 @@
         in (SOME (name, pos, tags), (mk (name, txt), (flag, d))) end));
 
     val command = Scan.peek (fn d =>
-      Parse.position (Scan.one (Token.is_kind Token.Command)) --
+      Parse.position (Scan.one (Token.is_command)) --
       Scan.repeat tag
       >> (fn ((tok, pos), tags) =>
         let val name = Token.content_of tok
@@ -429,8 +428,7 @@
           make_span (the cmd) (toks1 @ (tok2 :: (toks3 @ the_default [] tok4))));
 
     val spans =
-      src
-      |> Source.filter (not o Token.is_semicolon)
+      Source.of_list (filter_out Token.is_semicolon toks)
       |> Source.source' 0 Token.stopper (Scan.error (Scan.bulk token)) NONE
       |> Source.source stopper (Scan.error (Scan.bulk span)) NONE
       |> Source.exhaust;
--- a/src/Pure/Thy/thy_syntax.ML	Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/thy_syntax.ML	Sun Mar 04 16:02:14 2012 +0100
@@ -6,9 +6,6 @@
 
 signature THY_SYNTAX =
 sig
-  val token_source: Scan.lexicon * Scan.lexicon -> Position.T -> (string, 'a) Source.source ->
-    (Token.T, (Symbol_Pos.T, Position.T * (Symbol.symbol, (string, 'a)
-      Source.source) Source.source) Source.source) Source.source
   val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list
   val present_token: Token.T -> Output.output
   val reports_of_token: Token.T -> Position.report list
@@ -16,12 +13,10 @@
   type span
   val span_kind: span -> span_kind
   val span_content: span -> Token.T list
-  val span_source: (Token.T, 'a) Source.source -> (span, (Token.T, 'a) Source.source) Source.source
-  val parse_spans: Scan.lexicon * Scan.lexicon -> Position.T -> string -> span list
+  val parse_spans: Token.T list -> span list
   val present_span: span -> Output.output
   type element = {head: span, proof: span list, proper_proof: bool}
-  val element_source: (span, 'a) Source.source ->
-    (element, (span, 'a) Source.source) Source.source
+  val parse_elements: span list -> element list
 end;
 
 structure Thy_Syntax: THY_SYNTAX =
@@ -31,11 +26,11 @@
 
 (* parse *)
 
-fun token_source lexs pos =
-  Symbol.source #> Token.source {do_recover = SOME false} (K lexs) pos;
-
 fun parse_tokens lexs pos =
-  Source.of_string #> token_source lexs pos #> Source.exhaust;
+  Source.of_string #>
+  Symbol.source #>
+  Token.source {do_recover = SOME false} (K lexs) pos #>
+  Source.exhaust;
 
 
 (* present *)
@@ -105,29 +100,23 @@
 
 local
 
-val whitespace = Scan.many (not o Token.is_proper);
-val whitespace1 = Scan.many1 (not o Token.is_proper);
-
-val body = Scan.unless (whitespace -- Scan.ahead (Parse.command || Parse.eof)) Parse.not_eof;
+fun make_span toks =
+  if not (null toks) andalso Token.is_command (hd toks) then
+    Span (Command (Token.content_of (hd toks)), toks)
+  else if forall (not o Token.is_proper) toks then Span (Ignored, toks)
+  else Span (Malformed, toks);
 
-val span =
-  Scan.ahead Parse.command -- Parse.not_eof -- Scan.repeat body
-    >> (fn ((name, c), bs) => Span (Command name, c :: bs)) ||
-  whitespace1 >> (fn toks => Span (Ignored, toks)) ||
-  Scan.repeat1 body >> (fn toks => Span (Malformed, toks));
+fun flush (result, span) = if null span then (result, span) else (rev span :: result, []);
 
 in
 
-fun span_source src = Source.source Token.stopper (Scan.bulk span) NONE src;
+fun parse_spans toks =
+  fold (fn tok => Token.is_command tok ? flush #> apsnd (cons tok)) toks ([], [])
+  |> flush
+  |> #1 |> rev |> map make_span;
 
 end;
 
-fun parse_spans lexs pos str =
-  Source.of_string str
-  |> token_source lexs pos
-  |> span_source
-  |> Source.exhaust;
-
 
 (* present *)
 
@@ -166,7 +155,7 @@
 val stopper = Scan.stopper (K eof) is_eof;
 
 
-(* element_source *)
+(* parse *)
 
 local
 
@@ -187,7 +176,10 @@
 
 in
 
-fun element_source src = Source.source stopper (Scan.bulk element) NONE src;
+val parse_elements =
+  Source.of_list #>
+  Source.source stopper (Scan.bulk element) NONE #>
+  Source.exhaust;
 
 end;
 
--- a/src/Pure/Thy/thy_syntax.scala	Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/thy_syntax.scala	Sun Mar 04 16:02:14 2012 +0100
@@ -27,8 +27,7 @@
       def length: Int = command.length
     }
 
-    def parse(syntax: Outer_Syntax, node_name: Document.Node.Name, text: CharSequence)
-      : Entry =
+    def parse(syntax: Outer_Syntax, node_name: Document.Node.Name, text: CharSequence): Entry =
     {
       /* stack operations */
 
@@ -82,18 +81,10 @@
   {
     val result = new mutable.ListBuffer[List[Token]]
     val span = new mutable.ListBuffer[Token]
-    val whitespace = new mutable.ListBuffer[Token]
 
-    def flush(buffer: mutable.ListBuffer[Token])
-    {
-      if (!buffer.isEmpty) { result += buffer.toList; buffer.clear }
-    }
-    for (tok <- toks) {
-      if (tok.is_command) { flush(span); flush(whitespace); span += tok }
-      else if (tok.is_ignored) whitespace += tok
-      else { span ++= whitespace; whitespace.clear; span += tok }
-    }
-    flush(span); flush(whitespace)
+    def flush() { if (!span.isEmpty) { result += span.toList; span.clear } }
+    for (tok <- toks) { if (tok.is_command) flush(); span += tok }
+    flush()
     result.toList
   }