clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
simplified signatures;
--- a/src/Pure/Isar/token.ML Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Isar/token.ML Sun Mar 04 16:02:14 2012 +0100
@@ -28,6 +28,7 @@
val is_kind: kind -> T -> bool
val keyword_with: (string -> bool) -> T -> bool
val ident_with: (string -> bool) -> T -> bool
+ val is_command: T -> bool
val is_proper: T -> bool
val is_semicolon: T -> bool
val is_comment: T -> bool
@@ -153,6 +154,8 @@
fun kind_of (Token (_, (k, _), _)) = k;
fun is_kind k (Token (_, (k', _), _)) = k = k';
+val is_command = is_kind Command;
+
fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x
| keyword_with _ _ = false;
--- a/src/Pure/ProofGeneral/pgip_parser.ML Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/ProofGeneral/pgip_parser.ML Sun Mar 04 16:02:14 2012 +0100
@@ -103,6 +103,8 @@
fun pgip_parser pos str =
- maps parse_span (Thy_Syntax.parse_spans (Keyword.get_lexicons ()) pos str);
+ Thy_Syntax.parse_tokens (Keyword.get_lexicons ()) pos str
+ |> Thy_Syntax.parse_spans
+ |> maps parse_span;
end;
--- a/src/Pure/Thy/latex.ML Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/latex.ML Sun Mar 04 16:02:14 2012 +0100
@@ -130,7 +130,7 @@
fun output_basic tok =
let val s = Token.content_of tok in
if invisible_token tok then ""
- else if Token.is_kind Token.Command tok then
+ else if Token.is_command tok then
"\\isacommand{" ^ output_syms s ^ "}"
else if Token.is_kind Token.Keyword tok andalso Lexicon.is_ascii_identifier s then
"\\isakeyword{" ^ output_syms s ^ "}"
--- a/src/Pure/Thy/thy_load.ML Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/thy_load.ML Sun Mar 04 16:02:14 2012 +0100
@@ -176,11 +176,10 @@
begin_theory dir name imports uses parents
|> Present.begin_theory update_time dir uses;
- val toks = Source.exhausted (Thy_Syntax.token_source lexs pos (Source.of_string text));
- val spans = Source.exhaust (Thy_Syntax.span_source toks);
+ val toks = Thy_Syntax.parse_tokens lexs pos text;
+ val spans = Thy_Syntax.parse_spans toks;
val elements =
- Source.exhaust (Thy_Syntax.element_source (Source.of_list spans))
- |> maps (Outer_Syntax.read_element outer_syntax init);
+ maps (Outer_Syntax.read_element outer_syntax init) (Thy_Syntax.parse_elements spans);
val _ = Present.theory_source name
(fn () => HTML.html_mode (implode o map Thy_Syntax.present_span) spans);
--- a/src/Pure/Thy/thy_output.ML Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/thy_output.ML Sun Mar 04 16:02:14 2012 +0100
@@ -33,7 +33,7 @@
val eval_antiq: Scan.lexicon -> Toplevel.state -> Symbol_Pos.T list * Position.range -> string
val eval_antiquote: Scan.lexicon -> Toplevel.state -> Symbol_Pos.text * Position.T -> string
val present_thy: Scan.lexicon -> (string -> string list) -> (markup -> string -> bool) ->
- (Toplevel.transition * Toplevel.state) list -> (Token.T, 'a) Source.source -> Buffer.T
+ (Toplevel.transition * Toplevel.state) list -> Token.T list -> Buffer.T
val pretty_text: Proof.context -> string -> Pretty.T
val pretty_term: Proof.context -> term -> Pretty.T
val pretty_thm: Proof.context -> thm -> Pretty.T
@@ -366,7 +366,7 @@
in
-fun present_thy lex default_tags is_markup command_results src =
+fun present_thy lex default_tags is_markup command_results toks =
let
(* tokens *)
@@ -375,8 +375,7 @@
fun markup mark mk flag = Scan.peek (fn d =>
improper |--
- Parse.position
- (Scan.one (Token.is_kind Token.Command andf is_markup mark o Token.content_of)) --
+ Parse.position (Scan.one (Token.is_command andf is_markup mark o Token.content_of)) --
Scan.repeat tag --
Parse.!!!! ((improper -- locale -- improper) |-- Parse.doc_source --| improper_end)
>> (fn (((tok, pos), tags), txt) =>
@@ -384,7 +383,7 @@
in (SOME (name, pos, tags), (mk (name, txt), (flag, d))) end));
val command = Scan.peek (fn d =>
- Parse.position (Scan.one (Token.is_kind Token.Command)) --
+ Parse.position (Scan.one (Token.is_command)) --
Scan.repeat tag
>> (fn ((tok, pos), tags) =>
let val name = Token.content_of tok
@@ -429,8 +428,7 @@
make_span (the cmd) (toks1 @ (tok2 :: (toks3 @ the_default [] tok4))));
val spans =
- src
- |> Source.filter (not o Token.is_semicolon)
+ Source.of_list (filter_out Token.is_semicolon toks)
|> Source.source' 0 Token.stopper (Scan.error (Scan.bulk token)) NONE
|> Source.source stopper (Scan.error (Scan.bulk span)) NONE
|> Source.exhaust;
--- a/src/Pure/Thy/thy_syntax.ML Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/thy_syntax.ML Sun Mar 04 16:02:14 2012 +0100
@@ -6,9 +6,6 @@
signature THY_SYNTAX =
sig
- val token_source: Scan.lexicon * Scan.lexicon -> Position.T -> (string, 'a) Source.source ->
- (Token.T, (Symbol_Pos.T, Position.T * (Symbol.symbol, (string, 'a)
- Source.source) Source.source) Source.source) Source.source
val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list
val present_token: Token.T -> Output.output
val reports_of_token: Token.T -> Position.report list
@@ -16,12 +13,10 @@
type span
val span_kind: span -> span_kind
val span_content: span -> Token.T list
- val span_source: (Token.T, 'a) Source.source -> (span, (Token.T, 'a) Source.source) Source.source
- val parse_spans: Scan.lexicon * Scan.lexicon -> Position.T -> string -> span list
+ val parse_spans: Token.T list -> span list
val present_span: span -> Output.output
type element = {head: span, proof: span list, proper_proof: bool}
- val element_source: (span, 'a) Source.source ->
- (element, (span, 'a) Source.source) Source.source
+ val parse_elements: span list -> element list
end;
structure Thy_Syntax: THY_SYNTAX =
@@ -31,11 +26,11 @@
(* parse *)
-fun token_source lexs pos =
- Symbol.source #> Token.source {do_recover = SOME false} (K lexs) pos;
-
fun parse_tokens lexs pos =
- Source.of_string #> token_source lexs pos #> Source.exhaust;
+ Source.of_string #>
+ Symbol.source #>
+ Token.source {do_recover = SOME false} (K lexs) pos #>
+ Source.exhaust;
(* present *)
@@ -105,29 +100,23 @@
local
-val whitespace = Scan.many (not o Token.is_proper);
-val whitespace1 = Scan.many1 (not o Token.is_proper);
-
-val body = Scan.unless (whitespace -- Scan.ahead (Parse.command || Parse.eof)) Parse.not_eof;
+fun make_span toks =
+ if not (null toks) andalso Token.is_command (hd toks) then
+ Span (Command (Token.content_of (hd toks)), toks)
+ else if forall (not o Token.is_proper) toks then Span (Ignored, toks)
+ else Span (Malformed, toks);
-val span =
- Scan.ahead Parse.command -- Parse.not_eof -- Scan.repeat body
- >> (fn ((name, c), bs) => Span (Command name, c :: bs)) ||
- whitespace1 >> (fn toks => Span (Ignored, toks)) ||
- Scan.repeat1 body >> (fn toks => Span (Malformed, toks));
+fun flush (result, span) = if null span then (result, span) else (rev span :: result, []);
in
-fun span_source src = Source.source Token.stopper (Scan.bulk span) NONE src;
+fun parse_spans toks =
+ fold (fn tok => Token.is_command tok ? flush #> apsnd (cons tok)) toks ([], [])
+ |> flush
+ |> #1 |> rev |> map make_span;
end;
-fun parse_spans lexs pos str =
- Source.of_string str
- |> token_source lexs pos
- |> span_source
- |> Source.exhaust;
-
(* present *)
@@ -166,7 +155,7 @@
val stopper = Scan.stopper (K eof) is_eof;
-(* element_source *)
+(* parse *)
local
@@ -187,7 +176,10 @@
in
-fun element_source src = Source.source stopper (Scan.bulk element) NONE src;
+val parse_elements =
+ Source.of_list #>
+ Source.source stopper (Scan.bulk element) NONE #>
+ Source.exhaust;
end;
--- a/src/Pure/Thy/thy_syntax.scala Sun Mar 04 11:03:10 2012 +0100
+++ b/src/Pure/Thy/thy_syntax.scala Sun Mar 04 16:02:14 2012 +0100
@@ -27,8 +27,7 @@
def length: Int = command.length
}
- def parse(syntax: Outer_Syntax, node_name: Document.Node.Name, text: CharSequence)
- : Entry =
+ def parse(syntax: Outer_Syntax, node_name: Document.Node.Name, text: CharSequence): Entry =
{
/* stack operations */
@@ -82,18 +81,10 @@
{
val result = new mutable.ListBuffer[List[Token]]
val span = new mutable.ListBuffer[Token]
- val whitespace = new mutable.ListBuffer[Token]
- def flush(buffer: mutable.ListBuffer[Token])
- {
- if (!buffer.isEmpty) { result += buffer.toList; buffer.clear }
- }
- for (tok <- toks) {
- if (tok.is_command) { flush(span); flush(whitespace); span += tok }
- else if (tok.is_ignored) whitespace += tok
- else { span ++= whitespace; whitespace.clear; span += tok }
- }
- flush(span); flush(whitespace)
+ def flush() { if (!span.isEmpty) { result += span.toList; span.clear } }
+ for (tok <- toks) { if (tok.is_command) flush(); span += tok }
+ flush()
result.toList
}