author | wenzelm |
Sat, 13 Aug 2011 15:59:26 +0200 | |
changeset 44182 | ecb51b457064 |
parent 44180 | a6dc270d3edb |
child 44185 | 05641edb5d30 |
permissions | -rw-r--r-- |
34268 | 1 |
/*  Title:      Pure/Thy/thy_syntax.scala
    Author:     Makarius

Superficial theory syntax: tokens and spans.
*/
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
38239
89a4d1028fb3
parse_spans: somewhat faster low-level implementation;
wenzelm
parents:
36956
diff
changeset
|
10 |
import scala.collection.mutable |
38374 | 11 |
import scala.annotation.tailrec |
38239
89a4d1028fb3
parse_spans: somewhat faster low-level implementation;
wenzelm
parents:
36956
diff
changeset
|
12 |
|
89a4d1028fb3
parse_spans: somewhat faster low-level implementation;
wenzelm
parents:
36956
diff
changeset
|
13 |
|
34303 | 14 |
object Thy_Syntax |
34268 | 15 |
{ |
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
16 |
/** nested structure **/ |
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
17 |
|
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
18 |
object Structure
{
  /** Node of the nested source outline; `length` is the size it accounts for. */
  sealed abstract class Entry { def length: Int }

  /** Named group of entries; its length is the sum of the lengths of its body. */
  case class Block(val name: String, val body: List[Entry]) extends Entry
  {
    val length: Int = body.foldLeft(0)((total, entry) => total + entry.length)
  }

  /** Leaf entry wrapping a single command; its length is the command's length. */
  case class Atom(val command: Command) extends Entry
  {
    def length: Int = command.length
  }

  /**
   * Scan `text` with `syntax`, split it into command spans and arrange the
   * resulting commands into nested blocks.  A command for which
   * `syntax.heading_level` yields a level opens a new block named after the
   * command source; the outermost block is named `root_name`.
   */
  def parse(syntax: Outer_Syntax, root_name: String, text: CharSequence): Entry =
  {
    /* stack of open blocks: (level, name, entries so far), innermost first */

    def new_entries(): mutable.ListBuffer[Entry] = new mutable.ListBuffer[Entry]

    var open_blocks: List[(Int, String, mutable.ListBuffer[Entry])] =
      List((0, root_name, new_entries()))

    // Fold the innermost block into its parent for as long as its level
    // satisfies `closes`; the bottom (root) element is never popped.
    @tailrec def close_while(closes: Int => Boolean): Unit =
      open_blocks match {
        case (level, name, entries) :: outer if outer.nonEmpty && closes(level) =>
          outer.head._3 += Block(name, entries.toList)
          open_blocks = outer
          close_while(closes)
        case _ =>
      }

    // Close everything that is still open and wrap up the root block.
    def finish(): Entry =
    {
      close_while(_ => true)
      open_blocks.head match {
        case (_, name, entries) => Block(name, entries.toList)
      }
    }

    // A heading first closes all blocks of the same or deeper level and then
    // opens its own block; every command ends up as an atom of the innermost
    // open block (a heading thus becomes the first atom of its own block).
    def add(command: Command): Unit =
    {
      syntax.heading_level(command).foreach { i =>
        close_while(_ >= i)
        open_blocks = (i, command.source, new_entries()) :: open_blocks
      }
      open_blocks.head._3 += Atom(command)
    }


    /* result structure */

    for (span <- parse_spans(syntax.scan(text))) add(Command.span(span))
    finish()
  }
}
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
74 |
|
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
75 |
|
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
76 |
|
38374 | 77 |
/** parse spans **/ |
78 |
||
38373 | 79 |
/**
 * Group a token list into spans.  A token with `is_command` starts a new
 * span.  Tokens with `is_ignored` are held back: they are attached to the
 * current span only if another non-command token follows, otherwise they
 * are emitted as a separate span of their own.  Empty spans are never
 * produced.
 */
def parse_spans(toks: List[Token]): List[List[Token]] =
{
  val spans = new mutable.ListBuffer[List[Token]]
  val current = new mutable.ListBuffer[Token]
  val ignored = new mutable.ListBuffer[Token]

  // emit the pending tokens as one span (unless there are none) and reset
  def emit(pending: mutable.ListBuffer[Token]): Unit =
    if (pending.nonEmpty) {
      spans += pending.toList
      pending.clear()
    }

  toks.foreach { tok =>
    if (tok.is_command) {
      emit(current)
      emit(ignored)
      current += tok
    }
    else if (tok.is_ignored) ignored += tok
    else {
      // held-back ignored tokens turn out to be interior to the current span
      current ++= ignored
      ignored.clear()
      current += tok
    }
  }

  emit(current)
  emit(ignored)
  spans.toList
}
38374 | 97 |
|
98 |
||
99 |
||
100 |
/** text edits **/ |
|
101 |
||
43722 | 102 |
/**
 * Apply `edits` to the nodes of the `previous` document version.
 *
 * Returns the list of resulting command-level edits together with a new
 * `Document.Version` (with a fresh id) that holds the updated nodes.
 */
def text_edits(
    syntax: Outer_Syntax,
    previous: Document.Version,
    edits: List[Document.Edit_Text])
  : (List[Document.Edit_Command], Document.Version) =
{
  /* phase 1: edit individual command source */

  // Each edit is applied to the first command that it can edit, or -- for an
  // insert -- that ends exactly at the insert position; affected source is
  // replaced by unparsed commands.  Whatever `e.edit` leaves over is
  // processed again before the remaining edits.
  @tailrec def edit_text(eds: List[Text.Edit], commands: Linear_Set[Command])
      : Linear_Set[Command] =
    eds match {
      case Nil => commands

      case e :: es =>
        val target =
          Document.Node.command_starts(commands.iterator).find {
            case (cmd, cmd_start) =>
              e.can_edit(cmd.source, cmd_start) ||
                e.is_insert && e.start == cmd_start + cmd.length
          }
        target match {
          case Some((cmd, cmd_start)) =>
            if (e.can_edit(cmd.source, cmd_start)) {
              val (rest, text) = e.edit(cmd.source, cmd_start)
              val updated = commands.insert_after(Some(cmd), Command.unparsed(text)) - cmd
              edit_text(rest.toList ::: es, updated)
            }
            else edit_text(es, commands.insert_after(Some(cmd), Command.unparsed(e.text)))

          case None =>
            // no command matches: only an insert at offset 0 is acceptable
            require(e.is_insert && e.start == 0)
            edit_text(es, commands.insert_after(None, Command.unparsed(e.text)))
        }
    }


  /* phase 2: recover command spans */

  // Repeatedly pick the first unparsed command, widen the range on both sides
  // (skipping commands whose `newlines` is 0, up to the nearest proper
  // command, or the very first / last command), re-scan the combined source
  // of that range and replace the range by commands for the new spans.
  @tailrec def recover_spans(commands: Linear_Set[Command]): Linear_Set[Command] =
    commands.iterator.find(_.is_unparsed) match {
      case None => commands

      case Some(unparsed) =>
        val first =
          commands.reverse_iterator(unparsed).
            dropWhile(_.newlines == 0).find(_.is_command).getOrElse(commands.head)
        val last =
          commands.iterator(unparsed).
            dropWhile(_.newlines == 0).find(_.is_command).getOrElse(commands.last)
        val range =
          commands.iterator(first).takeWhile(_ != last).toList ::: List(last)

        val sources = for (cmd <- range; tok <- cmd.span) yield tok.source
        val spans0 = parse_spans(syntax.scan(sources.mkString))

        // an unchanged command at either end of the range is kept as it is
        val keep_first = spans0.nonEmpty && first.is_command && first.span == spans0.head
        val (before_edit, spans1) =
          if (keep_first) (Some(first), spans0.tail)
          else (commands.prev(first), spans0)

        val keep_last = spans1.nonEmpty && last.is_command && last.span == spans1.last
        val (after_edit, spans2) =
          if (keep_last) (Some(last), spans1.init)
          else (commands.next(last), spans1)

        val inserted = for (span <- spans2) yield new Command(Document.new_id(), span)
        val replaced =
          commands.delete_between(before_edit, after_edit).append_after(before_edit, inserted)
        recover_spans(replaced)
    }


  /* resulting document edits */

  val doc_edits = new mutable.ListBuffer[Document.Edit_Command]
  var nodes = previous.nodes

  edits.foreach {
    case (name, Document.Node.Remove()) =>
      doc_edits += (name -> Document.Node.Remove())
      nodes -= name

    case (name, Document.Node.Edits(node_edits)) =>
      val node = nodes(name)
      val old_commands = node.commands
      val edited_commands = edit_text(node_edits, old_commands)
      val new_commands = recover_spans(edited_commands)  // FIXME somewhat slow

      val removed = old_commands.iterator.filter(cmd => !new_commands.contains(cmd)).toList
      val added = new_commands.iterator.filter(cmd => !old_commands.contains(cmd)).toList

      // removals (in reverse order) come first, then insertions; each entry
      // is paired with the predecessor of the command in its own command set
      val cmd_edits =
        removed.reverse.map(cmd => (old_commands.prev(cmd), None)) :::
        added.map(cmd => (new_commands.prev(cmd), Some(cmd)))

      doc_edits += (name -> Document.Node.Edits(cmd_edits))
      nodes += (name -> node.copy(commands = new_commands))

    case (name, Document.Node.Header(header)) =>
      val node = nodes(name)
      // skip the update only if both headers are proper results and equal
      val update_header =
        (node.header, header) match {
          case (Exn.Res(old_header), Exn.Res(new_header)) => old_header != new_header
          case _ => true
        }
      if (update_header) {
        doc_edits += (name -> Document.Node.Header(header))
        nodes += (name -> node.copy(header = header))
      }
  }

  (doc_edits.toList, Document.Version(Document.new_id(), nodes))
}
|
34268 | 217 |
} |