author  wenzelm 
Wed, 10 Nov 2010 15:42:20 +0100  
changeset 40457  3b0050718b31 
parent 40454  2516ea25a54b 
child 40478  4bae781b8f7c 
permissions  rwrr 
34268  1 
/* Title: Pure/Thy/thy_syntax.scala 
2 
Author: Makarius 

3 

38374  4 
Superficial theory syntax: tokens and spans. 
34268  5 
*/ 
6 

7 
package isabelle 

8 

9 

38239
89a4d1028fb3
parse_spans: somewhat faster lowlevel implementation;
wenzelm
parents:
36956
diff
changeset

10 
import scala.collection.mutable 
38374  11 
import scala.annotation.tailrec 
38239
89a4d1028fb3
parse_spans: somewhat faster lowlevel implementation;
wenzelm
parents:
36956
diff
changeset

12 

89a4d1028fb3
parse_spans: somewhat faster lowlevel implementation;
wenzelm
parents:
36956
diff
changeset

13 

34303  14 
object Thy_Syntax 
34268  15 
{ 
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset

16 
/** nested structure **/ 
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset

17 

2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset

18 
object Structure
{
  /* Node of the nested source structure; every node reports a length. */
  sealed abstract class Entry
  {
    def length: Int
  }

  /* Named group of entries; its length is the sum of the lengths of its body,
     computed once at construction. */
  case class Block(name: String, body: List[Entry]) extends Entry
  {
    val length: Int = body.foldLeft(0)(_ + _.length)
  }

  /* Leaf entry wrapping a single command; its length is the command's length. */
  case class Atom(command: Command) extends Entry
  {
    def length: Int = command.length
  }

  /* Scan text with the given syntax, split it into command spans and arrange
     the resulting commands into nested blocks according to
     syntax.heading_level.  The outermost block is named root_name. */
  def parse_sections(syntax: Outer_Syntax, root_name: String, text: CharSequence): Entry =
  {
    /* stack operations */

    def buffer(): mutable.ListBuffer[Entry] = new mutable.ListBuffer[Entry]

    // Innermost open block first; each element is (heading level, block name, entries so far).
    var open: List[(Int, String, mutable.ListBuffer[Entry])] = List((0, root_name, buffer()))

    // Pop open blocks while the innermost one satisfies the level predicate,
    // appending each popped block to its parent.  The bottom element is never
    // popped, because the pattern requires at least two open blocks.
    @tailrec def close(level: Int => Boolean): Unit =
      open match {
        case (depth, title, entries) :: (_, _, parent_entries) :: _ if level(depth) =>
          parent_entries += Block(title, entries.toList)
          open = open.tail
          close(level)
        case _ =>
      }

    // Close everything that is still open and wrap the remaining bottom element.
    def result(): Entry =
    {
      close(_ => true)
      open.head match {
        case (_, title, entries) => Block(title, entries.toList)
      }
    }

    // A command with heading level i first closes all open blocks of level >= i
    // and then opens a new block named after its own source.  In every case the
    // command itself is appended to the innermost open block.
    def add(command: Command): Unit =
    {
      syntax.heading_level(command).foreach { i =>
        close(_ >= i)
        open = (i, command.source, buffer()) :: open
      }
      open.head._3 += Atom(command)
    }


    /* result structure */

    for (span <- parse_spans(syntax.scan(text))) add(Command.span(span))
    result()
  }
}
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset

77 

2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset

78 

2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset

79 

38374  80 
/** parse spans **/ 
81 

38373  82 
/**
 * Partition a token list into consecutive spans.
 *
 * A token with `is_command` starts a new span.  Tokens with `is_ignored` are
 * held back in a separate buffer: if the next relevant token is an ordinary
 * one, the held-back tokens are merged into the current span in front of it;
 * if it is a command token (or the input ends), they are emitted as a span of
 * their own, after the current span.  Empty spans are never emitted.
 *
 * @param toks tokens in source order
 * @return the spans in source order; concatenating them yields `toks`
 */
def parse_spans(toks: List[Token]): List[List[Token]] =
{
  val result = new mutable.ListBuffer[List[Token]]
  val span = new mutable.ListBuffer[Token]
  val whitespace = new mutable.ListBuffer[Token]

  // Emit the buffer content as one span (if there is any) and reset the buffer.
  def flush(buffer: mutable.ListBuffer[Token]): Unit =
  {
    if (!buffer.isEmpty) { result += buffer.toList; buffer.clear() }
  }

  for (tok <- toks) {
    if (tok.is_command) { flush(span); flush(whitespace); span += tok }
    else if (tok.is_ignored) whitespace += tok
    else { span ++= whitespace; whitespace.clear(); span += tok }
  }
  flush(span); flush(whitespace)
  result.toList
}
38374  100 

101 

102 

103 
/** text edits **/ 

104 

38417  105 
/**
 * Apply text edits to the nodes of a previous document version.
 *
 * For every edited node the edits are first applied to the source of the
 * individual commands (phase 1, leaving unparsed commands behind), then the
 * affected region is re-parsed into proper command spans (phase 2).  The
 * difference between the old and the new command sequence is reported as
 * command edits for that node.
 *
 * @param session  provides the current syntax and fresh ids
 * @param previous document version the edits are relative to
 * @param edits    text edits, grouped by node name
 * @return the command edits per node, and the new document version
 */
def text_edits(session: Session, previous: Document.Version,
    edits: List[Document.Node_Text_Edit]): (List[Document.Edit[Command]], Document.Version) =
{
  /* phase 1: edit individual command source */

  @tailrec def edit_text(eds: List[Text.Edit], commands: Linear_Set[Command])
      : Linear_Set[Command] =
  {
    eds match {
      case e :: es =>
        // Look for a command the edit can be applied to, or one that ends
        // exactly where the edit inserts text.
        Document.Node.command_starts(commands.iterator).find {
          case (cmd, cmd_start) =>
            e.can_edit(cmd.source, cmd_start) ||
              e.is_insert && e.start == cmd_start + cmd.length
        } match {
          // Edit applies to this command: replace the command by an unparsed
          // one holding the edited text, and re-queue whatever is left of the edit.
          case Some((cmd, cmd_start)) if e.can_edit(cmd.source, cmd_start) =>
            val (rest, text) = e.edit(cmd.source, cmd_start)
            val new_commands = commands.insert_after(Some(cmd), Command.unparsed(text)) - cmd
            edit_text(rest.toList ::: es, new_commands)

          // Insertion directly after this command.
          case Some((cmd, cmd_start)) =>
            edit_text(es, commands.insert_after(Some(cmd), Command.unparsed(e.text)))

          // No matching command: only an insertion at offset 0 is acceptable.
          case None =>
            require(e.is_insert && e.start == 0)
            edit_text(es, commands.insert_after(None, Command.unparsed(e.text)))
        }
      case Nil => commands
    }
  }


  /* phase 2: recover command spans */

  @tailrec def recover_spans(commands: Linear_Set[Command]): Linear_Set[Command] =
  {
    commands.iterator.find(_.is_unparsed) match {
      case Some(first_unparsed) =>
        // Widen the region around the first unparsed command in both
        // directions: skip commands without newlines, then stop at the next
        // proper command (falling back to the very first / very last command).
        val first =
          commands.reverse_iterator(first_unparsed).
            dropWhile(_.newlines == 0).find(_.is_command) getOrElse commands.head
        val last =
          commands.iterator(first_unparsed).
            dropWhile(_.newlines == 0).find(_.is_command) getOrElse commands.last
        val range =
          commands.iterator(first).takeWhile(_ != last).toList ::: List(last)

        // Re-parse the concatenated source of the whole region.
        val sources = range.flatMap(_.span.map(_.source))
        val spans0 = parse_spans(session.current_syntax().scan(sources.mkString))

        // Keep the boundary commands when their spans come out unchanged.
        val (before_edit, spans1) =
          if (!spans0.isEmpty && first.is_command && first.span == spans0.head)
            (Some(first), spans0.tail)
          else (commands.prev(first), spans0)

        val (after_edit, spans2) =
          if (!spans1.isEmpty && last.is_command && last.span == spans1.last)
            (Some(last), spans1.take(spans1.length - 1))
          else (commands.next(last), spans1)

        val inserted = spans2.map(span => new Command(session.new_id(), span))
        val new_commands =
          commands.delete_between(before_edit, after_edit).append_after(before_edit, inserted)
        // Repeat until no unparsed command is left.
        recover_spans(new_commands)

      case None => commands
    }
  }


  /* resulting document edits */

  {
    val doc_edits = new mutable.ListBuffer[Document.Edit[Command]]
    var nodes = previous.nodes

    for ((name, text_edits) <- edits) {
      val commands0 = nodes(name).commands
      val commands1 = edit_text(text_edits, commands0)
      val commands2 = recover_spans(commands1)   // FIXME somewhat slow

      val removed_commands = commands0.iterator.filter(!commands2.contains(_)).toList
      val inserted_commands = commands2.iterator.filter(!commands0.contains(_)).toList

      // Removals come first, in reverse iteration order, each keyed by its
      // predecessor in the old sequence; insertions follow, keyed by their
      // predecessor in the new sequence.
      val cmd_edits =
        removed_commands.reverse.map(cmd => (commands0.prev(cmd), None)) :::
        inserted_commands.map(cmd => (commands2.prev(cmd), Some(cmd)))

      doc_edits += (name -> Some(cmd_edits))
      nodes += (name -> new Document.Node(commands2))
    }
    (doc_edits.toList, new Document.Version(session.new_id(), nodes))
  }
}

34268  199 
} 