src/Tools/jEdit/src/proofdocument/document.scala
author wenzelm
Sun Jan 10 16:40:21 2010 +0100 (2010-01-10)
changeset 34853 32b49207ca20
parent 34840 6c5560d48561
child 34855 81d0410dc3ac
permissions -rw-r--r--
misc tuning and clarification of Document/Change;
wenzelm@34407
     1
/*
wenzelm@34485
     2
 * Document as list of commands, consisting of lists of tokens
wenzelm@34407
     3
 *
wenzelm@34407
     4
 * @author Johannes Hölzl, TU Munich
immler@34532
     5
 * @author Fabian Immler, TU Munich
wenzelm@34485
     6
 * @author Makarius
wenzelm@34407
     7
 */
wenzelm@34407
     8
wenzelm@34318
     9
package isabelle.proofdocument
wenzelm@34318
    10
wenzelm@34760
    11
wenzelm@34818
    12
import scala.actors.Actor._
wenzelm@34824
    13
import scala.collection.mutable
wenzelm@34818
    14
wenzelm@34318
    15
import java.util.regex.Pattern
wenzelm@34703
    16
wenzelm@34318
    17
wenzelm@34823
    18
object Document
{
  // Tokenizer for document text.  The alternatives are tried in order:
  //   - verbatim text {* ... *} and comments (* ... *), either of which
  //     may be left unterminated at the very end of the input (\z)
  //   - identifiers introduced by ?, ?' or '
  //   - runs of letters, digits, underscores and dots
  //   - runs of symbolic characters
  //   - "..." and `...` strings with backslash escapes, possibly
  //     unterminated at the end of the input
  //   - the single delimiters ( ) [ ] { } : ;
  //
  // Be careful when changing this regex. Not only must it handle the
  // spurious end of a token but also:
  // Bug ID: 5050507 Pattern.matches throws StackOverflow Error
  // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5050507
  //
  // NOTE(review): inside the symbol character class, "+-/" is a range
  // (+ , - . /), so that alternative also admits ',' and '.' -- confirm
  // whether this is intended before touching the pattern.

  val token_pattern =
    Pattern.compile(
      "\\{\\*([^*]|\\*[^}]|\\*\\z)*(\\z|\\*\\})|" +
      "\\(\\*([^*]|\\*[^)]|\\*\\z)*(\\z|\\*\\))|" +
      "(\\?'?|')[A-Za-z_0-9.]*|" +
      "[A-Za-z_0-9.]+|" +
      "[!#$%&*+-/<=>?@^_|~]+|" +
      "\"([^\\\\\"]?(\\\\(.|\\z))?)*+(\"|\\z)|" +
      "`([^\\\\`]?(\\\\(.|\\z))?)*+(`|\\z)|" +
      "[()\\[\\]{}:;]", Pattern.MULTILINE)

  // Document without tokens or commands, whose (empty) state assignment
  // is fulfilled right away.
  def empty(id: Isar_Document.Document_ID): Document =
  {
    val doc = new Document(id, Linear_Set(), Map(), Linear_Set(), Map())
    doc.assign_states(Nil)
    doc
  }

  // Command-level edit as a pair (predecessor, command): the second
  // component is Some(cmd) for an insertion after the predecessor and
  // None for a removal (see the edits produced by Document_Body).
  type Edit = (Option[Command], Option[Command])

  // Applies the given text edits, in order, to the content of old_doc and
  // returns the accumulated command edits together with the resulting
  // document, which carries new_id.
  //
  // Requires the state assignment of old_doc to be finished already.
  def text_edits(session: Session, old_doc: Document, new_id: Isar_Document.Document_ID,
    edits: List[Text_Edit]): (List[Edit], Document) =
  {
    require(old_doc.assignment.is_finished)
    val doc0 =
      Document_Body(old_doc.tokens, old_doc.token_start, old_doc.commands, old_doc.assignment.join)

    val changes = new mutable.ListBuffer[Edit]
    val doc = (doc0 /: edits)((doc1: Document_Body, edit: Text_Edit) =>
      {
        val (doc2, chgs) = doc1.text_edit(session, edit)
        // in-place append: plain ++ only mutates the buffer via the
        // deprecated Scala 2.7 semantics and yields a discarded copy from
        // Scala 2.8 on, which would silently lose all edits
        changes ++= chgs
        doc2
      })
    val new_doc = new Document(new_id, doc.tokens, doc.token_start, doc.commands, doc.states)
    (changes.toList, new_doc)
  }
}
immler@34538
    63
wenzelm@34840
    64
// Content of a document without its id: tokens, their start offsets,
// the commands built from them and the command -> state assignment.
// Document.text_edits threads values of this class through the
// individual text edits.
private case class Document_Body(
  val tokens: Linear_Set[Token],   // FIXME plain List, inside Command
  val token_start: Map[Token, Int],  // FIXME eliminate
  val commands: Linear_Set[Command],
  val states: Map[Command, Command])
{
  /* token view */

  // Applies a single text edit (insertion or removal at some offset).
  //
  // The old tokens are split into those ending before the change
  // ("begin"), those overlapping it ("removed") and the rest ("end").
  // The affected text is tokenized again with Document.token_pattern, and
  // the command structure is then updated via token_changed.
  //
  // Returns the updated body and the resulting command edits.
  def text_edit(session: Session, e: Text_Edit): (Document_Body, List[Document.Edit]) =
  {
    case class TextChange(start: Int, added: String, removed: String)
    val change = e match {
      case Text_Edit.Insert(s, a) => TextChange(s, a, "")
      case Text_Edit.Remove(s, r) => TextChange(s, "", r)
    }
    // indices of tokens; updated step by step below, and stop() always
    // reads the current version
    var start: Map[Token, Int] = token_start
    def stop(t: Token) = start(t) + t.length
    // split old token lists
    val tokens = Nil ++ this.tokens
    val (begin, remaining) = tokens.span(stop(_) < change.start)
    val (removed, end) = remaining.span(token_start(_) <= change.start + change.removed.length)
    // update indices: tokens after the change move by the length difference
    start = end.foldLeft(start)((s, t) =>
      s + (t -> (s(t) + change.added.length - change.removed.length)))

    // tokens starting before the change keep their prefix up to change.start
    val split_begin = removed.takeWhile(start(_) < change.start).
      map (t => {
          val split_tok = new Token(t.content.substring(0, change.start - start(t)), t.kind)
          start += (split_tok -> start(t))
          split_tok
        })

    // tokens reaching beyond the removed text keep their remaining suffix
    val split_end = removed.dropWhile(stop(_) < change.start + change.removed.length).
      map (t => {
          val split_tok =
            new Token(t.content.substring(change.start + change.removed.length - start(t)), t.kind)
          start += (split_tok -> start(t))
          split_tok
        })
    // update indices: forget the overlapped tokens, and place the kept
    // suffixes right after the added text
    start = removed.foldLeft (start) ((s, t) => s - t)
    start = split_end.foldLeft (start) ((s, t) =>
      s + (t -> (change.start + change.added.length)))

    // the added text enters as one provisional token
    val ins = new Token(change.added, Token.Kind.OTHER)
    start += (ins -> change.start)

    var invalid_tokens = split_begin ::: ins :: split_end ::: end
    var new_tokens: List[Token] = Nil
    var old_suffix: List[Token] = Nil

    val match_start = invalid_tokens.firstOption.map(start(_)).getOrElse(0)
    val matcher =
      Document.token_pattern.matcher(Token.string_from_tokens(invalid_tokens, start))

    // tokenize again until the scanner is back in step with the old tokens
    while (matcher.find() && invalid_tokens != Nil) {
      val kind =
        if (session.current_syntax.is_command(matcher.group))
          Token.Kind.COMMAND_START
        else if (matcher.end - matcher.start > 2 && matcher.group.substring(0, 2) == "(*")
          Token.Kind.COMMENT
        else
          Token.Kind.OTHER
      val new_token = new Token(matcher.group, kind)
      start += (new_token -> (match_start + matcher.start))
      new_tokens ::= new_token

      invalid_tokens = invalid_tokens dropWhile (stop(_) < stop(new_token))
      invalid_tokens match {
        case t :: ts =>
          // an old token starting at the same offset, beyond the added
          // text: keep the old tokens from here on, drop the fresh
          // token and stop scanning
          if (start(t) == start(new_token) &&
              start(t) > change.start + change.added.length) {
            old_suffix = t :: ts
            new_tokens = new_tokens.tail
            invalid_tokens = Nil
          }
        case _ =>
      }
    }
    val insert = new_tokens.reverse
    val new_token_list = begin ::: insert ::: old_suffix
    token_changed(session, begin.lastOption, insert,
      old_suffix.firstOption, new_token_list, start)
  }


  /* command view */

  // Rebuilds the command structure after the token list has changed.
  //
  //   before_change    last old token in front of the changed region, if any
  //   inserted_tokens  the fresh tokens (currently not used here)
  //   after_change     first old token behind the changed region, if any
  //   new_tokens       the complete new token list
  //   new_token_start  start offsets for all tokens of new_tokens
  //
  // Returns the new body and the command edits: one (predecessor, None)
  // per removed command, then one (predecessor, Some(cmd)) per inserted
  // command.
  private def token_changed(
      session: Session,
      before_change: Option[Token],
      inserted_tokens: List[Token],
      after_change: Option[Token],
      new_tokens: List[Token],
      new_token_start: Map[Token, Int]): (Document_Body, List[Document.Edit]) =
  {
    val new_tokenset = Linear_Set[Token]() ++ new_tokens
    // last command that stays as it is in front of the change: the command
    // containing before_change, provided that token ends it and is followed
    // by a start token (or by nothing); otherwise the predecessor
    val cmd_before_change = before_change match {
      case None => None
      case Some(bc) =>
        val cmd_with_bc = commands.find(_.contains(bc)).get
        if (cmd_with_bc.tokens.last == bc) {
          if (new_tokenset.next(bc).map(_.is_start).getOrElse(true))
            Some(cmd_with_bc)
          else commands.prev(cmd_with_bc)
        }
        else commands.prev(cmd_with_bc)
    }

    // first command that stays as it is behind the change: the command
    // containing after_change if that token starts it, otherwise the
    // successor
    val cmd_after_change = after_change match {
      case None => None
      case Some(ac) =>
        val cmd_with_ac = commands.find(_.contains(ac)).get
        if (ac.is_start)
          Some(cmd_with_ac)
        else
          commands.next(cmd_with_ac)
    }

    // Old commands strictly between cmd_before_change and cmd_after_change.
    // Without a predecessor the range begins at the very first command:
    // searching for None via dropWhile would discard every command, so no
    // removal would be reported, although the new command set below is
    // built with delete_between(None, ...).
    // NOTE(review): assumes delete_between treats None as "from the
    // beginning" / "to the end" -- confirm against Linear_Set.
    val old_commands = Nil ++ commands
    val removal_candidates =
      if (cmd_before_change.isDefined)
        old_commands.dropWhile(Some(_) != cmd_before_change).drop(1)
      else old_commands
    val removed_commands = removal_candidates.takeWhile(Some(_) != cmd_after_change)

    // calculate inserted commands: each command consists of its first
    // token and everything up to the next COMMAND_START or COMMENT token
    def tokens_to_commands(tokens: List[Token]): List[Command]= {
      tokens match {
        case Nil => Nil
        case t :: ts =>
          val (cmd, rest) =
            ts.span(t => t.kind != Token.Kind.COMMAND_START && t.kind != Token.Kind.COMMENT)
          new Command(session.create_id(), t :: cmd, new_token_start) :: tokens_to_commands(rest)
      }
    }

    // unchanged tokens in front of the change that belong to a command
    // which has to be built again
    val split_begin =
      if (before_change.isDefined) {
        val changed =
          if (cmd_before_change.isDefined)
            new_tokens.dropWhile(_ != cmd_before_change.get.tokens.last).drop(1)
          else new_tokenset
        if (changed.exists(_ == before_change.get))
          changed.takeWhile(_ != before_change.get).toList :::
            List(before_change.get)
        else Nil
      } else Nil

    // unchanged tokens behind the change, up to the first token of
    // cmd_after_change
    val split_end =
      if (after_change.isDefined) {
        val unchanged = new_tokens.dropWhile(_ != after_change.get)
        if(cmd_after_change.isDefined) {
          if (unchanged.exists(_ == cmd_after_change.get.tokens.first))
            unchanged.takeWhile(_ != cmd_after_change.get.tokens.first).toList
          else Nil
        } else {
          unchanged
        }
      } else Nil

    val rescan_begin =
      split_begin :::
        before_change.map(bc => new_tokens.dropWhile(_ != bc).drop(1)).getOrElse(new_tokens)
    val rescanning_tokens =
      after_change.map(ac => rescan_begin.takeWhile(_ != ac)).getOrElse(rescan_begin) :::
        split_end
    val inserted_commands = tokens_to_commands(rescanning_tokens.toList)

    // build new document
    val new_commandset = commands.
      delete_between(cmd_before_change, cmd_after_change).
      append_after(cmd_before_change, inserted_commands)


    val doc =
      new Document_Body(new_tokenset, new_token_start, new_commandset, states -- removed_commands)

    // every removal refers to the same predecessor: one edit per removed
    // command
    val removes =
      for (cmd <- removed_commands) yield (cmd_before_change -> None)
    val inserts =
      for (cmd <- inserted_commands) yield (doc.commands.prev(cmd) -> Some(cmd))

    (doc, removes.toList ++ inserts)
  }
}
wenzelm@34840
   247
wenzelm@34840
   248
// Document version: identifier, tokens with their start offsets, and
// commands.  The command -> state assignment is supplied later, exactly
// once, via assign_states.
class Document(
    val id: Isar_Document.Document_ID,
    val tokens: Linear_Set[Token],   // FIXME plain List, inside Command
    val token_start: Map[Token, Int],  // FIXME eliminate
    val commands: Linear_Set[Command],
    old_states: Map[Command, Command])
{
  // text of the document, assembled from its tokens and their offsets
  def content = Token.string_from_tokens(Nil ++ tokens, token_start)


  /* command/state assignment */

  val assignment = Future.promise[Map[Command, Command]]
  def await_assignment { assignment.join }

  // states carried over from the previous version; only needed until the
  // assignment has been fulfilled
  @volatile private var tmp_states = old_states

  // Fulfills the assignment with the carried-over states extended (or
  // overridden) by new_states, then releases the temporary map.
  def assign_states(new_states: List[(Command, Command)])
  {
    assignment.fulfill(tmp_states ++ new_states)
    tmp_states = Map()
  }

  // Current state of the entry assigned to cmd; the assignment must be
  // finished already.
  def current_state(cmd: Command): State =
  {
    require(assignment.is_finished)
    (assignment.join)(cmd).current_state
  }


  // Each command paired with the interval (end of the previous command,
  // own end), in document order; the first interval starts at 0.
  val commands_offsets = {
    var last_stop = 0
    (for (c <- commands) yield {
      val c_stop = c.stop(this)  // evaluated once per command
      val r = c -> (last_stop, c_stop)
      last_stop = c_stop
      r
    }).toArray
  }

  // Command whose interval in commands_offsets contains pos, if any.
  def command_at(pos: Int): Option[Command] =
    find_command(pos, 0, commands_offsets.length)

  // use a binary search to find commands for a given offset, within the
  // half-open index range [array_start, array_stop)
  private def find_command(pos: Int, array_start: Int, array_stop: Int): Option[Command] =
  {
    // An empty range means that no interval contains pos.  Without this
    // test a position in front of the first interval (e.g. a negative one)
    // ends up as find_command(pos, 0, 0) over and over again.
    if (array_start >= array_stop) None
    else {
      val middle_index = array_start + (array_stop - array_start) / 2
      val (middle, (start, stop)) = commands_offsets(middle_index)
      // does middle contain pos?
      if (start <= pos && pos < stop)
        Some(middle)
      else if (start > pos)
        find_command(pos, array_start, middle_index)
      else  // here stop <= pos holds
        find_command(pos, middle_index + 1, array_stop)
    }
  }
}