| author | wenzelm | 
| Wed, 21 Feb 2024 19:54:17 +0100 | |
| changeset 79683 | ade429ddb1fc | 
| parent 76825 | 65e8a9272837 | 
| child 81061 | fe9ae6b67c29 | 
| permissions | -rw-r--r-- | 
| 64611 | 1 | /* Title: Pure/PIDE/line.scala | 
| 64605 | 2 | Author: Makarius | 
| 3 | ||
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 4 | Line-oriented text documents, with some bias towards VSCode. | 
| 64605 | 5 | */ | 
| 6 | ||
| 64611 | 7 | package isabelle | 
| 64605 | 8 | |
| 9 | ||
| 10 | import scala.annotation.tailrec | |
| 11 | ||
| 12 | ||
| 75393 | 13 | object Line {
 | 
| 64806 | 14 | /* logical lines */ | 
| 15 | ||
| 16 | def normalize(text: String): String = | |
| 17 |     if (text.contains('\r')) text.replace("\r\n", "\n") else text
 | |
| 18 | ||
| 19 | def logical_lines(text: String): List[String] = | |
| 66923 | 20 | split_lines(normalize(text)) | 
| 64806 | 21 | |
| 22 | ||
| 64605 | 23 | /* position */ | 
| 24 | ||
| 75393 | 25 |   object Position {
 | 
| 64650 | 26 | val zero: Position = Position() | 
| 66605 
261dcd52c5a0
less aggressive default position: prefer persistent defaults maintained by jEdit (amending 89c5bb2a2128);
 wenzelm parents: 
65903diff
changeset | 27 | val offside: Position = Position(line = -1) | 
| 65234 
1d6e9048cb62
normalize changes strictly as specified in the protocol definition (assuming non-overlapping ranges, amending 0f555ce33970), e.g. relevant for automatic quotes/parentheses around selection;
 wenzelm parents: 
65203diff
changeset | 28 | |
| 75393 | 29 |     object Ordering extends scala.math.Ordering[Position] {
 | 
| 65234 
1d6e9048cb62
normalize changes strictly as specified in the protocol definition (assuming non-overlapping ranges, amending 0f555ce33970), e.g. relevant for automatic quotes/parentheses around selection;
 wenzelm parents: 
65203diff
changeset | 30 | def compare(p1: Position, p2: Position): Int = p1 compare p2 | 
| 
1d6e9048cb62
normalize changes strictly as specified in the protocol definition (assuming non-overlapping ranges, amending 0f555ce33970), e.g. relevant for automatic quotes/parentheses around selection;
 wenzelm parents: 
65203diff
changeset | 31 | } | 
| 64605 | 32 | } | 
| 33 | ||
| 75393 | 34 |   sealed case class Position(line: Int = 0, column: Int = 0) {
 | 
| 64605 | 35 | def line1: Int = line + 1 | 
| 36 | def column1: Int = column + 1 | |
| 76825 | 37 | def print: String = | 
| 38 | if (column == 0) line1.toString | |
| 39 | else line1.toString + ":" + column1.toString | |
| 64605 | 40 | |
| 41 | def compare(that: Position): Int = | |
| 42 |       line compare that.line match {
 | |
| 43 | case 0 => column compare that.column | |
| 44 | case i => i | |
| 45 | } | |
| 46 | ||
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 47 | def advance(text: String): Position = | 
| 64617 | 48 | if (text.isEmpty) this | 
| 49 |       else {
 | |
| 64806 | 50 | val lines = logical_lines(text) | 
| 64619 | 51 | val l = line + lines.length - 1 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 52 | val c = (if (l == line) column else 0) + Library.trim_line(lines.last).length | 
| 76824 | 53 | Position(line = l, column = c) | 
| 64605 | 54 | } | 
| 55 | } | |
| 56 | ||
| 57 | ||
| 58 | /* range (right-open interval) */ | |
| 59 | ||
| 75393 | 60 |   object Range {
 | 
| 64666 | 61 | def apply(start: Position): Range = Range(start, start) | 
| 62 | val zero: Range = Range(Position.zero) | |
| 64647 | 63 | } | 
| 64 | ||
| 75393 | 65 |   sealed case class Range(start: Position, stop: Position) {
 | 
| 76824 | 66 |     if (start.compare(stop) > 0) {
 | 
| 64605 | 67 |       error("Bad line range: " + start.print + ".." + stop.print)
 | 
| 76824 | 68 | } | 
| 64605 | 69 | |
| 64647 | 70 | def print: String = | 
| 71 | if (start == stop) start.print | |
| 72 | else start.print + ".." + stop.print | |
| 64605 | 73 | } | 
| 74 | ||
| 75 | ||
| 64649 | 76 | /* positions within document node */ | 
| 77 | ||
| 75393 | 78 |   object Node_Position {
 | 
| 76824 | 79 | def offside(name: String): Node_Position = Node_Position(name, pos = Position.offside) | 
| 66605 
261dcd52c5a0
less aggressive default position: prefer persistent defaults maintained by jEdit (amending 89c5bb2a2128);
 wenzelm parents: 
65903diff
changeset | 80 | } | 
| 
261dcd52c5a0
less aggressive default position: prefer persistent defaults maintained by jEdit (amending 89c5bb2a2128);
 wenzelm parents: 
65903diff
changeset | 81 | |
| 75393 | 82 |   sealed case class Node_Position(name: String, pos: Position = Position.zero) {
 | 
| 64650 | 83 | def line: Int = pos.line | 
| 70792 | 84 | def line1: Int = pos.line1 | 
| 64650 | 85 | def column: Int = pos.column | 
| 70792 | 86 | def column1: Int = pos.column1 | 
| 64650 | 87 | } | 
| 88 | ||
| 75393 | 89 |   sealed case class Node_Range(name: String, range: Range = Range.zero) {
 | 
| 64650 | 90 | def start: Position = range.start | 
| 91 | def stop: Position = range.stop | |
| 92 | } | |
| 64649 | 93 | |
| 94 | ||
| 64605 | 95 | /* document with newline as separator (not terminator) */ | 
| 96 | ||
| 75393 | 97 |   object Document {
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 98 | def apply(text: String): Document = | 
| 65903 | 99 | Document(logical_lines(text).map(s => Line(Library.isolate_substring(s)))) | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 100 | |
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 101 |     val empty: Document = apply("")
 | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 102 | |
| 65159 | 103 | private def split(line_text: String): List[Line] = | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 104 | if (line_text == "") List(Line.empty) else apply(line_text).lines | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 105 | |
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 106 | private def chop(lines: List[Line]): (String, List[Line]) = | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 107 |       lines match {
 | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 108 |         case Nil => ("", Nil)
 | 
| 65159 | 109 | case line :: rest => (line.text, rest) | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 110 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 111 | |
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 112 | private def length(lines: List[Line]): Int = | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 113 | if (lines.isEmpty) 0 | 
| 73359 | 114 |       else (lines.foldLeft(0) { case (n, line) => n + line.text.length + 1 }) - 1
 | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 115 | |
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 116 |     def text(lines: List[Line]): String = lines.mkString("", "\n", "")
 | 
| 64605 | 117 | } | 
| 118 | ||
| 75393 | 119 |   sealed case class Document(lines: List[Line]) {
 | 
| 65197 | 120 | lazy val text_length: Text.Offset = Document.length(lines) | 
| 121 | def text_range: Text.Range = Text.Range(0, text_length) | |
| 122 | ||
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 123 | lazy val text: String = Document.text(lines) | 
| 64672 | 124 | |
| 67014 | 125 | def get_text(range: Text.Range): Option[String] = | 
| 65522 | 126 | if (text_range.contains(range)) Some(range.substring(text)) else None | 
| 64877 | 127 | |
| 64821 | 128 | override def toString: String = text | 
| 64605 | 129 | |
| 130 | override def equals(that: Any): Boolean = | |
| 131 |       that match {
 | |
| 132 | case other: Document => lines == other.lines | |
| 133 | case _ => false | |
| 134 | } | |
| 135 | override def hashCode(): Int = lines.hashCode | |
| 136 | ||
| 75393 | 137 |     def position(text_offset: Text.Offset): Position = {
 | 
| 138 |       @tailrec def move(i: Text.Offset, lines_count: Int, lines_rest: List[Line]): Position = {
 | |
| 64605 | 139 |         lines_rest match {
 | 
| 76824 | 140 | case Nil => | 
| 141 | require(i == 0, "bad Line.position.move") | |
| 142 | Position(line = lines_count) | |
| 64605 | 143 | case line :: ls => | 
| 144 | val n = line.text.length | |
| 76824 | 145 |             if (ls.isEmpty || i <= n) {
 | 
| 146 | Position(line = lines_count).advance(line.text.substring(n - i)) | |
| 147 | } | |
| 64605 | 148 | else move(i - (n + 1), lines_count + 1, ls) | 
| 149 | } | |
| 150 | } | |
| 64681 
642b6105e6f4
clarified signature: explicit Length to avoid implicit mistakes;
 wenzelm parents: 
64679diff
changeset | 151 | move(text_offset, 0, lines) | 
| 64605 | 152 | } | 
| 153 | ||
| 64683 
c0c09b6dfbe0
clarified signature: maintan Text.Length within Line.Document;
 wenzelm parents: 
64682diff
changeset | 154 | def range(text_range: Text.Range): Range = | 
| 
c0c09b6dfbe0
clarified signature: maintan Text.Length within Line.Document;
 wenzelm parents: 
64682diff
changeset | 155 | Range(position(text_range.start), position(text_range.stop)) | 
| 64679 
b2bf280b7e13
more uniform treatment of input/output wrt. client;
 wenzelm parents: 
64672diff
changeset | 156 | |
| 75393 | 157 |     def offset(pos: Position): Option[Text.Offset] = {
 | 
| 64605 | 158 | val l = pos.line | 
| 159 | val c = pos.column | |
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 160 | val n = lines.length | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 161 |       if (0 <= l && l < n) {
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 162 |         if (0 <= c && c <= lines(l).text.length) {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 163 | val line_offset = | 
| 73359 | 164 |             lines.iterator.take(l).foldLeft(0) { case (n, line) => n + line.text.length + 1 }
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 165 | Some(line_offset + c) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 166 | } | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 167 | else None | 
| 64605 | 168 | } | 
| 65197 | 169 | else if (l == n && c == 0) Some(text_length) | 
| 64605 | 170 | else None | 
| 171 | } | |
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 172 | |
| 75393 | 173 |     def change(remove: Range, insert: String): Option[(List[Text.Edit], Document)] = {
 | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 174 |       for {
 | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 175 | edit_start <- offset(remove.start) | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 176 | if remove.stop == remove.start || offset(remove.stop).isDefined | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 177 | l1 = remove.start.line | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 178 | l2 = remove.stop.line | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 179 | if l1 <= l2 | 
| 75393 | 180 |         (removed_text, new_lines) <- {
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 181 | val c1 = remove.start.column | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 182 | val c2 = remove.stop.column | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 183 | |
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 184 | val (prefix, lines1) = lines.splitAt(l1) | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 185 | val (s1, rest1) = Document.chop(lines1) | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 186 | |
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 187 |           if (l1 == l2) {
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 188 |             if (0 <= c1 && c1 <= c2 && c2 <= s1.length) {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 189 | Some( | 
| 65203 | 190 |                 if (lines1.isEmpty) ("", prefix ::: Document.split(insert))
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 191 |                 else {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 192 | val removed_text = s1.substring(c1, c2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 193 | val changed_text = s1.substring(0, c1) + insert + s1.substring(c2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 194 | (removed_text, prefix ::: Document.split(changed_text) ::: rest1) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 195 | }) | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 196 | } | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 197 | else None | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 198 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 199 |           else {
 | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 200 | val (middle, lines2) = rest1.splitAt(l2 - l1 - 1) | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 201 | val (s2, rest2) = Document.chop(lines2) | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 202 |             if (0 <= c1 && c1 <= s1.length && 0 <= c2 && c2 <= s2.length) {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 203 | Some( | 
| 65203 | 204 |                 if (lines1.isEmpty) ("", prefix ::: Document.split(insert))
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 205 |                 else {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 206 | val r1 = s1.substring(c1) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 207 | val r2 = s2.substring(0, c2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 208 | val removed_text = | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 209 | if (lines2.isEmpty) Document.text(Line(r1) :: middle) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 210 | else Document.text(Line(r1) :: middle ::: List(Line(r2))) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 211 | val changed_text = s1.substring(0, c1) + insert + s2.substring(c2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 212 | (removed_text, prefix ::: Document.split(changed_text) ::: rest2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 213 | }) | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 214 | } | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 215 | else None | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 216 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 217 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 218 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 219 | yield | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 220 | (Text.Edit.removes(edit_start, removed_text) ::: Text.Edit.inserts(edit_start, insert), | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 221 | Document(new_lines)) | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 222 | } | 
| 64605 | 223 | } | 
| 224 | ||
| 225 | ||
| 226 | /* line text */ | |
| 227 | ||
| 228 |   val empty: Line = new Line("")
 | |
| 229 | def apply(text: String): Line = if (text == "") empty else new Line(text) | |
| 230 | } | |
| 231 | ||
| 75393 | 232 | final class Line private(val text: String) {
 | 
| 73120 
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
 wenzelm parents: 
70792diff
changeset | 233 | require(text.forall(c => c != '\r' && c != '\n'), "bad line text") | 
| 64605 | 234 | |
| 235 | override def equals(that: Any): Boolean = | |
| 236 |     that match {
 | |
| 237 | case other: Line => text == other.text | |
| 238 | case _ => false | |
| 239 | } | |
| 240 | override def hashCode(): Int = text.hashCode | |
| 241 | override def toString: String = text | |
| 242 | } |