| author | wenzelm | 
| Thu, 08 Dec 2022 22:38:03 +0100 | |
| changeset 76610 | 6e2383488a55 | 
| parent 75393 | 87ebf5a50283 | 
| child 76824 | 919b0f21e8cc | 
| permissions | -rw-r--r-- | 
| 64611 | 1 | /* Title: Pure/PIDE/line.scala | 
| 64605 | 2 | Author: Makarius | 
| 3 | ||
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 4 | Line-oriented text documents, with some bias towards VSCode. | 
| 64605 | 5 | */ | 
| 6 | ||
| 64611 | 7 | package isabelle | 
| 64605 | 8 | |
| 9 | ||
| 10 | import scala.annotation.tailrec | |
| 11 | ||
| 12 | ||
| 75393 | 13 | object Line {
 | 
| 64806 | 14 | /* logical lines */ | 
| 15 | ||
| 16 | def normalize(text: String): String = | |
| 17 |     if (text.contains('\r')) text.replace("\r\n", "\n") else text
 | |
| 18 | ||
| 19 | def logical_lines(text: String): List[String] = | |
| 66923 | 20 | split_lines(normalize(text)) | 
| 64806 | 21 | |
| 22 | ||
| 64605 | 23 | /* position */ | 
| 24 | ||
| 75393 | 25 |   object Position {
 | 
| 64650 | 26 | val zero: Position = Position() | 
| 66605 
261dcd52c5a0
less aggressive default position: prefer persistent defaults maintained by jEdit (amending 89c5bb2a2128);
 wenzelm parents: 
65903diff
changeset | 27 | val offside: Position = Position(line = -1) | 
| 65234 
1d6e9048cb62
normalize changes strictly as specified in the protocol definition (assuming non-overlapping ranges, amending 0f555ce33970), e.g. relevant for automatic quotes/parentheses around selection;
 wenzelm parents: 
65203diff
changeset | 28 | |
| 75393 | 29 |     object Ordering extends scala.math.Ordering[Position] {
 | 
| 65234 
1d6e9048cb62
normalize changes strictly as specified in the protocol definition (assuming non-overlapping ranges, amending 0f555ce33970), e.g. relevant for automatic quotes/parentheses around selection;
 wenzelm parents: 
65203diff
changeset | 30 | def compare(p1: Position, p2: Position): Int = p1 compare p2 | 
| 
1d6e9048cb62
normalize changes strictly as specified in the protocol definition (assuming non-overlapping ranges, amending 0f555ce33970), e.g. relevant for automatic quotes/parentheses around selection;
 wenzelm parents: 
65203diff
changeset | 31 | } | 
| 64605 | 32 | } | 
| 33 | ||
| 75393 | 34 |   sealed case class Position(line: Int = 0, column: Int = 0) {
 | 
| 64605 | 35 | def line1: Int = line + 1 | 
| 36 | def column1: Int = column + 1 | |
| 37 | def print: String = line1.toString + ":" + column1.toString | |
| 38 | ||
| 39 | def compare(that: Position): Int = | |
| 40 |       line compare that.line match {
 | |
| 41 | case 0 => column compare that.column | |
| 42 | case i => i | |
| 43 | } | |
| 44 | ||
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 45 | def advance(text: String): Position = | 
| 64617 | 46 | if (text.isEmpty) this | 
| 47 |       else {
 | |
| 64806 | 48 | val lines = logical_lines(text) | 
| 64619 | 49 | val l = line + lines.length - 1 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 50 | val c = (if (l == line) column else 0) + Library.trim_line(lines.last).length | 
| 64619 | 51 | Position(l, c) | 
| 64605 | 52 | } | 
| 53 | } | |
| 54 | ||
| 55 | ||
| 56 | /* range (right-open interval) */ | |
| 57 | ||
| 75393 | 58 |   object Range {
 | 
| 64666 | 59 | def apply(start: Position): Range = Range(start, start) | 
| 60 | val zero: Range = Range(Position.zero) | |
| 64647 | 61 | } | 
| 62 | ||
| 75393 | 63 |   sealed case class Range(start: Position, stop: Position) {
 | 
| 64605 | 64 | if (start.compare(stop) > 0) | 
| 65 |       error("Bad line range: " + start.print + ".." + stop.print)
 | |
| 66 | ||
| 64647 | 67 | def print: String = | 
| 68 | if (start == stop) start.print | |
| 69 | else start.print + ".." + stop.print | |
| 64605 | 70 | } | 
| 71 | ||
| 72 | ||
| 64649 | 73 | /* positions within document node */ | 
| 74 | ||
| 75393 | 75 |   object Node_Position {
 | 
| 66605 
261dcd52c5a0
less aggressive default position: prefer persistent defaults maintained by jEdit (amending 89c5bb2a2128);
 wenzelm parents: 
65903diff
changeset | 76 | def offside(name: String): Node_Position = Node_Position(name, Position.offside) | 
| 
261dcd52c5a0
less aggressive default position: prefer persistent defaults maintained by jEdit (amending 89c5bb2a2128);
 wenzelm parents: 
65903diff
changeset | 77 | } | 
| 
261dcd52c5a0
less aggressive default position: prefer persistent defaults maintained by jEdit (amending 89c5bb2a2128);
 wenzelm parents: 
65903diff
changeset | 78 | |
| 75393 | 79 |   sealed case class Node_Position(name: String, pos: Position = Position.zero) {
 | 
| 64650 | 80 | def line: Int = pos.line | 
| 70792 | 81 | def line1: Int = pos.line1 | 
| 64650 | 82 | def column: Int = pos.column | 
| 70792 | 83 | def column1: Int = pos.column1 | 
| 64650 | 84 | } | 
| 85 | ||
| 75393 | 86 |   sealed case class Node_Range(name: String, range: Range = Range.zero) {
 | 
| 64650 | 87 | def start: Position = range.start | 
| 88 | def stop: Position = range.stop | |
| 89 | } | |
| 64649 | 90 | |
| 91 | ||
| 64605 | 92 | /* document with newline as separator (not terminator) */ | 
| 93 | ||
| 75393 | 94 |   object Document {
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 95 | def apply(text: String): Document = | 
| 65903 | 96 | Document(logical_lines(text).map(s => Line(Library.isolate_substring(s)))) | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 97 | |
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 98 |     val empty: Document = apply("")
 | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 99 | |
| 65159 | 100 | private def split(line_text: String): List[Line] = | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 101 | if (line_text == "") List(Line.empty) else apply(line_text).lines | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 102 | |
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 103 | private def chop(lines: List[Line]): (String, List[Line]) = | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 104 |       lines match {
 | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 105 |         case Nil => ("", Nil)
 | 
| 65159 | 106 | case line :: rest => (line.text, rest) | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 107 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 108 | |
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 109 | private def length(lines: List[Line]): Int = | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 110 | if (lines.isEmpty) 0 | 
| 73359 | 111 |       else (lines.foldLeft(0) { case (n, line) => n + line.text.length + 1 }) - 1
 | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 112 | |
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 113 |     def text(lines: List[Line]): String = lines.mkString("", "\n", "")
 | 
| 64605 | 114 | } | 
| 115 | ||
| 75393 | 116 |   sealed case class Document(lines: List[Line]) {
 | 
| 65197 | 117 | lazy val text_length: Text.Offset = Document.length(lines) | 
| 118 | def text_range: Text.Range = Text.Range(0, text_length) | |
| 119 | ||
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 120 | lazy val text: String = Document.text(lines) | 
| 64672 | 121 | |
| 67014 | 122 | def get_text(range: Text.Range): Option[String] = | 
| 65522 | 123 | if (text_range.contains(range)) Some(range.substring(text)) else None | 
| 64877 | 124 | |
| 64821 | 125 | override def toString: String = text | 
| 64605 | 126 | |
| 127 | override def equals(that: Any): Boolean = | |
| 128 |       that match {
 | |
| 129 | case other: Document => lines == other.lines | |
| 130 | case _ => false | |
| 131 | } | |
| 132 | override def hashCode(): Int = lines.hashCode | |
| 133 | ||
| 75393 | 134 |     def position(text_offset: Text.Offset): Position = {
 | 
| 135 |       @tailrec def move(i: Text.Offset, lines_count: Int, lines_rest: List[Line]): Position = {
 | |
| 64605 | 136 |         lines_rest match {
 | 
| 73120 
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
 wenzelm parents: 
70792diff
changeset | 137 | case Nil => require(i == 0, "bad Line.position.move"); Position(lines_count) | 
| 64605 | 138 | case line :: ls => | 
| 139 | val n = line.text.length | |
| 64617 | 140 | if (ls.isEmpty || i <= n) | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 141 | Position(lines_count).advance(line.text.substring(n - i)) | 
| 64605 | 142 | else move(i - (n + 1), lines_count + 1, ls) | 
| 143 | } | |
| 144 | } | |
| 64681 
642b6105e6f4
clarified signature: explicit Length to avoid implicit mistakes;
 wenzelm parents: 
64679diff
changeset | 145 | move(text_offset, 0, lines) | 
| 64605 | 146 | } | 
| 147 | ||
| 64683 
c0c09b6dfbe0
clarified signature: maintan Text.Length within Line.Document;
 wenzelm parents: 
64682diff
changeset | 148 | def range(text_range: Text.Range): Range = | 
| 
c0c09b6dfbe0
clarified signature: maintan Text.Length within Line.Document;
 wenzelm parents: 
64682diff
changeset | 149 | Range(position(text_range.start), position(text_range.stop)) | 
| 64679 
b2bf280b7e13
more uniform treatment of input/output wrt. client;
 wenzelm parents: 
64672diff
changeset | 150 | |
| 75393 | 151 |     def offset(pos: Position): Option[Text.Offset] = {
 | 
| 64605 | 152 | val l = pos.line | 
| 153 | val c = pos.column | |
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 154 | val n = lines.length | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 155 |       if (0 <= l && l < n) {
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 156 |         if (0 <= c && c <= lines(l).text.length) {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 157 | val line_offset = | 
| 73359 | 158 |             lines.iterator.take(l).foldLeft(0) { case (n, line) => n + line.text.length + 1 }
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 159 | Some(line_offset + c) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 160 | } | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 161 | else None | 
| 64605 | 162 | } | 
| 65197 | 163 | else if (l == n && c == 0) Some(text_length) | 
| 64605 | 164 | else None | 
| 165 | } | |
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 166 | |
| 75393 | 167 |     def change(remove: Range, insert: String): Option[(List[Text.Edit], Document)] = {
 | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 168 |       for {
 | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 169 | edit_start <- offset(remove.start) | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 170 | if remove.stop == remove.start || offset(remove.stop).isDefined | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 171 | l1 = remove.start.line | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 172 | l2 = remove.stop.line | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 173 | if l1 <= l2 | 
| 75393 | 174 |         (removed_text, new_lines) <- {
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 175 | val c1 = remove.start.column | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 176 | val c2 = remove.stop.column | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 177 | |
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 178 | val (prefix, lines1) = lines.splitAt(l1) | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 179 | val (s1, rest1) = Document.chop(lines1) | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 180 | |
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 181 |           if (l1 == l2) {
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 182 |             if (0 <= c1 && c1 <= c2 && c2 <= s1.length) {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 183 | Some( | 
| 65203 | 184 |                 if (lines1.isEmpty) ("", prefix ::: Document.split(insert))
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 185 |                 else {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 186 | val removed_text = s1.substring(c1, c2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 187 | val changed_text = s1.substring(0, c1) + insert + s1.substring(c2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 188 | (removed_text, prefix ::: Document.split(changed_text) ::: rest1) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 189 | }) | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 190 | } | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 191 | else None | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 192 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 193 |           else {
 | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 194 | val (middle, lines2) = rest1.splitAt(l2 - l1 - 1) | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 195 | val (s2, rest2) = Document.chop(lines2) | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 196 |             if (0 <= c1 && c1 <= s1.length && 0 <= c2 && c2 <= s2.length) {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 197 | Some( | 
| 65203 | 198 |                 if (lines1.isEmpty) ("", prefix ::: Document.split(insert))
 | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 199 |                 else {
 | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 200 | val r1 = s1.substring(c1) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 201 | val r2 = s2.substring(0, c2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 202 | val removed_text = | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 203 | if (lines2.isEmpty) Document.text(Line(r1) :: middle) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 204 | else Document.text(Line(r1) :: middle ::: List(Line(r2))) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 205 | val changed_text = s1.substring(0, c1) + insert + s2.substring(c2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 206 | (removed_text, prefix ::: Document.split(changed_text) ::: rest2) | 
| 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 207 | }) | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 208 | } | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 209 | else None | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 210 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 211 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 212 | } | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 213 | yield | 
| 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 214 | (Text.Edit.removes(edit_start, removed_text) ::: Text.Edit.inserts(edit_start, insert), | 
| 65196 
e8760a98db78
discontinued pointless Text.Length: Javascript and Java agree in old-fashioned UTF-16;
 wenzelm parents: 
65159diff
changeset | 215 | Document(new_lines)) | 
| 65157 
cd977a5bd928
clarified Document.offset: including final position;
 wenzelm parents: 
64877diff
changeset | 216 | } | 
| 64605 | 217 | } | 
| 218 | ||
| 219 | ||
| 220 | /* line text */ | |
| 221 | ||
| 222 |   val empty: Line = new Line("")
 | |
| 223 | def apply(text: String): Line = if (text == "") empty else new Line(text) | |
| 224 | } | |
| 225 | ||
| 75393 | 226 | final class Line private(val text: String) {
 | 
| 73120 
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
 wenzelm parents: 
70792diff
changeset | 227 | require(text.forall(c => c != '\r' && c != '\n'), "bad line text") | 
| 64605 | 228 | |
| 229 | override def equals(that: Any): Boolean = | |
| 230 |     that match {
 | |
| 231 | case other: Line => text == other.text | |
| 232 | case _ => false | |
| 233 | } | |
| 234 | override def hashCode(): Int = text.hashCode | |
| 235 | override def toString: String = text | |
| 236 | } |