| author | wenzelm | 
| Wed, 08 Mar 2017 20:30:05 +0100 | |
| changeset 65155 | 25bccf5bf33e | 
| parent 65154 | ba1929b749f0 | 
| child 65156 | 35fefc249311 | 
| permissions | -rw-r--r-- | 
| 38425 | 1  | 
/* Title: Pure/PIDE/text.scala  | 
| 34276 | 2  | 
Author: Fabian Immler, TU Munich  | 
3  | 
Author: Makarius  | 
|
4  | 
||
| 38425 | 5  | 
Basic operations on plain text.  | 
| 34276 | 6  | 
*/  | 
7  | 
||
8  | 
package isabelle  | 
|
9  | 
||
10  | 
||
| 44379 | 11  | 
import scala.collection.mutable  | 
12  | 
import scala.util.Sorting  | 
|
13  | 
||
14  | 
||
| 38425 | 15  | 
object Text  | 
| 34276 | 16  | 
{
 | 
| 38477 | 17  | 
/* offset */  | 
| 38426 | 18  | 
|
19  | 
type Offset = Int  | 
|
20  | 
||
| 38477 | 21  | 
|
| 
38565
 
32b924a832c4
further clarification/unification of Position.Range and Text.Range concerning singularities: start offset is always included;
 
wenzelm 
parents: 
38564 
diff
changeset
 | 
22  | 
/* range -- with total quasi-ordering */  | 
| 38477 | 23  | 
|
| 38568 | 24  | 
object Range  | 
25  | 
  {
 | 
|
26  | 
def apply(start: Offset): Range = Range(start, start)  | 
|
| 44379 | 27  | 
|
| 64678 | 28  | 
val full: Range = apply(0, Integer.MAX_VALUE / 2)  | 
| 56172 | 29  | 
val offside: Range = apply(-1)  | 
30  | 
||
| 65154 | 31  | 
object Ordering extends scala.math.Ordering[Range]  | 
| 44379 | 32  | 
    {
 | 
| 65154 | 33  | 
def compare(r1: Range, r2: Range): Int = r1 compare r2  | 
| 44379 | 34  | 
}  | 
| 38568 | 35  | 
}  | 
36  | 
||
| 60215 | 37  | 
sealed case class Range(start: Offset, stop: Offset)  | 
| 38427 | 38  | 
  {
 | 
| 
38565
 
32b924a832c4
further clarification/unification of Position.Range and Text.Range concerning singularities: start offset is always included;
 
wenzelm 
parents: 
38564 
diff
changeset
 | 
39  | 
    // denotation: {start} Un {i. start < i & i < stop}
 | 
| 43425 | 40  | 
if (start > stop)  | 
| 
64546
 
134ae7da2ccf
clarified output: avoid confusion with line:column notation;
 
wenzelm 
parents: 
64370 
diff
changeset
 | 
41  | 
      error("Bad range: [" + start.toString + ".." + stop.toString + "]")
 | 
| 38477 | 42  | 
|
| 
64546
 
134ae7da2ccf
clarified output: avoid confusion with line:column notation;
 
wenzelm 
parents: 
64370 
diff
changeset
 | 
43  | 
override def toString: String = "[" + start.toString + ".." + stop.toString + "]"  | 
| 38563 | 44  | 
|
| 
65155
 
25bccf5bf33e
clarified native Text.Offset versus Text.Length index Int;
 
wenzelm 
parents: 
65154 
diff
changeset
 | 
45  | 
def length: Offset = stop - start  | 
| 
47542
 
26d0a76fef0a
more robust Sendback handling: JVM/jEdit paranoia for case matching, treat Pretty body not just XML.Text, replace proper_range only (without trailing whitespace);
 
wenzelm 
parents: 
46712 
diff
changeset
 | 
46  | 
|
| 38427 | 47  | 
def map(f: Offset => Offset): Range = Range(f(start), f(stop))  | 
| 56308 | 48  | 
def +(i: Offset): Range = if (i == 0) this else map(_ + i)  | 
49  | 
def -(i: Offset): Range = if (i == 0) this else map(_ - i)  | 
|
| 38662 | 50  | 
|
| 38725 | 51  | 
def is_singularity: Boolean = start == stop  | 
| 
56590
 
d01d183e84ea
clarified treatment of markup ranges wrt. revert/convert: inflate_singularity allows to retrieve information like language_context more reliably during editing;
 
wenzelm 
parents: 
56473 
diff
changeset
 | 
52  | 
def inflate_singularity: Range = if (is_singularity) Range(start, start + 1) else this  | 
| 38662 | 53  | 
|
| 
58749
 
83b0f633190e
some structure matching, based on line token iterators;
 
wenzelm 
parents: 
57912 
diff
changeset
 | 
54  | 
def touches(i: Offset): Boolean = start <= i && i <= stop  | 
| 
 
83b0f633190e
some structure matching, based on line token iterators;
 
wenzelm 
parents: 
57912 
diff
changeset
 | 
55  | 
|
| 
38565
 
32b924a832c4
further clarification/unification of Position.Range and Text.Range concerning singularities: start offset is always included;
 
wenzelm 
parents: 
38564 
diff
changeset
 | 
56  | 
def contains(i: Offset): Boolean = start == i || start < i && i < stop  | 
| 
 
32b924a832c4
further clarification/unification of Position.Range and Text.Range concerning singularities: start offset is always included;
 
wenzelm 
parents: 
38564 
diff
changeset
 | 
57  | 
def contains(that: Range): Boolean = this.contains(that.start) && that.stop <= this.stop  | 
| 
 
32b924a832c4
further clarification/unification of Position.Range and Text.Range concerning singularities: start offset is always included;
 
wenzelm 
parents: 
38564 
diff
changeset
 | 
58  | 
def overlaps(that: Range): Boolean = this.contains(that.start) || that.contains(this.start)  | 
| 
 
32b924a832c4
further clarification/unification of Position.Range and Text.Range concerning singularities: start offset is always included;
 
wenzelm 
parents: 
38564 
diff
changeset
 | 
59  | 
def compare(that: Range): Int = if (overlaps(that)) 0 else this.start compare that.start  | 
| 38485 | 60  | 
|
| 
45240
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
61  | 
def apart(that: Range): Boolean =  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
62  | 
(this.start max that.start) > (this.stop min that.stop)  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
63  | 
|
| 38564 | 64  | 
def restrict(that: Range): Range =  | 
| 38485 | 65  | 
Range(this.start max that.start, this.stop min that.stop)  | 
| 
43428
 
b41dea5772c6
more robust treatment of partial range restriction;
 
wenzelm 
parents: 
43425 
diff
changeset
 | 
66  | 
|
| 
 
b41dea5772c6
more robust treatment of partial range restriction;
 
wenzelm 
parents: 
43425 
diff
changeset
 | 
67  | 
def try_restrict(that: Range): Option[Range] =  | 
| 
45240
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
68  | 
if (this apart that) None  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
69  | 
else Some(restrict(that))  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
70  | 
|
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
71  | 
def try_join(that: Range): Option[Range] =  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
72  | 
if (this apart that) None  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
73  | 
else Some(Range(this.start min that.start, this.stop max that.stop))  | 
| 38427 | 74  | 
}  | 
| 38426 | 75  | 
|
76  | 
||
| 44379 | 77  | 
/* perspective */  | 
78  | 
||
| 44473 | 79  | 
object Perspective  | 
| 44379 | 80  | 
  {
 | 
| 44474 | 81  | 
val empty: Perspective = Perspective(Nil)  | 
| 44379 | 82  | 
|
| 64678 | 83  | 
def full: Perspective = Perspective(List(Range.full))  | 
| 
46576
 
ae9286f64574
approximate Perspective.full within the bounds of the JVM;
 
wenzelm 
parents: 
46207 
diff
changeset
 | 
84  | 
|
| 44473 | 85  | 
def apply(ranges: Seq[Range]): Perspective =  | 
| 44379 | 86  | 
    {
 | 
| 65154 | 87  | 
val result = new mutable.ListBuffer[Range]  | 
88  | 
var last: Option[Range] = None  | 
|
| 
45240
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
89  | 
      def ship(next: Option[Range]) { result ++= last; last = next }
 | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
90  | 
|
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
91  | 
for (range <- ranges.sortBy(_.start))  | 
| 44473 | 92  | 
      {
 | 
93  | 
        last match {
 | 
|
| 
45240
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
94  | 
case None => ship(Some(range))  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
95  | 
case Some(last_range) =>  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
96  | 
            last_range.try_join(range) match {
 | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
97  | 
case None => ship(Some(range))  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
98  | 
case joined => last = joined  | 
| 
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
99  | 
}  | 
| 44473 | 100  | 
}  | 
| 44379 | 101  | 
}  | 
| 
45240
 
9d97bd3c086a
proper normal form for Perspective.ranges (overlapping ranges could be joined in wrong order, crashing multiple editor views);
 
wenzelm 
parents: 
44474 
diff
changeset
 | 
102  | 
ship(None)  | 
| 44473 | 103  | 
new Perspective(result.toList)  | 
| 44379 | 104  | 
}  | 
| 44473 | 105  | 
}  | 
106  | 
||
| 46712 | 107  | 
final class Perspective private(  | 
108  | 
val ranges: List[Range]) // visible text partitioning in canonical order  | 
|
| 44473 | 109  | 
  {
 | 
110  | 
def is_empty: Boolean = ranges.isEmpty  | 
|
111  | 
def range: Range =  | 
|
112  | 
if (is_empty) Range(0)  | 
|
113  | 
else Range(ranges.head.start, ranges.last.stop)  | 
|
| 
45631
 
6bdf8b926f50
recovered structural equality from 9d97bd3c086a, otherwise update_perspective might be issued over and over again, canceling a pending/slow execution;
 
wenzelm 
parents: 
45470 
diff
changeset
 | 
114  | 
|
| 
 
6bdf8b926f50
recovered structural equality from 9d97bd3c086a, otherwise update_perspective might be issued over and over again, canceling a pending/slow execution;
 
wenzelm 
parents: 
45470 
diff
changeset
 | 
115  | 
override def hashCode: Int = ranges.hashCode  | 
| 
 
6bdf8b926f50
recovered structural equality from 9d97bd3c086a, otherwise update_perspective might be issued over and over again, canceling a pending/slow execution;
 
wenzelm 
parents: 
45470 
diff
changeset
 | 
116  | 
override def equals(that: Any): Boolean =  | 
| 
 
6bdf8b926f50
recovered structural equality from 9d97bd3c086a, otherwise update_perspective might be issued over and over again, canceling a pending/slow execution;
 
wenzelm 
parents: 
45470 
diff
changeset
 | 
117  | 
      that match {
 | 
| 
 
6bdf8b926f50
recovered structural equality from 9d97bd3c086a, otherwise update_perspective might be issued over and over again, canceling a pending/slow execution;
 
wenzelm 
parents: 
45470 
diff
changeset
 | 
118  | 
case other: Perspective => ranges == other.ranges  | 
| 
 
6bdf8b926f50
recovered structural equality from 9d97bd3c086a, otherwise update_perspective might be issued over and over again, canceling a pending/slow execution;
 
wenzelm 
parents: 
45470 
diff
changeset
 | 
119  | 
case _ => false  | 
| 
 
6bdf8b926f50
recovered structural equality from 9d97bd3c086a, otherwise update_perspective might be issued over and over again, canceling a pending/slow execution;
 
wenzelm 
parents: 
45470 
diff
changeset
 | 
120  | 
}  | 
| 57912 | 121  | 
override def toString: String = ranges.toString  | 
| 44379 | 122  | 
}  | 
123  | 
||
124  | 
||
| 38577 | 125  | 
/* information associated with text range */  | 
126  | 
||
| 65154 | 127  | 
sealed case class Info[A](range: Range, info: A)  | 
| 38577 | 128  | 
  {
 | 
| 65154 | 129  | 
def restrict(r: Range): Info[A] = Info(range.restrict(r), info)  | 
130  | 
def try_restrict(r: Range): Option[Info[A]] = range.try_restrict(r).map(Info(_, info))  | 
|
| 65132 | 131  | 
|
132  | 
def map[B](f: A => B): Info[B] = Info(range, f(info))  | 
|
| 38577 | 133  | 
}  | 
134  | 
||
| 45470 | 135  | 
type Markup = Info[XML.Elem]  | 
| 45455 | 136  | 
|
| 38577 | 137  | 
|
| 38426 | 138  | 
/* editing */  | 
| 34286 | 139  | 
|
| 38425 | 140  | 
object Edit  | 
141  | 
  {
 | 
|
| 38426 | 142  | 
def insert(start: Offset, text: String): Edit = new Edit(true, start, text)  | 
143  | 
def remove(start: Offset, text: String): Edit = new Edit(false, start, text)  | 
|
| 64816 | 144  | 
def replace(start: Offset, old_text: String, new_text: String): List[Edit] =  | 
145  | 
if (old_text == new_text) Nil  | 
|
146  | 
else if (old_text == "") List(insert(start, new_text))  | 
|
147  | 
else List(remove(start, old_text), insert(start, new_text))  | 
|
| 38425 | 148  | 
}  | 
| 34286 | 149  | 
|
| 46712 | 150  | 
final class Edit private(val is_insert: Boolean, val start: Offset, val text: String)  | 
| 38425 | 151  | 
  {
 | 
| 57912 | 152  | 
override def toString: String =  | 
| 38425 | 153  | 
      (if (is_insert) "Insert(" else "Remove(") + (start, text).toString + ")"
 | 
| 34286 | 154  | 
|
155  | 
||
| 38425 | 156  | 
/* transform offsets */  | 
| 34286 | 157  | 
|
| 38426 | 158  | 
private def transform(do_insert: Boolean, i: Offset): Offset =  | 
159  | 
if (i < start) i  | 
|
| 43425 | 160  | 
else if (do_insert) i + text.length  | 
| 38426 | 161  | 
else (i - text.length) max start  | 
| 34286 | 162  | 
|
| 43425 | 163  | 
def convert(i: Offset): Offset = transform(is_insert, i)  | 
164  | 
def revert(i: Offset): Offset = transform(!is_insert, i)  | 
|
| 38425 | 165  | 
|
| 34286 | 166  | 
|
| 38425 | 167  | 
/* edit strings */  | 
168  | 
||
| 38426 | 169  | 
private def insert(i: Offset, string: String): String =  | 
170  | 
string.substring(0, i) + text + string.substring(i)  | 
|
| 34276 | 171  | 
|
| 
65155
 
25bccf5bf33e
clarified native Text.Offset versus Text.Length index Int;
 
wenzelm 
parents: 
65154 
diff
changeset
 | 
172  | 
private def remove(i: Offset, count: Offset, string: String): String =  | 
| 38426 | 173  | 
string.substring(0, i) + string.substring(i + count)  | 
| 38425 | 174  | 
|
| 
65155
 
25bccf5bf33e
clarified native Text.Offset versus Text.Length index Int;
 
wenzelm 
parents: 
65154 
diff
changeset
 | 
175  | 
def can_edit(string: String, shift: Offset): Boolean =  | 
| 38425 | 176  | 
shift <= start && start < shift + string.length  | 
177  | 
||
| 
65155
 
25bccf5bf33e
clarified native Text.Offset versus Text.Length index Int;
 
wenzelm 
parents: 
65154 
diff
changeset
 | 
178  | 
def edit(string: String, shift: Offset): (Option[Edit], String) =  | 
| 38425 | 179  | 
if (!can_edit(string, shift)) (Some(this), string)  | 
180  | 
else if (is_insert) (None, insert(start - shift, string))  | 
|
181  | 
      else {
 | 
|
| 38426 | 182  | 
val i = start - shift  | 
183  | 
val count = text.length min (string.length - i)  | 
|
| 38425 | 184  | 
val rest =  | 
185  | 
if (count == text.length) None  | 
|
186  | 
else Some(Edit.remove(start, text.substring(count)))  | 
|
| 38426 | 187  | 
(rest, remove(i, count, string))  | 
| 38425 | 188  | 
}  | 
189  | 
}  | 
|
| 64682 | 190  | 
|
191  | 
||
192  | 
/* text length wrt. encoding */  | 
|
193  | 
||
194  | 
trait Length  | 
|
195  | 
  {
 | 
|
196  | 
def apply(text: String): Int  | 
|
| 65154 | 197  | 
def offset(text: String, i: Int): Option[Offset]  | 
| 64682 | 198  | 
}  | 
199  | 
||
200  | 
object Length extends Length  | 
|
201  | 
  {
 | 
|
202  | 
def apply(text: String): Int = text.length  | 
|
| 65154 | 203  | 
def offset(text: String, i: Int): Option[Offset] =  | 
| 64682 | 204  | 
if (0 <= i && i <= text.length) Some(i) else None  | 
205  | 
||
206  | 
val encodings: List[(String, Length)] =  | 
|
207  | 
List(  | 
|
208  | 
"UTF-16" -> Length,  | 
|
209  | 
"UTF-8" -> UTF8.Length,  | 
|
210  | 
"codepoints" -> Codepoint.Length,  | 
|
211  | 
"symbols" -> Symbol.Length)  | 
|
212  | 
||
213  | 
def encoding(name: String): Length =  | 
|
214  | 
      encodings.collectFirst({ case (a, length) if name == a => length }) getOrElse
 | 
|
215  | 
        error("Bad text length encoding: " + quote(name) +
 | 
|
216  | 
" (expected " + commas_quote(encodings.map(_._1)) + ")")  | 
|
217  | 
}  | 
|
| 34276 | 218  | 
}  |