more systematic text length wrt. encoding;
--- a/src/Pure/General/codepoint.scala Tue Dec 20 10:45:20 2016 +0100
+++ b/src/Pure/General/codepoint.scala Tue Dec 20 16:08:02 2016 +0100
@@ -24,5 +24,30 @@
}
def length(s: String): Int = iterator(s).length
- object Length extends Line.Length { def apply(s: String): Int = Codepoint.length(s) }
+
+ trait Length extends isabelle.Length
+ {
+ protected def codepoint_length(c: Int): Int = 1
+
+ def apply(s: String): Int =
+ (0 /: iterator(s)) { case (n, c) => n + codepoint_length(c) }
+
+ def offset(s: String, i: Int): Option[Text.Offset] =
+ {
+ if (i <= 0 || s.forall(_ < 0x80)) isabelle.Length.offset(s, i)
+ else {
+ val length = s.length
+ var offset = 0
+ var j = 0
+ while (offset < length && j < i) {
+ val c = s.codePointAt(offset)
+ offset += Character.charCount(c)
+ j += codepoint_length(c)
+ }
+ if (j >= i) Some(offset) else None
+ }
+ }
+ }
+
+ object Length extends Length
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/length.scala Tue Dec 20 16:08:02 2016 +0100
@@ -0,0 +1,31 @@
+/* Title: Pure/General/length.scala
+ Author: Makarius
+
+Text length wrt. encoding.
+*/
+
+package isabelle
+
+
+trait Length
+{
+ def apply(text: String): Int
+ def offset(text: String, i: Int): Option[Text.Offset]
+}
+
+object Length extends Length
+{
+ def apply(text: String): Int = text.length
+ def offset(text: String, i: Int): Option[Text.Offset] =
+ if (0 <= i && i <= text.length) Some(i) else None
+
+ def encoding(name: String): Length =
+ name match {
+ case "UTF-8" => UTF8.Length
+ case "UTF-16" => Length
+ case "codepoints" => Codepoint.Length
+ case "symbols" => Symbol.Length
+ case _ =>
+ error("Bad text encoding: " + name + " (expected UTF-8, UTF-16, codepoints, symbols)")
+ }
+}
--- a/src/Pure/General/symbol.scala Tue Dec 20 10:45:20 2016 +0100
+++ b/src/Pure/General/symbol.scala Tue Dec 20 16:08:02 2016 +0100
@@ -129,7 +129,15 @@
def explode(text: CharSequence): List[Symbol] = iterator(text).toList
def length(text: CharSequence): Int = iterator(text).length
- object Length extends Line.Length { def apply(text: String): Int = length(text) }
+
+ object Length extends isabelle.Length
+ {
+ def apply(text: String): Int = length(text)
+ def offset(text: String, i: Int): Option[Text.Offset] =
+ if (i <= 0 || iterator(text).forall(s => s.length == 1)) isabelle.Length.offset(text, i)
+ else if (i <= length(text)) Some(iterator(text).take(i).mkString.length)
+ else None
+ }
/* decoding offsets */
--- a/src/Pure/PIDE/line.scala Tue Dec 20 10:45:20 2016 +0100
+++ b/src/Pure/PIDE/line.scala Tue Dec 20 16:08:02 2016 +0100
@@ -12,12 +12,6 @@
object Line
{
- /* length wrt. encoding */
-
- trait Length { def apply(text: String): Int }
- object Length extends Length { def apply(text: String): Int = text.length }
-
-
/* position */
object Position
@@ -37,21 +31,12 @@
case i => i
}
- private def advance[A](iterator: Iterator[A], is_newline: A => Boolean): Position =
- {
- var l = line
- var c = column
- for (x <- iterator) {
- if (is_newline(x)) { l += 1; c = 0 } else c += 1
+ def advance(text: String, length: Length = Length): Position =
+ if (text.isEmpty) this
+ else {
+ val lines = Library.split_lines(text)
+ Position(line + lines.length - 1, column + length(Library.trim_line(lines.last)))
}
- Position(l, c)
- }
-
- def advance(text: String): Position =
- if (text.isEmpty) this else advance[Char](text.iterator, _ == '\n')
-
- def advance_codepoints(text: String): Position =
- if (text.isEmpty) this else advance[Int](Codepoint.iterator(text), _ == 10)
}
@@ -94,7 +79,7 @@
}
override def hashCode(): Int = lines.hashCode
- private def position(advance: (Position, String) => Position, offset: Text.Offset): Position =
+ def position(offset: Text.Offset, length: Length = Length): Position =
{
@tailrec def move(i: Text.Offset, lines_count: Int, lines_rest: List[Line]): Position =
{
@@ -102,26 +87,23 @@
case Nil => require(i == 0); Position(lines_count, 0)
case line :: ls =>
val n = line.text.length
- if (ls.isEmpty || i <= n) advance(Position(lines_count, 0), line.text.substring(n - i))
+ if (ls.isEmpty || i <= n)
+ Position(lines_count, 0).advance(line.text.substring(n - i), length)
else move(i - (n + 1), lines_count + 1, ls)
}
}
move(offset, 0, lines)
}
- def position(i: Text.Offset): Position = position(_.advance(_), i)
- def position_codepoints(i: Text.Offset): Position = position(_.advance_codepoints(_), i)
-
- // FIXME codepoints
- def offset(pos: Position): Option[Text.Offset] =
+ def offset(pos: Position, length: Length = Length): Option[Text.Offset] =
{
val l = pos.line
val c = pos.column
if (0 <= l && l < lines.length) {
val line_offset =
if (l == 0) 0
- else (0 /: lines.take(l - 1))({ case (n, line) => n + line.text.length + 1 })
- if (c <= lines(l).text.length) Some(line_offset + c) else None
+ else (0 /: lines.iterator.take(l - 1))({ case (n, line) => n + length(line.text) + 1 })
+ length.offset(lines(l).text, c).map(line_offset + _)
}
else None
}
--- a/src/Pure/System/utf8.scala Tue Dec 20 10:45:20 2016 +0100
+++ b/src/Pure/System/utf8.scala Tue Dec 20 16:08:02 2016 +0100
@@ -21,16 +21,14 @@
def bytes(s: String): Array[Byte] = s.getBytes(charset)
- def bytes_length(s: String): Int =
- (0 /: Codepoint.iterator(s)) {
- case (n, i) =>
- if (i < 0x80) n + 1
- else if (i < 0x800) n + 2
- else if (i < 0x10000) n + 3
- else n + 4
- }
-
- object Length extends Line.Length { def apply(s: String): Int = bytes_length(s) }
+ object Length extends Codepoint.Length
+ {
+ override def codepoint_length(c: Int): Int =
+ if (c < 0x80) 1
+ else if (c < 0x800) 2
+ else if (c < 0x10000) 3
+ else 4
+ }
/* permissive UTF-8 decoding */
@@ -99,4 +97,3 @@
new Decode_Chars(decode, buffer, start, end)
}
}
-
--- a/src/Pure/build-jars Tue Dec 20 10:45:20 2016 +0100
+++ b/src/Pure/build-jars Tue Dec 20 16:08:02 2016 +0100
@@ -49,6 +49,7 @@
General/graphics_file.scala
General/http_server.scala
General/json.scala
+ General/length.scala
General/linear_set.scala
General/logger.scala
General/long_name.scala