| author | wenzelm |
| Mon, 12 May 2014 12:38:17 +0200 | |
| changeset 56939 | c2ddbf327bbd |
| parent 56792 | 792dd0e9cebb |
| child 57087 | 16536c15d749 |
| permissions | -rw-r--r-- |
| 56599 | 1 |
/* Title: Pure/General/word.scala |
| 56748 | 2 |
Module: PIDE |
| 56599 | 3 |
Author: Makarius |
4 |
||
| 56747 | 5 |
Support for words within Unicode text. |
| 56599 | 6 |
*/ |
7 |
||
8 |
package isabelle |
|
9 |
||
10 |
||
11 |
import java.util.Locale |
|
12 |
||
13 |
||
14 |
object Word |
|
15 |
{
|
|
| 56601 | 16 |
/* codepoints */ |
17 |
||
18 |
def codepoint_iterator(str: String): Iterator[Int] = |
|
19 |
new Iterator[Int] {
|
|
20 |
var offset = 0 |
|
21 |
def hasNext: Boolean = offset < str.length |
|
22 |
def next: Int = |
|
23 |
{
|
|
24 |
val c = str.codePointAt(offset) |
|
25 |
offset += Character.charCount(c) |
|
26 |
c |
|
27 |
} |
|
28 |
} |
|
29 |
||
| 56792 | 30 |
def codepoint(c: Int): String = new String(Array(c), 0, 1) |
31 |
||
| 56601 | 32 |
|
| 56600 | 33 |
/* case */ |
34 |
||
| 56599 | 35 |
def lowercase(str: String): String = str.toLowerCase(Locale.ROOT) |
36 |
def uppercase(str: String): String = str.toUpperCase(Locale.ROOT) |
|
37 |
||
38 |
def capitalize(str: String): String = |
|
39 |
if (str.length == 0) str |
|
| 56601 | 40 |
else {
|
41 |
val n = Character.charCount(str.codePointAt(0)) |
|
| 56602 | 42 |
uppercase(str.substring(0, n)) + lowercase(str.substring(n)) |
| 56601 | 43 |
} |
44 |
||
|
56609
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
wenzelm
parents:
56602
diff
changeset
|
45 |
def perhaps_capitalize(str: String): String = |
|
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
wenzelm
parents:
56602
diff
changeset
|
46 |
str match {
|
|
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
wenzelm
parents:
56602
diff
changeset
|
47 |
case Case(c) if c != Lowercase => str |
|
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
wenzelm
parents:
56602
diff
changeset
|
48 |
case _ => capitalize(str) |
|
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
wenzelm
parents:
56602
diff
changeset
|
49 |
} |
|
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
wenzelm
parents:
56602
diff
changeset
|
50 |
|
| 56601 | 51 |
sealed abstract class Case |
52 |
case object Lowercase extends Case |
|
53 |
case object Uppercase extends Case |
|
54 |
case object Capitalized extends Case |
|
55 |
||
56 |
object Case |
|
57 |
{
|
|
58 |
def apply(c: Case, str: String): String = |
|
59 |
c match {
|
|
60 |
case Lowercase => lowercase(str) |
|
61 |
case Uppercase => uppercase(str) |
|
62 |
case Capitalized => capitalize(str) |
|
63 |
} |
|
64 |
def unapply(str: String): Option[Case] = |
|
65 |
if (!str.isEmpty) {
|
|
66 |
if (codepoint_iterator(str).forall(Character.isLowerCase(_))) Some(Lowercase) |
|
67 |
else if (codepoint_iterator(str).forall(Character.isUpperCase(_))) Some(Uppercase) |
|
68 |
else {
|
|
69 |
val it = codepoint_iterator(str) |
|
70 |
if (Character.isUpperCase(it.next) && it.forall(Character.isLowerCase(_))) |
|
71 |
Some(Capitalized) |
|
72 |
else None |
|
73 |
} |
|
74 |
} |
|
75 |
else None |
|
76 |
} |
|
| 56599 | 77 |
|
| 56600 | 78 |
|
79 |
/* sequence of words */ |
|
80 |
||
81 |
def implode(words: Iterable[String]): String = words.iterator.mkString(" ")
|
|
82 |
||
83 |
def explode(sep: Char => Boolean, text: String): List[String] = |
|
84 |
Library.separated_chunks(sep, text).map(_.toString).filter(_ != "").toList |
|
85 |
||
86 |
def explode(sep: Char, text: String): List[String] = |
|
87 |
explode(_ == sep, text) |
|
88 |
||
89 |
def explode(text: String): List[String] = |
|
| 56747 | 90 |
explode(Character.isWhitespace(_), text) |
| 56599 | 91 |
} |
92 |