| author | Manuel Eberl <eberlm@in.tum.de> | 
| Fri, 13 Jul 2018 16:54:36 +0100 | |
| changeset 68624 | 205d352ed727 | 
| parent 64610 | 1b89608974e9 | 
| child 71601 | 97ccf48c2f0c | 
| permissions | -rw-r--r-- | 
| 56599 | 1  | 
/* Title: Pure/General/word.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
| 56747 | 4  | 
Support for words within Unicode text.  | 
| 56599 | 5  | 
*/  | 
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
| 
62812
 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 
wenzelm 
parents: 
59319 
diff
changeset
 | 
9  | 
import java.text.Bidi  | 
| 56599 | 10  | 
import java.util.Locale  | 
11  | 
||
12  | 
||
13  | 
object Word  | 
|
14  | 
{
 | 
|
| 
62812
 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 
wenzelm 
parents: 
59319 
diff
changeset
 | 
15  | 
/* directionality */  | 
| 
 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 
wenzelm 
parents: 
59319 
diff
changeset
 | 
16  | 
|
| 
 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 
wenzelm 
parents: 
59319 
diff
changeset
 | 
17  | 
/* Check whether the given text requires bidirectional processing.

   A cheap scan for characters at or above 0x590 comes first, so that text
   consisting only of lower characters never reaches the Bidi analysis. */
def bidi_detect(str: String): Boolean = {
  def is_candidate(c: Char): Boolean = c >= 0x590

  if (str.exists(is_candidate)) Bidi.requiresBidi(str.toCharArray, 0, str.length)
  else false
}
| 
 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 
wenzelm 
parents: 
59319 
diff
changeset
 | 
19  | 
|
| 
 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 
wenzelm 
parents: 
59319 
diff
changeset
 | 
20  | 
/* Enforce left-to-right display of text that bidi_detect flags.

   Flagged text is wrapped as U+200E U+202D <text> U+202C; any other text is
   returned unchanged. */
def bidi_override(str: String): String =
  if (!bidi_detect(str)) str
  else "\u200E\u202D" + str + "\u202C"
| 
 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 
wenzelm 
parents: 
59319 
diff
changeset
 | 
22  | 
|
| 
 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 
wenzelm 
parents: 
59319 
diff
changeset
 | 
23  | 
|
| 56600 | 24  | 
/* case */  | 
25  | 
||
| 56599 | 26  | 
/* Convert to lower case using Locale.ROOT rather than the JVM default locale. */
def lowercase(str: String): String = {
  val locale = Locale.ROOT
  str.toLowerCase(locale)
}
27  | 
/* Convert to upper case using Locale.ROOT rather than the JVM default locale. */
def uppercase(str: String): String = {
  val locale = Locale.ROOT
  str.toUpperCase(locale)
}
|
28  | 
||
29  | 
/* Upper-case the first code point of the text and lower-case the remainder.

   The empty string is returned as is. */
def capitalize(str: String): String =
  if (str.isEmpty) str
  else {
    // number of chars occupied by the first code point (2 for a surrogate pair)
    val width = Character.charCount(str.codePointAt(0))
    val (first, rest) = str.splitAt(width)
    uppercase(first) + lowercase(rest)
  }
35  | 
||
| 
56609
 
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
 
wenzelm 
parents: 
56602 
diff
changeset
 | 
36  | 
/* Capitalize only text that consists entirely of lower-case letters and
   digits; anything containing another kind of code point is left untouched.

   NOTE(review): Codepoint.iterator is defined elsewhere; presumably it yields
   the Unicode code points of the string -- confirm. */
def perhaps_capitalize(str: String): String = {
  val has_other =
    Codepoint.iterator(str).exists(c => !Character.isLowerCase(c) && !Character.isDigit(c))
  if (has_other) str else capitalize(str)
}
| 
56609
 
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
 
wenzelm 
parents: 
56602 
diff
changeset
 | 
40  | 
|
| 56601 | 41  | 
/* Letter-case classification of a word: all lower-case, all upper-case, or
   capitalized (upper-case head followed by lower-case rest). */
sealed abstract class Case
case object Lowercase extends Case
case object Uppercase extends Case
case object Capitalized extends Case

object Case
{
  /* Re-shape the text according to the given case. */
  def apply(c: Case, str: String): String =
    c match {
      case Lowercase => lowercase(str)
      case Uppercase => uppercase(str)
      case Capitalized => capitalize(str)
    }

  /* Recognize the case of the text, if any.

     Checks run in the order Lowercase, Uppercase, Capitalized, so a single
     upper-case letter counts as Uppercase. The empty string and mixed text
     yield None.

     NOTE(review): Codepoint.iterator is defined elsewhere; presumably it
     yields the Unicode code points of the string -- confirm. */
  def unapply(str: String): Option[Case] =
    if (str.nonEmpty) {
      if (Codepoint.iterator(str).forall(Character.isLowerCase(_))) Some(Lowercase)
      else if (Codepoint.iterator(str).forall(Character.isUpperCase(_))) Some(Uppercase)
      else {
        val it = Codepoint.iterator(str)
        // next() consumes the head, so the forall only sees the remainder;
        // str is non-empty here, so the head is expected to exist
        if (Character.isUpperCase(it.next()) && it.forall(Character.isLowerCase(_)))
          Some(Capitalized)
        else None
      }
    }
    else None
}
|
| 56599 | 67  | 
|
| 56600 | 68  | 
|
69  | 
/* sequence of words */  | 
|
70  | 
||
71  | 
  /* Join words into one string, separated by single spaces. */
  def implode(words: Iterable[String]): String = {
    val space = " "
    words.mkString(space)
  }
 | 
|
72  | 
||
73  | 
/* Split text into words at characters matching the separator predicate;
   empty chunks are dropped.

   NOTE(review): Library.separated_chunks is defined elsewhere; presumably it
   yields the pieces of text between separator characters -- confirm. */
def explode(sep: Char => Boolean, text: String): List[String] = {
  val chunks = Library.separated_chunks(sep, text).map(_.toString)
  chunks.filter(_.nonEmpty).toList
}
|
75  | 
||
76  | 
/* Split text into words at every occurrence of one separator character,
   delegating to the predicate-based variant of explode. */
def explode(sep: Char, text: String): List[String] =
  explode((c: Char) => c == sep, text)
|
78  | 
||
79  | 
/* Split text into words at whitespace (as defined by Character.isWhitespace),
   delegating to the predicate-based variant of explode. */
def explode(text: String): List[String] =
  explode((c: Char) => Character.isWhitespace(c), text)
| 63450 | 81  | 
|
82  | 
||
83  | 
/* brackets */  | 
|
84  | 
||
85  | 
  /* Characters treated as opening brackets. */
  val open_brackets: String = "([{«‹⟨⌈⌊⦇⟦⦃"
 | 
|
86  | 
/* Characters treated as closing brackets. */
val close_brackets: String = ")]}»›⟩⌉⌋⦈⟧⦄"
|
| 56599 | 87  | 
}  |