author | wenzelm |
Sun, 27 Oct 2024 11:02:21 +0100 | |
changeset 81266 | 8300511f4c45 |
parent 80477 | d32748570069 |
child 81647 | ae670d860912 |
permissions | -rw-r--r-- |
56599 | 1 |
/* Title: Pure/General/word.scala |
2 |
Author: Makarius |
|
3 |
||
56747 | 4 |
Support for words within Unicode text. |
56599 | 5 |
*/ |
6 |
||
7 |
package isabelle |
|
8 |
||
62812
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
9 |
import java.text.Bidi |
56599 | 10 |
import java.util.Locale |
11 |
||
12 |
||
75393 | 13 |
object Word { |
62812
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
14 |
/* directionality */ |
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
15 |
|
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
16 |
def bidi_detect(str: String): Boolean = |
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
17 |
str.exists(c => c >= 0x590) && Bidi.requiresBidi(str.toArray, 0, str.length) |
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
18 |
|
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
19 |
def bidi_override(str: String): String = |
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
20 |
if (bidi_detect(str)) "\u200E\u202D" + str + "\u202C" else str |
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
21 |
|
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
wenzelm
parents:
59319
diff
changeset
|
22 |
|
56600 | 23 |
/* case */ |
24 |
||
56599 | 25 |
def lowercase(str: String): String = str.toLowerCase(Locale.ROOT) |
26 |
def uppercase(str: String): String = str.toUpperCase(Locale.ROOT) |
|
27 |
||
78614 | 28 |
def capitalized(str: String): String = |
56599 | 29 |
if (str.length == 0) str |
56601 | 30 |
else { |
31 |
val n = Character.charCount(str.codePointAt(0)) |
|
56602 | 32 |
uppercase(str.substring(0, n)) + lowercase(str.substring(n)) |
56601 | 33 |
} |
34 |
||
78614 | 35 |
def perhaps_capitalized(str: String): String = |
64610 | 36 |
if (Codepoint.iterator(str).forall(c => Character.isLowerCase(c) || Character.isDigit(c))) |
78614 | 37 |
capitalized(str) |
57087
16536c15d749
capitalize even more carefully (see 5ac67041ccf8), e.g. relevant for option "z3_non_commercial" and prospective "MaSh";
wenzelm
parents:
56792
diff
changeset
|
38 |
else str |
56609
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
wenzelm
parents:
56602
diff
changeset
|
39 |
|
75393 | 40 |
object Case { |
56601 | 41 |
def apply(c: Case, str: String): String = |
42 |
c match { |
|
78613 | 43 |
case Case.lowercase => Word.lowercase(str) |
44 |
case Case.uppercase => Word.uppercase(str) |
|
78614 | 45 |
case Case.capitalized => Word.capitalized(str) |
56601 | 46 |
} |
47 |
def unapply(str: String): Option[Case] = |
|
59319 | 48 |
if (str.nonEmpty) { |
78613 | 49 |
if (Codepoint.iterator(str).forall(Character.isLowerCase)) Some(Case.lowercase) |
50 |
else if (Codepoint.iterator(str).forall(Character.isUpperCase)) Some(Case.uppercase) |
|
56601 | 51 |
else { |
64610 | 52 |
val it = Codepoint.iterator(str) |
73344 | 53 |
if (Character.isUpperCase(it.next()) && it.forall(Character.isLowerCase)) |
78613 | 54 |
Some(Case.capitalized) |
56601 | 55 |
else None |
56 |
} |
|
57 |
} |
|
58 |
else None |
|
59 |
} |
|
56599 | 60 |
|
78613 | 61 |
enum Case { case lowercase, uppercase, capitalized } |
62 |
||
56600 | 63 |
|
64 |
/* sequence of words */ |
|
65 |
||
66 |
def implode(words: Iterable[String]): String = words.iterator.mkString(" ") |
|
67 |
||
68 |
def explode(sep: Char => Boolean, text: String): List[String] = |
|
80477 | 69 |
List.from( |
70 |
for (s <- Library.separated_chunks(sep, text) if !s.isEmpty) |
|
71 |
yield s.toString) |
|
56600 | 72 |
|
73 |
def explode(sep: Char, text: String): List[String] = |
|
74 |
explode(_ == sep, text) |
|
75 |
||
76 |
def explode(text: String): List[String] = |
|
71867 | 77 |
explode(Character.isWhitespace _, text) |
63450 | 78 |
|
79 |
||
80 |
/* brackets */ |
|
81 |
||
71866 | 82 |
val open_brackets = "([{«‹⟨⌈⌊⦇⟦⦃⟪" |
83 |
val close_brackets = ")]}»›⟩⌉⌋⦈⟧⦄⟫" |
|
56599 | 84 |
} |