| author | wenzelm | 
| Sat, 11 Jan 2025 21:58:47 +0100 | |
| changeset 81766 | ebf113cd6d2c | 
| parent 81647 | ae670d860912 | 
| child 82120 | a4aa45999dd7 | 
| permissions | -rw-r--r-- | 
| 56599 | 1 | /* Title: Pure/General/word.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 56747 | 4 | Support for words within Unicode text. | 
| 56599 | 5 | */ | 
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 62812 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 9 | import java.text.Bidi | 
| 56599 | 10 | import java.util.Locale | 
| 11 | ||
| 12 | ||
| 75393 | 13 | object Word {
 | 
| 62812 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 14 | /* directionality */ | 
| 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 15 | |
| 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 16 | def bidi_detect(str: String): Boolean = | 
| 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 17 | str.exists(c => c >= 0x590) && Bidi.requiresBidi(str.toArray, 0, str.length) | 
| 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 18 | |
| 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 19 | def bidi_override(str: String): String = | 
| 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 20 | if (bidi_detect(str)) "\u200E\u202D" + str + "\u202C" else str | 
| 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 21 | |
| 
ce22e5c3d4ce
more robust display of bidirectional Unicode text: enforce left-to-right;
 wenzelm parents: 
59319diff
changeset | 22 | |
| 56600 | 23 | /* case */ | 
| 24 | ||
| 56599 | 25 | def lowercase(str: String): String = str.toLowerCase(Locale.ROOT) | 
| 26 | def uppercase(str: String): String = str.toUpperCase(Locale.ROOT) | |
| 27 | ||
| 78614 | 28 | def capitalized(str: String): String = | 
| 56599 | 29 | if (str.length == 0) str | 
| 56601 | 30 |     else {
 | 
| 31 | val n = Character.charCount(str.codePointAt(0)) | |
| 56602 | 32 | uppercase(str.substring(0, n)) + lowercase(str.substring(n)) | 
| 56601 | 33 | } | 
| 34 | ||
| 78614 | 35 | def perhaps_capitalized(str: String): String = | 
| 64610 | 36 | if (Codepoint.iterator(str).forall(c => Character.isLowerCase(c) || Character.isDigit(c))) | 
| 78614 | 37 | capitalized(str) | 
| 57087 
16536c15d749
capitalize even more carefully (see 5ac67041ccf8), e.g. relevant for option "z3_non_commercial" and prospective "MaSh";
 wenzelm parents: 
56792diff
changeset | 38 | else str | 
| 56609 
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
 wenzelm parents: 
56602diff
changeset | 39 | |
| 75393 | 40 |   object Case {
 | 
| 56601 | 41 | def apply(c: Case, str: String): String = | 
| 42 |       c match {
 | |
| 78613 | 43 | case Case.lowercase => Word.lowercase(str) | 
| 44 | case Case.uppercase => Word.uppercase(str) | |
| 78614 | 45 | case Case.capitalized => Word.capitalized(str) | 
| 56601 | 46 | } | 
| 47 | def unapply(str: String): Option[Case] = | |
| 59319 | 48 |       if (str.nonEmpty) {
 | 
| 78613 | 49 | if (Codepoint.iterator(str).forall(Character.isLowerCase)) Some(Case.lowercase) | 
| 50 | else if (Codepoint.iterator(str).forall(Character.isUpperCase)) Some(Case.uppercase) | |
| 56601 | 51 |         else {
 | 
| 64610 | 52 | val it = Codepoint.iterator(str) | 
| 73344 | 53 | if (Character.isUpperCase(it.next()) && it.forall(Character.isLowerCase)) | 
| 78613 | 54 | Some(Case.capitalized) | 
| 56601 | 55 | else None | 
| 56 | } | |
| 57 | } | |
| 58 | else None | |
| 59 | } | |
| 56599 | 60 | |
| 78613 | 61 |   enum Case { case lowercase, uppercase, capitalized }
 | 
| 62 | ||
| 56600 | 63 | |
| 64 | /* sequence of words */ | |
| 65 | ||
| 66 |   def implode(words: Iterable[String]): String = words.iterator.mkString(" ")
 | |
| 67 | ||
| 68 | def explode(sep: Char => Boolean, text: String): List[String] = | |
| 80477 | 69 | List.from( | 
| 70 | for (s <- Library.separated_chunks(sep, text) if !s.isEmpty) | |
| 71 | yield s.toString) | |
| 56600 | 72 | |
| 73 | def explode(sep: Char, text: String): List[String] = | |
| 74 | explode(_ == sep, text) | |
| 75 | ||
| 76 | def explode(text: String): List[String] = | |
| 71867 | 77 | explode(Character.isWhitespace _, text) | 
| 63450 | 78 | |
| 81647 | 79 |   def informal(text: String): String = implode(explode('_', text))
 | 
| 80 | ||
| 63450 | 81 | |
| 82 | /* brackets */ | |
| 83 | ||
| 71866 | 84 |   val open_brackets = "([{«‹⟨⌈⌊⦇⟦⦃⟪"
 | 
| 85 | val close_brackets = ")]}»›⟩⌉⌋⦈⟧⦄⟫" | |
| 56599 | 86 | } |