| author | wenzelm | 
| Fri, 26 Sep 2014 15:10:02 +0200 | |
| changeset 58465 | bd06c6479748 | 
| parent 57087 | 16536c15d749 | 
| child 59319 | 677615cba30d | 
| permissions | -rw-r--r-- | 
| 56599 | 1 | /* Title: Pure/General/word.scala | 
| 56748 | 2 | Module: PIDE | 
| 56599 | 3 | Author: Makarius | 
| 4 | ||
| 56747 | 5 | Support for words within Unicode text. | 
| 56599 | 6 | */ | 
| 7 | ||
| 8 | package isabelle | |
| 9 | ||
| 10 | ||
| 11 | import java.util.Locale | |
| 12 | ||
| 13 | ||
| 14 | object Word | |
| 15 | {
 | |
| 56601 | 16 | /* codepoints */ | 
| 17 | ||
// Iterate over the Unicode codepoints of str (not its UTF-16 chars): a
// surrogate pair is delivered as a single Int.  Each step advances by
// Character.charCount of the codepoint just read.
def codepoint_iterator(str: String): Iterator[Int] =
  new Iterator[Int] {
    // index of the next unread UTF-16 char in str
    private var offset = 0

    def hasNext: Boolean = offset < str.length

    // declared with "()" since it mutates the position; an exhausted
    // iterator fails with the conventional NoSuchElementException
    def next(): Int =
    {
      if (!hasNext) throw new NoSuchElementException("next on empty codepoint iterator")
      val c = str.codePointAt(offset)
      offset += Character.charCount(c)
      c
    }
  }
| 29 | ||
// String consisting of the single Unicode codepoint c (one or two UTF-16 chars).
def codepoint(c: Int): String =
  new String(Character.toChars(c))
| 31 | ||
| 56601 | 32 | |
| 56600 | 33 | /* case */ | 
| 34 | ||
// Lowercase conversion under Locale.ROOT, i.e. independent of the default locale.
def lowercase(str: String): String =
  str.toLowerCase(Locale.ROOT)
// Uppercase conversion under Locale.ROOT, i.e. independent of the default locale.
def uppercase(str: String): String =
  str.toUpperCase(Locale.ROOT)
| 37 | ||
// Uppercase the first codepoint of str and lowercase everything after it;
// the empty string is returned unchanged.
def capitalize(str: String): String =
  if (str.isEmpty) str
  else {
    // split after the first codepoint, which may occupy two UTF-16 chars
    val (first, rest) = str.splitAt(Character.charCount(str.codePointAt(0)))
    uppercase(first) + lowercase(rest)
  }
| 44 | ||
| 56609 
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
 wenzelm parents: 
56602diff
// Capitalize str only if every codepoint is a lowercase letter or a digit;
// any other string (mixed case, underscores, ...) is returned unchanged.
def perhaps_capitalize(str: String): String =
{
  val plain =
    codepoint_iterator(str).forall(c => Character.isLowerCase(c) || Character.isDigit(c))
  if (plain) capitalize(str) else str
}
| 56609 
5ac67041ccf8
capitalize more carefully, e.g. relevant for option "ML_exception_trace";
 wenzelm parents: 
56602diff
changeset | 49 | |
// Letter-case conventions for a word.
sealed abstract class Case
case object Lowercase extends Case
case object Uppercase extends Case
case object Capitalized extends Case

object Case
{
  // Convert str to the given case convention.
  def apply(c: Case, str: String): String =
    c match {
      case Capitalized => capitalize(str)
      case Uppercase => uppercase(str)
      case Lowercase => lowercase(str)
    }

  // Recognize the case convention of a non-empty str, checking in the order
  // all-lowercase, all-uppercase, capitalized; None if nothing fits.
  def unapply(str: String): Option[Case] =
  {
    def all(pred: Int => Boolean): Boolean = codepoint_iterator(str).forall(pred)

    if (str.isEmpty) None
    else if (all(Character.isLowerCase(_))) Some(Lowercase)
    else if (all(Character.isUpperCase(_))) Some(Uppercase)
    else {
      // first codepoint uppercase, all remaining ones lowercase
      val rest = codepoint_iterator(str)
      val first = rest.next()
      if (Character.isUpperCase(first) && rest.forall(Character.isLowerCase(_)))
        Some(Capitalized)
      else None
    }
  }
}
| 56599 | 76 | |
| 56600 | 77 | |
| 78 | /* sequence of words */ | |
| 79 | ||
// Join words into one string, separated by single spaces.
def implode(words: Iterable[String]): String =
  words.mkString(" ")
 | |
| 81 | ||
// Split text into words at chars satisfying sep, dropping empty chunks.
def explode(sep: Char => Boolean, text: String): List[String] =
{
  val chunks = Library.separated_chunks(sep, text).map(_.toString)
  chunks.filterNot(_.isEmpty).toList
}

// Split text into words at each occurrence of the char sep.
def explode(sep: Char, text: String): List[String] =
  explode((c: Char) => c == sep, text)

// Split text into words at whitespace, as judged by Character.isWhitespace.
def explode(text: String): List[String] =
  explode((c: Char) => Character.isWhitespace(c), text)
| 56599 | 90 | } | 
| 91 |