src/Pure/General/word.scala
author wenzelm
Mon, 01 Aug 2016 11:54:32 +0200
changeset 63573 8976c5bc9e97
parent 63450 afd657fffdf9
child 64370 865b39487b5d
permissions -rw-r--r--
more restrictive retrieval of rules for matching instead of unification -- relevant for performance of printing terms;

/*  Title:      Pure/General/word.scala
    Module:     PIDE
    Author:     Makarius

Support for words within Unicode text.
*/

package isabelle

import java.text.Bidi
import java.util.Locale


object Word
{
  /* codepoints */

  def codepoint_iterator(str: String): Iterator[Int] =
    new Iterator[Int] {
      var offset = 0
      def hasNext: Boolean = offset < str.length
      def next: Int =
      {
        val c = str.codePointAt(offset)
        offset += Character.charCount(c)
        c
      }
    }

  def codepoint(c: Int): String = new String(Array(c), 0, 1)


  /* directionality */

  def bidi_detect(str: String): Boolean =
    str.exists(c => c >= 0x590) && Bidi.requiresBidi(str.toArray, 0, str.length)

  def bidi_override(str: String): String =
    if (bidi_detect(str)) "\u200E\u202D" + str + "\u202C" else str


  /* case */

  def lowercase(str: String): String = str.toLowerCase(Locale.ROOT)
  def uppercase(str: String): String = str.toUpperCase(Locale.ROOT)

  def capitalize(str: String): String =
    if (str.length == 0) str
    else {
      val n = Character.charCount(str.codePointAt(0))
      uppercase(str.substring(0, n)) + lowercase(str.substring(n))
    }

  def perhaps_capitalize(str: String): String =
    if (codepoint_iterator(str).forall(c => Character.isLowerCase(c) || Character.isDigit(c)))
      capitalize(str)
    else str

  sealed abstract class Case
  case object Lowercase extends Case
  case object Uppercase extends Case
  case object Capitalized extends Case

  object Case
  {
    def apply(c: Case, str: String): String =
      c match {
        case Lowercase => lowercase(str)
        case Uppercase => uppercase(str)
        case Capitalized => capitalize(str)
      }
    def unapply(str: String): Option[Case] =
      if (str.nonEmpty) {
        if (codepoint_iterator(str).forall(Character.isLowerCase(_))) Some(Lowercase)
        else if (codepoint_iterator(str).forall(Character.isUpperCase(_))) Some(Uppercase)
        else {
          val it = codepoint_iterator(str)
          if (Character.isUpperCase(it.next) && it.forall(Character.isLowerCase(_)))
            Some(Capitalized)
          else None
        }
      }
      else None
  }


  /* sequence of words */

  def implode(words: Iterable[String]): String = words.iterator.mkString(" ")

  def explode(sep: Char => Boolean, text: String): List[String] =
    Library.separated_chunks(sep, text).map(_.toString).filter(_ != "").toList

  def explode(sep: Char, text: String): List[String] =
    explode(_ == sep, text)

  def explode(text: String): List[String] =
    explode(Character.isWhitespace(_), text)


  /* brackets */

  val open_brackets = "([{«‹⟨⌈⌊⦇⟦⦃"
  val close_brackets = ")]}»›⟩⌉⌋⦈⟧⦄"
}