src/Tools/jEdit/src/spell_checker.scala
author wenzelm
Sat, 12 Apr 2014 21:38:38 +0200
changeset 56552 76cf86240cb7
parent 56549 7cfc7aa121f3
child 56557 18d921496aa5
permissions -rw-r--r--
some case-mangling; clarified use of locale;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     1
/*  Title:      Tools/jEdit/src/spell_checker.scala
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     3
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     4
Spell-checker based on JOrtho (see http://sourceforge.net/projects/jortho).
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     5
*/
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     6
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     7
package isabelle.jedit
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     8
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
     9
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    10
import isabelle._
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    11
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    12
import java.lang.Class
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    13
import java.net.URL
56549
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    14
import java.util.Locale
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    15
import java.text.BreakIterator
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    16
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    17
import scala.collection.mutable
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    18
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    19
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    20
object Spell_Checker
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    21
{
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    22
  def known_dictionaries: List[String] =
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    23
    space_explode(',', Isabelle_System.getenv_strict("JORTHO_DICTIONARIES"))
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    24
56549
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    25
  def apply(lang: String): Spell_Checker =
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    26
    if (known_dictionaries.contains(lang))
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    27
      new Spell_Checker(
56549
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    28
        lang, Locale.forLanguageTag(lang),
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    29
        classOf[com.inet.jortho.SpellChecker].getResource("dictionary_" + lang + ".ortho"))
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    30
    else error("Unknown spell-checker dictionary for " + quote(lang))
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    31
56549
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    32
  def apply(lang: String, locale: Locale, dict: URL): Spell_Checker =
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    33
    new Spell_Checker(lang, locale, dict)
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    34
}
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    35
56549
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    36
class Spell_Checker private(lang: String, locale: Locale, dict: URL)
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    37
{
56549
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    38
  override def toString: String = "Spell_Checker(" + lang + ")"
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    39
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    40
  private val dictionary =
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    41
  {
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    42
    val factory_class = Class.forName("com.inet.jortho.DictionaryFactory")
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    43
    val factory_cons = factory_class.getConstructor()
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    44
    factory_cons.setAccessible(true)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    45
    val factory = factory_cons.newInstance()
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    46
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    47
    val load_word_list = factory_class.getDeclaredMethod("loadWordList", classOf[URL])
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    48
    load_word_list.setAccessible(true)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    49
    load_word_list.invoke(factory, dict)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    50
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    51
    val create = factory_class.getDeclaredMethod("create")
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    52
    create.setAccessible(true)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    53
    create.invoke(factory)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    54
  }
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    55
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    56
  def load(file_name: String)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    57
  {
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    58
    val m = dictionary.getClass.getDeclaredMethod("load", classOf[String])
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    59
    m.setAccessible(true)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    60
    m.invoke(dictionary, file_name)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    61
  }
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    62
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    63
  def save(file_name: String)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    64
  {
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    65
    val m = dictionary.getClass.getDeclaredMethod("save", classOf[String])
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    66
    m.setAccessible(true)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    67
    m.invoke(dictionary, file_name)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    68
  }
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    69
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    70
  def add(word: String)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    71
  {
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    72
    val m = dictionary.getClass.getDeclaredMethod("add", classOf[String])
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    73
    m.setAccessible(true)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    74
    m.invoke(dictionary, word)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    75
  }
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    76
56552
76cf86240cb7 some case-mangling;
wenzelm
parents: 56549
diff changeset
    77
  def contains(word: String): Boolean =
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    78
  {
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    79
    val m = dictionary.getClass.getSuperclass.getDeclaredMethod("exist", classOf[String])
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    80
    m.setAccessible(true)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    81
    m.invoke(dictionary, word).asInstanceOf[java.lang.Boolean].booleanValue
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    82
  }
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    83
56552
76cf86240cb7 some case-mangling;
wenzelm
parents: 56549
diff changeset
    84
  def check(word: String): Boolean =
76cf86240cb7 some case-mangling;
wenzelm
parents: 56549
diff changeset
    85
    contains(word) ||
76cf86240cb7 some case-mangling;
wenzelm
parents: 56549
diff changeset
    86
    Library.is_all_caps(word) && contains(Library.lowercase(word, locale)) ||
76cf86240cb7 some case-mangling;
wenzelm
parents: 56549
diff changeset
    87
    Library.is_capitalized(word) &&
76cf86240cb7 some case-mangling;
wenzelm
parents: 56549
diff changeset
    88
      (contains(Library.lowercase(word, locale)) || contains(Library.uppercase(word, locale)))
76cf86240cb7 some case-mangling;
wenzelm
parents: 56549
diff changeset
    89
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    90
  def complete(word: String): List[String] =
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    91
  {
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    92
    val m = dictionary.getClass.getSuperclass.
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    93
      getDeclaredMethod("searchSuggestions", classOf[String])
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    94
    m.setAccessible(true)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    95
    m.invoke(dictionary, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString)
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
    96
  }
56549
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    97
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    98
  def bad_words(text: String): List[Text.Range] =
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
    99
  {
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   100
    val result = new mutable.ListBuffer[Text.Range]
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   101
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   102
    val it = BreakIterator.getWordInstance(locale)
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   103
    it.setText(text)
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   104
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   105
    var i = 0
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   106
    var j = it.next
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   107
    while (j != BreakIterator.DONE) {
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   108
      val word = text.substring(i, j)
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   109
      if (word.length >= 2 && Character.isLetter(word(0)) && !check(word))
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   110
        result += Text.Range(i, j)
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   111
      i = j
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   112
      j = it.next
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   113
    }
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   114
    result.toList
7cfc7aa121f3 added bad_words;
wenzelm
parents: 56547
diff changeset
   115
  }
56547
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
   116
}
e9bb73d7b6cf added spell-checker based on jortho-1.0;
wenzelm
parents:
diff changeset
   117