src/Pure/Tools/spell_checker.scala
author wenzelm
Tue Nov 07 16:50:26 2017 +0100 (20 months ago)
changeset 67026 687c822ee5e3
parent 67014 e6a695d6a6b2
child 68224 1f7308050349
permissions -rw-r--r--
tuned signature;
     1 /*  Title:      Pure/Tools/spell_checker.scala
     2     Author:     Makarius
     3 
     4 Spell checker with completion, based on JOrtho (see
     5 http://sourceforge.net/projects/jortho).
     6 */
     7 
     8 package isabelle
     9 
    10 
    11 import java.lang.Class
    12 
    13 import scala.collection.mutable
    14 import scala.annotation.tailrec
    15 import scala.collection.immutable.SortedMap
    16 
    17 
    18 object Spell_Checker
    19 {
    20   /* words within text */
    21 
    22   def marked_words(base: Text.Offset, text: String, mark: Text.Info[String] => Boolean)
    23     : List[Text.Info[String]] =
    24   {
    25     val result = new mutable.ListBuffer[Text.Info[String]]
    26     var offset = 0
    27 
    28     def apostrophe(c: Int): Boolean =
    29       c == '\'' && (offset + 1 == text.length || text(offset + 1) != '\'')
    30 
    31     @tailrec def scan(pred: Int => Boolean)
    32     {
    33       if (offset < text.length) {
    34         val c = text.codePointAt(offset)
    35         if (pred(c)) {
    36           offset += Character.charCount(c)
    37           scan(pred)
    38         }
    39       }
    40     }
    41 
    42     while (offset < text.length) {
    43       scan(c => !Character.isLetter(c))
    44       val start = offset
    45       scan(c => Character.isLetterOrDigit(c) || apostrophe(c))
    46       val stop = offset
    47       if (stop - start >= 2) {
    48         val info = Text.Info(Text.Range(base + start, base + stop), text.substring(start, stop))
    49         if (mark(info)) result += info
    50       }
    51     }
    52     result.toList
    53   }
    54 
    55   def current_word(rendering: Rendering, range: Text.Range): Option[Text.Info[String]] =
    56   {
    57     for {
    58       spell_range <- rendering.spell_checker_point(range)
    59       text <- rendering.model.get_text(spell_range)
    60       info <- marked_words(spell_range.start, text, info => info.range.overlaps(range)).headOption
    61     } yield info
    62   }
    63 
    64 
    65   /* dictionaries */
    66 
    67   class Dictionary private[Spell_Checker](val path: Path)
    68   {
    69     val lang = path.split_ext._1.base_name
    70     val user_path = Path.explode("$ISABELLE_HOME_USER/dictionaries") + Path.basic(lang)
    71     override def toString: String = lang
    72   }
    73 
    74   private object Decl
    75   {
    76     def apply(name: String, include: Boolean): String =
    77       if (include) name else "-" + name
    78 
    79     def unapply(decl: String): Option[(String, Boolean)] =
    80     {
    81       val decl1 = decl.trim
    82       if (decl1 == "" || decl1.startsWith("#")) None
    83       else
    84         Library.try_unprefix("-", decl1.trim) match {
    85           case None => Some((decl1, true))
    86           case Some(decl2) => Some((decl2, false))
    87         }
    88     }
    89   }
    90 
    91   def dictionaries(): List[Dictionary] =
    92     for {
    93       path <- Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES"))
    94       if path.is_file
    95     } yield new Dictionary(path)
    96 
    97 
    98   /* create spell checker */
    99 
   100   def apply(dictionary: Dictionary): Spell_Checker = new Spell_Checker(dictionary)
   101 
   102   private sealed case class Update(include: Boolean, permanent: Boolean)
   103 }
   104 
   105 
   106 class Spell_Checker private(dictionary: Spell_Checker.Dictionary)
   107 {
   108   override def toString: String = dictionary.toString
   109 
   110 
   111   /* main dictionary content */
   112 
   113   private var dict = new Object
   114   private var updates = SortedMap.empty[String, Spell_Checker.Update]
   115 
   116   private def included_iterator(): Iterator[String] =
   117     for {
   118       (word, upd) <- updates.iterator
   119       if upd.include
   120     } yield word
   121 
   122   private def excluded(word: String): Boolean =
   123     updates.get(word) match {
   124       case Some(upd) => !upd.include
   125       case None => false
   126     }
   127 
   128   private def load()
   129   {
   130     val main_dictionary = split_lines(File.read_gzip(dictionary.path))
   131 
   132     val permanent_updates =
   133       if (dictionary.user_path.is_file)
   134         for {
   135           Spell_Checker.Decl(word, include) <- split_lines(File.read(dictionary.user_path))
   136         } yield (word, Spell_Checker.Update(include, true))
   137       else Nil
   138 
   139     updates =
   140       updates -- (for ((name, upd) <- updates.iterator; if upd.permanent) yield name) ++
   141         permanent_updates
   142 
   143     val factory_class = Class.forName("com.inet.jortho.DictionaryFactory")
   144     val factory_cons = factory_class.getConstructor()
   145     factory_cons.setAccessible(true)
   146     val factory = factory_cons.newInstance()
   147 
   148     val add = Untyped.method(factory_class, "add", classOf[String])
   149 
   150     for {
   151       word <- main_dictionary.iterator ++ included_iterator()
   152       if !excluded(word)
   153     } add.invoke(factory, word)
   154 
   155     dict = Untyped.method(factory_class, "create").invoke(factory)
   156   }
   157   load()
   158 
   159   private def save()
   160   {
   161     val permanent_decls =
   162       (for {
   163         (word, upd) <- updates.iterator
   164         if upd.permanent
   165       } yield Spell_Checker.Decl(word, upd.include)).toList
   166 
   167     if (permanent_decls.nonEmpty || dictionary.user_path.is_file) {
   168       val header = """# User updates for spell-checker dictionary
   169 #
   170 #   * each line contains at most one word
   171 #   * extra blanks are ignored
   172 #   * lines starting with "#" are stripped
   173 #   * lines starting with "-" indicate excluded words
   174 #
   175 #:mode=text:encoding=UTF-8:
   176 
   177 """
   178       Isabelle_System.mkdirs(dictionary.user_path.expand.dir)
   179       File.write(dictionary.user_path, header + cat_lines(permanent_decls))
   180     }
   181   }
   182 
   183   def update(word: String, include: Boolean, permanent: Boolean)
   184   {
   185     updates += (word -> Spell_Checker.Update(include, permanent))
   186 
   187     if (include) {
   188       if (permanent) save()
   189       Untyped.method(dict.getClass, "add", classOf[String]).invoke(dict, word)
   190     }
   191     else { save(); load() }
   192   }
   193 
   194   def reset()
   195   {
   196     updates = SortedMap.empty
   197     load()
   198   }
   199 
   200   def reset_enabled(): Int =
   201     updates.valuesIterator.filter(upd => !upd.permanent).length
   202 
   203 
   204   /* check known words */
   205 
   206   def contains(word: String): Boolean =
   207     Untyped.method(dict.getClass.getSuperclass, "exist", classOf[String]).
   208       invoke(dict, word).asInstanceOf[java.lang.Boolean].booleanValue
   209 
   210   def check(word: String): Boolean =
   211     word match {
   212       case Word.Case(c) if c != Word.Lowercase =>
   213         contains(word) || contains(Word.lowercase(word))
   214       case _ =>
   215         contains(word)
   216     }
   217 
   218   def marked_words(base: Text.Offset, text: String): List[Text.Info[String]] =
   219     Spell_Checker.marked_words(base, text, info => !check(info.info))
   220 
   221 
   222   /* completion: suggestions for unknown words */
   223 
   224   private def suggestions(word: String): Option[List[String]] =
   225   {
   226     val res =
   227       Untyped.method(dict.getClass.getSuperclass, "searchSuggestions", classOf[String]).
   228         invoke(dict, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString)
   229     if (res.isEmpty) None else Some(res)
   230   }
   231 
   232   def complete(word: String): List[String] =
   233     if (check(word)) Nil
   234     else {
   235       val word_case = Word.Case.unapply(word)
   236       def recover_case(s: String) =
   237         word_case match {
   238           case Some(c) => Word.Case(c, s)
   239           case None => s
   240         }
   241       val result =
   242         word_case match {
   243           case Some(c) if c != Word.Lowercase =>
   244             suggestions(word) orElse suggestions(Word.lowercase(word))
   245           case _ =>
   246             suggestions(word)
   247         }
   248       result.getOrElse(Nil).map(recover_case)
   249     }
   250 
   251   def completion(rendering: Rendering, caret: Text.Offset): Option[Completion.Result] =
   252   {
   253     val caret_range = rendering.before_caret_range(caret)
   254     for {
   255       word <- Spell_Checker.current_word(rendering, caret_range)
   256       words = complete(word.info)
   257       if words.nonEmpty
   258       descr = "(from dictionary " + quote(dictionary.toString) + ")"
   259       items =
   260         words.map(w => Completion.Item(word.range, word.info, "", List(w, descr), w, 0, false))
   261     } yield Completion.Result(word.range, word.info, false, items)
   262   }
   263 }
   264 
   265 class Spell_Checker_Variable
   266 {
   267   private val no_spell_checker: (String, Option[Spell_Checker]) = ("", None)
   268   private var current_spell_checker = no_spell_checker
   269 
   270   def get: Option[Spell_Checker] = synchronized { current_spell_checker._2 }
   271 
   272   def update(options: Options): Unit = synchronized {
   273     if (options.bool("spell_checker")) {
   274       val lang = options.string("spell_checker_dictionary")
   275       if (current_spell_checker._1 != lang) {
   276         Spell_Checker.dictionaries.find(_.lang == lang) match {
   277           case Some(dictionary) =>
   278             val spell_checker =
   279               Exn.capture { Spell_Checker(dictionary) } match {
   280                 case Exn.Res(spell_checker) => Some(spell_checker)
   281                 case Exn.Exn(_) => None
   282               }
   283             current_spell_checker = (lang, spell_checker)
   284           case None =>
   285             current_spell_checker = no_spell_checker
   286         }
   287       }
   288     }
   289     else current_spell_checker = no_spell_checker
   290   }
   291 }