src/Pure/Tools/spell_checker.scala
changeset 65139 0a2c0712e432
child 65367 83c30e290702
equal deleted inserted replaced
65138:64dfee6bd243 65139:0a2c0712e432
       
     1 /*  Title:      Tools/jEdit/src/spell_checker.scala
       
     2     Author:     Makarius
       
     3 
       
     4 Spell checker with completion, based on JOrtho (see
       
     5 http://sourceforge.net/projects/jortho).
       
     6 */
       
     7 
       
     8 package isabelle
       
     9 
       
    10 
       
    11 import java.lang.Class
       
    12 
       
    13 import scala.collection.mutable
       
    14 import scala.annotation.tailrec
       
    15 import scala.collection.immutable.SortedMap
       
    16 
       
    17 
       
    18 object Spell_Checker
       
    19 {
       
    20   /* words within text */
       
    21 
       
    22   def marked_words(base: Text.Offset, text: String, mark: Text.Info[String] => Boolean)
       
    23     : List[Text.Info[String]] =
       
    24   {
       
    25     val result = new mutable.ListBuffer[Text.Info[String]]
       
    26     var offset = 0
       
    27 
       
    28     def apostrophe(c: Int): Boolean =
       
    29       c == '\'' && (offset + 1 == text.length || text(offset + 1) != '\'')
       
    30 
       
    31     @tailrec def scan(pred: Int => Boolean)
       
    32     {
       
    33       if (offset < text.length) {
       
    34         val c = text.codePointAt(offset)
       
    35         if (pred(c)) {
       
    36           offset += Character.charCount(c)
       
    37           scan(pred)
       
    38         }
       
    39       }
       
    40     }
       
    41 
       
    42     while (offset < text.length) {
       
    43       scan(c => !Character.isLetter(c))
       
    44       val start = offset
       
    45       scan(c => Character.isLetterOrDigit(c) || apostrophe(c))
       
    46       val stop = offset
       
    47       if (stop - start >= 2) {
       
    48         val info = Text.Info(Text.Range(base + start, base + stop), text.substring(start, stop))
       
    49         if (mark(info)) result += info
       
    50       }
       
    51     }
       
    52     result.toList
       
    53   }
       
    54 
       
    55 
       
    56   /* dictionaries */
       
    57 
       
    58   class Dictionary private[Spell_Checker](val path: Path)
       
    59   {
       
    60     val lang = path.split_ext._1.base.implode
       
    61     val user_path = Path.explode("$ISABELLE_HOME_USER/dictionaries") + Path.basic(lang)
       
    62     override def toString: String = lang
       
    63   }
       
    64 
       
    65   private object Decl
       
    66   {
       
    67     def apply(name: String, include: Boolean): String =
       
    68       if (include) name else "-" + name
       
    69 
       
    70     def unapply(decl: String): Option[(String, Boolean)] =
       
    71     {
       
    72       val decl1 = decl.trim
       
    73       if (decl1 == "" || decl1.startsWith("#")) None
       
    74       else
       
    75         Library.try_unprefix("-", decl1.trim) match {
       
    76           case None => Some((decl1, true))
       
    77           case Some(decl2) => Some((decl2, false))
       
    78         }
       
    79     }
       
    80   }
       
    81 
       
    82   def dictionaries(): List[Dictionary] =
       
    83     for {
       
    84       path <- Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES"))
       
    85       if path.is_file
       
    86     } yield new Dictionary(path)
       
    87 
       
    88 
       
    89   /* create spell checker */
       
    90 
       
    91   def apply(dictionary: Dictionary): Spell_Checker = new Spell_Checker(dictionary)
       
    92 
       
    93   private sealed case class Update(include: Boolean, permanent: Boolean)
       
    94 }
       
    95 
       
    96 
       
    97 class Spell_Checker private(dictionary: Spell_Checker.Dictionary)
       
    98 {
       
    99   override def toString: String = dictionary.toString
       
   100 
       
   101 
       
   102   /* main dictionary content */
       
   103 
       
   104   private var dict = new Object
       
   105   private var updates = SortedMap.empty[String, Spell_Checker.Update]
       
   106 
       
   107   private def included_iterator(): Iterator[String] =
       
   108     for {
       
   109       (word, upd) <- updates.iterator
       
   110       if upd.include
       
   111     } yield word
       
   112 
       
   113   private def excluded(word: String): Boolean =
       
   114     updates.get(word) match {
       
   115       case Some(upd) => !upd.include
       
   116       case None => false
       
   117     }
       
   118 
       
   119   private def load()
       
   120   {
       
   121     val main_dictionary = split_lines(File.read_gzip(dictionary.path))
       
   122 
       
   123     val permanent_updates =
       
   124       if (dictionary.user_path.is_file)
       
   125         for {
       
   126           Spell_Checker.Decl(word, include) <- split_lines(File.read(dictionary.user_path))
       
   127         } yield (word, Spell_Checker.Update(include, true))
       
   128       else Nil
       
   129 
       
   130     updates =
       
   131       updates -- (for ((name, upd) <- updates.iterator; if upd.permanent) yield name) ++
       
   132         permanent_updates
       
   133 
       
   134     val factory_class = Class.forName("com.inet.jortho.DictionaryFactory")
       
   135     val factory_cons = factory_class.getConstructor()
       
   136     factory_cons.setAccessible(true)
       
   137     val factory = factory_cons.newInstance()
       
   138 
       
   139     val add = Untyped.method(factory_class, "add", classOf[String])
       
   140 
       
   141     for {
       
   142       word <- main_dictionary.iterator ++ included_iterator()
       
   143       if !excluded(word)
       
   144     } add.invoke(factory, word)
       
   145 
       
   146     dict = Untyped.method(factory_class, "create").invoke(factory)
       
   147   }
       
   148   load()
       
   149 
       
   150   private def save()
       
   151   {
       
   152     val permanent_decls =
       
   153       (for {
       
   154         (word, upd) <- updates.iterator
       
   155         if upd.permanent
       
   156       } yield Spell_Checker.Decl(word, upd.include)).toList
       
   157 
       
   158     if (permanent_decls.nonEmpty || dictionary.user_path.is_file) {
       
   159       val header = """# User updates for spell-checker dictionary
       
   160 #
       
   161 #   * each line contains at most one word
       
   162 #   * extra blanks are ignored
       
   163 #   * lines starting with "#" are stripped
       
   164 #   * lines starting with "-" indicate excluded words
       
   165 #
       
   166 #:mode=text:encoding=UTF-8:
       
   167 
       
   168 """
       
   169       Isabelle_System.mkdirs(dictionary.user_path.expand.dir)
       
   170       File.write(dictionary.user_path, header + cat_lines(permanent_decls))
       
   171     }
       
   172   }
       
   173 
       
   174   def update(word: String, include: Boolean, permanent: Boolean)
       
   175   {
       
   176     updates += (word -> Spell_Checker.Update(include, permanent))
       
   177 
       
   178     if (include) {
       
   179       if (permanent) save()
       
   180       Untyped.method(dict.getClass, "add", classOf[String]).invoke(dict, word)
       
   181     }
       
   182     else { save(); load() }
       
   183   }
       
   184 
       
   185   def reset()
       
   186   {
       
   187     updates = SortedMap.empty
       
   188     load()
       
   189   }
       
   190 
       
   191   def reset_enabled(): Int =
       
   192     updates.valuesIterator.filter(upd => !upd.permanent).length
       
   193 
       
   194 
       
   195   /* check known words */
       
   196 
       
   197   def contains(word: String): Boolean =
       
   198     Untyped.method(dict.getClass.getSuperclass, "exist", classOf[String]).
       
   199       invoke(dict, word).asInstanceOf[java.lang.Boolean].booleanValue
       
   200 
       
   201   def check(word: String): Boolean =
       
   202     word match {
       
   203       case Word.Case(c) if c != Word.Lowercase =>
       
   204         contains(word) || contains(Word.lowercase(word))
       
   205       case _ =>
       
   206         contains(word)
       
   207     }
       
   208 
       
   209   def marked_words(base: Text.Offset, text: String): List[Text.Info[String]] =
       
   210     Spell_Checker.marked_words(base, text, info => !check(info.info))
       
   211 
       
   212 
       
   213   /* suggestions for unknown words */
       
   214 
       
   215   private def suggestions(word: String): Option[List[String]] =
       
   216   {
       
   217     val res =
       
   218       Untyped.method(dict.getClass.getSuperclass, "searchSuggestions", classOf[String]).
       
   219         invoke(dict, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString)
       
   220     if (res.isEmpty) None else Some(res)
       
   221   }
       
   222 
       
   223   def complete(word: String): List[String] =
       
   224     if (check(word)) Nil
       
   225     else {
       
   226       val word_case = Word.Case.unapply(word)
       
   227       def recover_case(s: String) =
       
   228         word_case match {
       
   229           case Some(c) => Word.Case(c, s)
       
   230           case None => s
       
   231         }
       
   232       val result =
       
   233         word_case match {
       
   234           case Some(c) if c != Word.Lowercase =>
       
   235             suggestions(word) orElse suggestions(Word.lowercase(word))
       
   236           case _ =>
       
   237             suggestions(word)
       
   238         }
       
   239       result.getOrElse(Nil).map(recover_case)
       
   240     }
       
   241 
       
   242   def complete_enabled(word: String): Boolean = complete(word).nonEmpty
       
   243 }
       
   244 
       
   245 
       
   246 class Spell_Checker_Variable
       
   247 {
       
   248   private val no_spell_checker: (String, Option[Spell_Checker]) = ("", None)
       
   249   private var current_spell_checker = no_spell_checker
       
   250 
       
   251   def get: Option[Spell_Checker] = synchronized { current_spell_checker._2 }
       
   252 
       
   253   def update(options: Options): Unit = synchronized {
       
   254     if (options.bool("spell_checker")) {
       
   255       val lang = options.string("spell_checker_dictionary")
       
   256       if (current_spell_checker._1 != lang) {
       
   257         Spell_Checker.dictionaries.find(_.lang == lang) match {
       
   258           case Some(dictionary) =>
       
   259             val spell_checker =
       
   260               Exn.capture { Spell_Checker(dictionary) } match {
       
   261                 case Exn.Res(spell_checker) => Some(spell_checker)
       
   262                 case Exn.Exn(_) => None
       
   263               }
       
   264             current_spell_checker = (lang, spell_checker)
       
   265           case None =>
       
   266             current_spell_checker = no_spell_checker
       
   267         }
       
   268       }
       
   269     }
       
   270     else current_spell_checker = no_spell_checker
       
   271   }
       
   272 }