|
1 /* Title: Tools/jEdit/src/spell_checker.scala |
|
2 Author: Makarius |
|
3 |
|
4 Spell checker with completion, based on JOrtho (see |
|
5 http://sourceforge.net/projects/jortho). |
|
6 */ |
|
7 |
|
8 package isabelle |
|
9 |
|
10 |
|
11 import java.lang.Class |
|
12 |
|
13 import scala.collection.mutable |
|
14 import scala.annotation.tailrec |
|
15 import scala.collection.immutable.SortedMap |
|
16 |
|
17 |
|
18 object Spell_Checker |
|
19 { |
|
20 /* words within text */ |
|
21 |
|
22 def marked_words(base: Text.Offset, text: String, mark: Text.Info[String] => Boolean) |
|
23 : List[Text.Info[String]] = |
|
24 { |
|
25 val result = new mutable.ListBuffer[Text.Info[String]] |
|
26 var offset = 0 |
|
27 |
|
28 def apostrophe(c: Int): Boolean = |
|
29 c == '\'' && (offset + 1 == text.length || text(offset + 1) != '\'') |
|
30 |
|
31 @tailrec def scan(pred: Int => Boolean) |
|
32 { |
|
33 if (offset < text.length) { |
|
34 val c = text.codePointAt(offset) |
|
35 if (pred(c)) { |
|
36 offset += Character.charCount(c) |
|
37 scan(pred) |
|
38 } |
|
39 } |
|
40 } |
|
41 |
|
42 while (offset < text.length) { |
|
43 scan(c => !Character.isLetter(c)) |
|
44 val start = offset |
|
45 scan(c => Character.isLetterOrDigit(c) || apostrophe(c)) |
|
46 val stop = offset |
|
47 if (stop - start >= 2) { |
|
48 val info = Text.Info(Text.Range(base + start, base + stop), text.substring(start, stop)) |
|
49 if (mark(info)) result += info |
|
50 } |
|
51 } |
|
52 result.toList |
|
53 } |
|
54 |
|
55 |
|
56 /* dictionaries */ |
|
57 |
|
58 class Dictionary private[Spell_Checker](val path: Path) |
|
59 { |
|
60 val lang = path.split_ext._1.base.implode |
|
61 val user_path = Path.explode("$ISABELLE_HOME_USER/dictionaries") + Path.basic(lang) |
|
62 override def toString: String = lang |
|
63 } |
|
64 |
|
65 private object Decl |
|
66 { |
|
67 def apply(name: String, include: Boolean): String = |
|
68 if (include) name else "-" + name |
|
69 |
|
70 def unapply(decl: String): Option[(String, Boolean)] = |
|
71 { |
|
72 val decl1 = decl.trim |
|
73 if (decl1 == "" || decl1.startsWith("#")) None |
|
74 else |
|
75 Library.try_unprefix("-", decl1.trim) match { |
|
76 case None => Some((decl1, true)) |
|
77 case Some(decl2) => Some((decl2, false)) |
|
78 } |
|
79 } |
|
80 } |
|
81 |
|
82 def dictionaries(): List[Dictionary] = |
|
83 for { |
|
84 path <- Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES")) |
|
85 if path.is_file |
|
86 } yield new Dictionary(path) |
|
87 |
|
88 |
|
89 /* create spell checker */ |
|
90 |
|
91 def apply(dictionary: Dictionary): Spell_Checker = new Spell_Checker(dictionary) |
|
92 |
|
93 private sealed case class Update(include: Boolean, permanent: Boolean) |
|
94 } |
|
95 |
|
96 |
|
97 class Spell_Checker private(dictionary: Spell_Checker.Dictionary) |
|
98 { |
|
99 override def toString: String = dictionary.toString |
|
100 |
|
101 |
|
102 /* main dictionary content */ |
|
103 |
|
104 private var dict = new Object |
|
105 private var updates = SortedMap.empty[String, Spell_Checker.Update] |
|
106 |
|
107 private def included_iterator(): Iterator[String] = |
|
108 for { |
|
109 (word, upd) <- updates.iterator |
|
110 if upd.include |
|
111 } yield word |
|
112 |
|
113 private def excluded(word: String): Boolean = |
|
114 updates.get(word) match { |
|
115 case Some(upd) => !upd.include |
|
116 case None => false |
|
117 } |
|
118 |
|
119 private def load() |
|
120 { |
|
121 val main_dictionary = split_lines(File.read_gzip(dictionary.path)) |
|
122 |
|
123 val permanent_updates = |
|
124 if (dictionary.user_path.is_file) |
|
125 for { |
|
126 Spell_Checker.Decl(word, include) <- split_lines(File.read(dictionary.user_path)) |
|
127 } yield (word, Spell_Checker.Update(include, true)) |
|
128 else Nil |
|
129 |
|
130 updates = |
|
131 updates -- (for ((name, upd) <- updates.iterator; if upd.permanent) yield name) ++ |
|
132 permanent_updates |
|
133 |
|
134 val factory_class = Class.forName("com.inet.jortho.DictionaryFactory") |
|
135 val factory_cons = factory_class.getConstructor() |
|
136 factory_cons.setAccessible(true) |
|
137 val factory = factory_cons.newInstance() |
|
138 |
|
139 val add = Untyped.method(factory_class, "add", classOf[String]) |
|
140 |
|
141 for { |
|
142 word <- main_dictionary.iterator ++ included_iterator() |
|
143 if !excluded(word) |
|
144 } add.invoke(factory, word) |
|
145 |
|
146 dict = Untyped.method(factory_class, "create").invoke(factory) |
|
147 } |
|
148 load() |
|
149 |
|
150 private def save() |
|
151 { |
|
152 val permanent_decls = |
|
153 (for { |
|
154 (word, upd) <- updates.iterator |
|
155 if upd.permanent |
|
156 } yield Spell_Checker.Decl(word, upd.include)).toList |
|
157 |
|
158 if (permanent_decls.nonEmpty || dictionary.user_path.is_file) { |
|
159 val header = """# User updates for spell-checker dictionary |
|
160 # |
|
161 # * each line contains at most one word |
|
162 # * extra blanks are ignored |
|
163 # * lines starting with "#" are stripped |
|
164 # * lines starting with "-" indicate excluded words |
|
165 # |
|
166 #:mode=text:encoding=UTF-8: |
|
167 |
|
168 """ |
|
169 Isabelle_System.mkdirs(dictionary.user_path.expand.dir) |
|
170 File.write(dictionary.user_path, header + cat_lines(permanent_decls)) |
|
171 } |
|
172 } |
|
173 |
|
174 def update(word: String, include: Boolean, permanent: Boolean) |
|
175 { |
|
176 updates += (word -> Spell_Checker.Update(include, permanent)) |
|
177 |
|
178 if (include) { |
|
179 if (permanent) save() |
|
180 Untyped.method(dict.getClass, "add", classOf[String]).invoke(dict, word) |
|
181 } |
|
182 else { save(); load() } |
|
183 } |
|
184 |
|
185 def reset() |
|
186 { |
|
187 updates = SortedMap.empty |
|
188 load() |
|
189 } |
|
190 |
|
191 def reset_enabled(): Int = |
|
192 updates.valuesIterator.filter(upd => !upd.permanent).length |
|
193 |
|
194 |
|
195 /* check known words */ |
|
196 |
|
197 def contains(word: String): Boolean = |
|
198 Untyped.method(dict.getClass.getSuperclass, "exist", classOf[String]). |
|
199 invoke(dict, word).asInstanceOf[java.lang.Boolean].booleanValue |
|
200 |
|
201 def check(word: String): Boolean = |
|
202 word match { |
|
203 case Word.Case(c) if c != Word.Lowercase => |
|
204 contains(word) || contains(Word.lowercase(word)) |
|
205 case _ => |
|
206 contains(word) |
|
207 } |
|
208 |
|
209 def marked_words(base: Text.Offset, text: String): List[Text.Info[String]] = |
|
210 Spell_Checker.marked_words(base, text, info => !check(info.info)) |
|
211 |
|
212 |
|
213 /* suggestions for unknown words */ |
|
214 |
|
215 private def suggestions(word: String): Option[List[String]] = |
|
216 { |
|
217 val res = |
|
218 Untyped.method(dict.getClass.getSuperclass, "searchSuggestions", classOf[String]). |
|
219 invoke(dict, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString) |
|
220 if (res.isEmpty) None else Some(res) |
|
221 } |
|
222 |
|
223 def complete(word: String): List[String] = |
|
224 if (check(word)) Nil |
|
225 else { |
|
226 val word_case = Word.Case.unapply(word) |
|
227 def recover_case(s: String) = |
|
228 word_case match { |
|
229 case Some(c) => Word.Case(c, s) |
|
230 case None => s |
|
231 } |
|
232 val result = |
|
233 word_case match { |
|
234 case Some(c) if c != Word.Lowercase => |
|
235 suggestions(word) orElse suggestions(Word.lowercase(word)) |
|
236 case _ => |
|
237 suggestions(word) |
|
238 } |
|
239 result.getOrElse(Nil).map(recover_case) |
|
240 } |
|
241 |
|
242 def complete_enabled(word: String): Boolean = complete(word).nonEmpty |
|
243 } |
|
244 |
|
245 |
|
246 class Spell_Checker_Variable |
|
247 { |
|
248 private val no_spell_checker: (String, Option[Spell_Checker]) = ("", None) |
|
249 private var current_spell_checker = no_spell_checker |
|
250 |
|
251 def get: Option[Spell_Checker] = synchronized { current_spell_checker._2 } |
|
252 |
|
253 def update(options: Options): Unit = synchronized { |
|
254 if (options.bool("spell_checker")) { |
|
255 val lang = options.string("spell_checker_dictionary") |
|
256 if (current_spell_checker._1 != lang) { |
|
257 Spell_Checker.dictionaries.find(_.lang == lang) match { |
|
258 case Some(dictionary) => |
|
259 val spell_checker = |
|
260 Exn.capture { Spell_Checker(dictionary) } match { |
|
261 case Exn.Res(spell_checker) => Some(spell_checker) |
|
262 case Exn.Exn(_) => None |
|
263 } |
|
264 current_spell_checker = (lang, spell_checker) |
|
265 case None => |
|
266 current_spell_checker = no_spell_checker |
|
267 } |
|
268 } |
|
269 } |
|
270 else current_spell_checker = no_spell_checker |
|
271 } |
|
272 } |