author | wenzelm |
Sun, 13 Apr 2014 21:59:37 +0200 | |
changeset 56566 | 46a4c6b688c9 |
parent 56565 | 927dff80d0df |
child 56567 | 7adad03f2cef |
permissions | -rw-r--r-- |
56547 | 1 |
/* Title: Tools/jEdit/src/spell_checker.scala |
2 |
Author: Makarius |
|
3 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
4 |
Spell checker based on JOrtho (see http://sourceforge.net/projects/jortho). |
56547 | 5 |
*/ |
6 |
||
7 |
package isabelle.jedit |
|
8 |
||
9 |
||
10 |
import isabelle._ |
|
11 |
||
12 |
import java.lang.Class |
|
56549 | 13 |
import java.util.Locale |
14 |
import java.text.BreakIterator |
|
15 |
||
16 |
import scala.collection.mutable |
|
56559 | 17 |
import scala.swing.ComboBox |
56561
5b6c3d69942a
more elementary notion of "word" (similar to VoxSpell) -- treat hyphen as separator;
wenzelm
parents:
56559
diff
changeset
|
18 |
import scala.annotation.tailrec |
56547 | 19 |
|
20 |
||
21 |
object Spell_Checker |
|
22 |
{ |
|
56563 | 23 |
/* marked words within text */ |
24 |
||
56565 | 25 |
def marked_words(base: Text.Offset, text: String, mark: Text.Info[String] => Boolean) |
26 |
: List[Text.Info[String]] = |
|
56563 | 27 |
{ |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
28 |
val result = new mutable.ListBuffer[Text.Info[String]] |
56563 | 29 |
var offset = 0 |
30 |
||
31 |
def apostrophe(c: Int): Boolean = |
|
32 |
c == '\'' && (offset + 1 == text.length || text(offset + 1) != '\'') |
|
33 |
||
34 |
@tailrec def scan(pred: Int => Boolean) |
|
35 |
{ |
|
36 |
if (offset < text.length) { |
|
37 |
val c = text.codePointAt(offset) |
|
38 |
if (pred(c)) { |
|
39 |
offset += Character.charCount(c) |
|
40 |
scan(pred) |
|
41 |
} |
|
42 |
} |
|
43 |
} |
|
44 |
||
45 |
while (offset < text.length) { |
|
46 |
scan(c => !Character.isLetter(c)) |
|
47 |
val start = offset |
|
48 |
scan(c => Character.isLetterOrDigit(c) || apostrophe(c)) |
|
49 |
val stop = offset |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
50 |
if (stop - start >= 2) { |
56565 | 51 |
val info = Text.Info(Text.Range(base + start, base + stop), text.substring(start, stop)) |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
52 |
if (mark(info)) result += info |
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
53 |
} |
56563 | 54 |
} |
55 |
result.toList |
|
56 |
} |
|
57 |
||
58 |
||
56559 | 59 |
/* dictionary consisting of word list */ |
60 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
61 |
class Dictionary private [Spell_Checker](path: Path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
62 |
{ |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
63 |
val lang = path.split_ext._1.base.implode |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
64 |
override def toString: String = lang |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
65 |
|
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
66 |
val locale: Locale = |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
67 |
space_explode('_', lang) match { |
56559 | 68 |
case l :: _ => Locale.forLanguageTag(l) |
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
69 |
case Nil => Locale.ENGLISH |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
70 |
} |
56547 | 71 |
|
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
72 |
def load_words: List[String] = |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
73 |
path.split_ext._2 match { |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
74 |
case "gz" => split_lines(File.read_gzip(path)) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
75 |
case "" => split_lines(File.read(path)) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
76 |
case ext => error("Bad file extension for dictionary " + path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
77 |
} |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
78 |
} |
56547 | 79 |
|
56559 | 80 |
|
81 |
/* known dictionaries */ |
|
82 |
||
83 |
def dictionaries(): List[Dictionary] = |
|
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
84 |
for { |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
85 |
path <- Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES")) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
86 |
if path.is_file |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
87 |
} yield new Dictionary(path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
88 |
|
56559 | 89 |
def dictionaries_selector(): Option_Component = |
90 |
{ |
|
91 |
Swing_Thread.require() |
|
92 |
||
93 |
val option_name = "spell_checker_language" |
|
94 |
val opt = PIDE.options.value.check_name(option_name) |
|
95 |
||
96 |
val entries = dictionaries() |
|
97 |
val component = new ComboBox(entries) with Option_Component { |
|
98 |
name = option_name |
|
99 |
val title = opt.title() |
|
100 |
def load: Unit = |
|
101 |
{ |
|
102 |
val lang = PIDE.options.string(option_name) |
|
103 |
entries.find(_.lang == lang) match { |
|
104 |
case Some(entry) => selection.item = entry |
|
105 |
case None => |
|
106 |
} |
|
107 |
} |
|
108 |
def save: Unit = PIDE.options.string(option_name) = selection.item.lang |
|
109 |
} |
|
110 |
||
111 |
component.load() |
|
112 |
component.tooltip = opt.print_default |
|
113 |
component |
|
114 |
} |
|
115 |
||
116 |
||
117 |
/* create spell checker */ |
|
118 |
||
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
119 |
def apply(dictionary: Dictionary): Spell_Checker = new Spell_Checker(dictionary) |
56547 | 120 |
} |
121 |
||
56559 | 122 |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
123 |
class Spell_Checker private(dictionary: Spell_Checker.Dictionary) |
56547 | 124 |
{ |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
125 |
override def toString: String = dictionary.toString |
56547 | 126 |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
127 |
private val dict = |
56547 | 128 |
{ |
129 |
val factory_class = Class.forName("com.inet.jortho.DictionaryFactory") |
|
130 |
val factory_cons = factory_class.getConstructor() |
|
131 |
factory_cons.setAccessible(true) |
|
132 |
val factory = factory_cons.newInstance() |
|
133 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
134 |
val add = factory_class.getDeclaredMethod("add", classOf[String]) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
135 |
add.setAccessible(true) |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
136 |
dictionary.load_words.foreach(add.invoke(factory, _)) |
56547 | 137 |
|
138 |
val create = factory_class.getDeclaredMethod("create") |
|
139 |
create.setAccessible(true) |
|
140 |
create.invoke(factory) |
|
141 |
} |
|
142 |
||
143 |
def add(word: String) |
|
144 |
{ |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
145 |
val m = dict.getClass.getDeclaredMethod("add", classOf[String]) |
56547 | 146 |
m.setAccessible(true) |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
147 |
m.invoke(dict, word) |
56547 | 148 |
} |
149 |
||
56552 | 150 |
def contains(word: String): Boolean = |
56547 | 151 |
{ |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
152 |
val m = dict.getClass.getSuperclass.getDeclaredMethod("exist", classOf[String]) |
56547 | 153 |
m.setAccessible(true) |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
154 |
m.invoke(dict, word).asInstanceOf[java.lang.Boolean].booleanValue |
56547 | 155 |
} |
156 |
||
56552 | 157 |
def check(word: String): Boolean = |
158 |
contains(word) || |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
159 |
Library.is_all_caps(word) && contains(Library.lowercase(word, dictionary.locale)) || |
56552 | 160 |
Library.is_capitalized(word) && |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
161 |
(contains(Library.lowercase(word, dictionary.locale)) || |
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
162 |
contains(Library.uppercase(word, dictionary.locale))) |
56552 | 163 |
|
56547 | 164 |
def complete(word: String): List[String] = |
56566
46a4c6b688c9
clarified complete: participate in case-mangling of check;
wenzelm
parents:
56565
diff
changeset
|
165 |
if (check(word)) Nil |
46a4c6b688c9
clarified complete: participate in case-mangling of check;
wenzelm
parents:
56565
diff
changeset
|
166 |
else { |
46a4c6b688c9
clarified complete: participate in case-mangling of check;
wenzelm
parents:
56565
diff
changeset
|
167 |
val m = dict.getClass.getSuperclass. getDeclaredMethod("searchSuggestions", classOf[String]) |
46a4c6b688c9
clarified complete: participate in case-mangling of check;
wenzelm
parents:
56565
diff
changeset
|
168 |
m.setAccessible(true) |
46a4c6b688c9
clarified complete: participate in case-mangling of check;
wenzelm
parents:
56565
diff
changeset
|
169 |
m.invoke(dict, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString) |
46a4c6b688c9
clarified complete: participate in case-mangling of check;
wenzelm
parents:
56565
diff
changeset
|
170 |
} |
56549 | 171 |
|
56565 | 172 |
def marked_words(base: Text.Offset, text: String): List[Text.Info[String]] = |
173 |
Spell_Checker.marked_words(base, text, info => !check(info.info)) |
|
56547 | 174 |
} |
175 |
||
56558
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
176 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
177 |
class Spell_Checker_Variable |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
178 |
{ |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
179 |
private val no_spell_checker: (String, Option[Spell_Checker]) = ("", None) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
180 |
private var current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
181 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
182 |
def get: Option[Spell_Checker] = synchronized { current_spell_checker._2 } |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
183 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
184 |
def update(options: Options): Unit = synchronized { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
185 |
if (options.bool("spell_checker")) { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
186 |
val lang = options.string("spell_checker_language") |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
187 |
if (current_spell_checker._1 != lang) { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
188 |
Spell_Checker.dictionaries.find(_.lang == lang) match { |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
189 |
case Some(dictionary) => |
56558
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
190 |
val spell_checker = |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
191 |
Exn.capture { Spell_Checker(dictionary) } match { |
56558
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
192 |
case Exn.Res(spell_checker) => Some(spell_checker) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
193 |
case Exn.Exn(_) => None |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
194 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
195 |
current_spell_checker = (lang, spell_checker) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
196 |
case None => |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
197 |
current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
198 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
199 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
200 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
201 |
else current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
202 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
203 |
} |