author | wenzelm |
Sun, 13 Apr 2014 21:43:25 +0200 | |
changeset 56564 | 94c55cc73747 |
parent 56563 | 9ac666f343d4 |
child 56565 | 927dff80d0df |
permissions | -rw-r--r-- |
56547 | 1 |
/* Title: Tools/jEdit/src/spell_checker.scala |
2 |
Author: Makarius |
|
3 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
4 |
Spell checker based on JOrtho (see http://sourceforge.net/projects/jortho). |
56547 | 5 |
*/ |
6 |
||
7 |
package isabelle.jedit |
|
8 |
||
9 |
||
10 |
import isabelle._ |
|
11 |
||
12 |
import java.lang.Class |
|
56549 | 13 |
import java.util.Locale |
14 |
import java.text.BreakIterator |
|
15 |
||
16 |
import scala.collection.mutable |
|
56559 | 17 |
import scala.swing.ComboBox |
56561
5b6c3d69942a
more elementary notion of "word" (similar to VoxSpell) -- treat hyphen as separator;
wenzelm
parents:
56559
diff
changeset
|
18 |
import scala.annotation.tailrec |
56547 | 19 |
|
20 |
||
21 |
object Spell_Checker |
|
22 |
{ |
|
56563 | 23 |
/* marked words within text */ |
24 |
||
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
25 |
def marked_words(text: String, mark: Text.Info[String] => Boolean): List[Text.Info[String]] = |
56563 | 26 |
{ |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
27 |
val result = new mutable.ListBuffer[Text.Info[String]] |
56563 | 28 |
var offset = 0 |
29 |
||
30 |
def apostrophe(c: Int): Boolean = |
|
31 |
c == '\'' && (offset + 1 == text.length || text(offset + 1) != '\'') |
|
32 |
||
33 |
@tailrec def scan(pred: Int => Boolean) |
|
34 |
{ |
|
35 |
if (offset < text.length) { |
|
36 |
val c = text.codePointAt(offset) |
|
37 |
if (pred(c)) { |
|
38 |
offset += Character.charCount(c) |
|
39 |
scan(pred) |
|
40 |
} |
|
41 |
} |
|
42 |
} |
|
43 |
||
44 |
while (offset < text.length) { |
|
45 |
scan(c => !Character.isLetter(c)) |
|
46 |
val start = offset |
|
47 |
scan(c => Character.isLetterOrDigit(c) || apostrophe(c)) |
|
48 |
val stop = offset |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
49 |
if (stop - start >= 2) { |
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
50 |
val info = Text.Info(Text.Range(start, stop), text.substring(start, stop)) |
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
51 |
if (mark(info)) result += info |
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
52 |
} |
56563 | 53 |
} |
54 |
result.toList |
|
55 |
} |
|
56 |
||
57 |
||
56559 | 58 |
/* dictionary consisting of word list */ |
59 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
60 |
class Dictionary private [Spell_Checker](path: Path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
61 |
{ |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
62 |
val lang = path.split_ext._1.base.implode |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
63 |
override def toString: String = lang |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
64 |
|
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
65 |
val locale: Locale = |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
66 |
space_explode('_', lang) match { |
56559 | 67 |
case l :: _ => Locale.forLanguageTag(l) |
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
68 |
case Nil => Locale.ENGLISH |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
69 |
} |
56547 | 70 |
|
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
71 |
def load_words: List[String] = |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
72 |
path.split_ext._2 match { |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
73 |
case "gz" => split_lines(File.read_gzip(path)) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
74 |
case "" => split_lines(File.read(path)) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
75 |
case ext => error("Bad file extension for dictionary " + path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
76 |
} |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
77 |
} |
56547 | 78 |
|
56559 | 79 |
|
80 |
/* known dictionaries */ |
|
81 |
||
82 |
def dictionaries(): List[Dictionary] = |
|
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
83 |
for { |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
84 |
path <- Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES")) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
85 |
if path.is_file |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
86 |
} yield new Dictionary(path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
87 |
|
56559 | 88 |
def dictionaries_selector(): Option_Component = |
89 |
{ |
|
90 |
Swing_Thread.require() |
|
91 |
||
92 |
val option_name = "spell_checker_language" |
|
93 |
val opt = PIDE.options.value.check_name(option_name) |
|
94 |
||
95 |
val entries = dictionaries() |
|
96 |
val component = new ComboBox(entries) with Option_Component { |
|
97 |
name = option_name |
|
98 |
val title = opt.title() |
|
99 |
def load: Unit = |
|
100 |
{ |
|
101 |
val lang = PIDE.options.string(option_name) |
|
102 |
entries.find(_.lang == lang) match { |
|
103 |
case Some(entry) => selection.item = entry |
|
104 |
case None => |
|
105 |
} |
|
106 |
} |
|
107 |
def save: Unit = PIDE.options.string(option_name) = selection.item.lang |
|
108 |
} |
|
109 |
||
110 |
component.load() |
|
111 |
component.tooltip = opt.print_default |
|
112 |
component |
|
113 |
} |
|
114 |
||
115 |
||
116 |
/* create spell checker */ |
|
117 |
||
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
118 |
def apply(dictionary: Dictionary): Spell_Checker = new Spell_Checker(dictionary) |
56547 | 119 |
} |
120 |
||
56559 | 121 |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
122 |
class Spell_Checker private(dictionary: Spell_Checker.Dictionary) |
56547 | 123 |
{ |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
124 |
override def toString: String = dictionary.toString |
56547 | 125 |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
126 |
private val dict = |
56547 | 127 |
{ |
128 |
val factory_class = Class.forName("com.inet.jortho.DictionaryFactory") |
|
129 |
val factory_cons = factory_class.getConstructor() |
|
130 |
factory_cons.setAccessible(true) |
|
131 |
val factory = factory_cons.newInstance() |
|
132 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
133 |
val add = factory_class.getDeclaredMethod("add", classOf[String]) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
134 |
add.setAccessible(true) |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
135 |
dictionary.load_words.foreach(add.invoke(factory, _)) |
56547 | 136 |
|
137 |
val create = factory_class.getDeclaredMethod("create") |
|
138 |
create.setAccessible(true) |
|
139 |
create.invoke(factory) |
|
140 |
} |
|
141 |
||
142 |
def add(word: String) |
|
143 |
{ |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
144 |
val m = dict.getClass.getDeclaredMethod("add", classOf[String]) |
56547 | 145 |
m.setAccessible(true) |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
146 |
m.invoke(dict, word) |
56547 | 147 |
} |
148 |
||
56552 | 149 |
def contains(word: String): Boolean = |
56547 | 150 |
{ |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
151 |
val m = dict.getClass.getSuperclass.getDeclaredMethod("exist", classOf[String]) |
56547 | 152 |
m.setAccessible(true) |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
153 |
m.invoke(dict, word).asInstanceOf[java.lang.Boolean].booleanValue |
56547 | 154 |
} |
155 |
||
56552 | 156 |
def check(word: String): Boolean = |
157 |
contains(word) || |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
158 |
Library.is_all_caps(word) && contains(Library.lowercase(word, dictionary.locale)) || |
56552 | 159 |
Library.is_capitalized(word) && |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
160 |
(contains(Library.lowercase(word, dictionary.locale)) || |
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
161 |
contains(Library.uppercase(word, dictionary.locale))) |
56552 | 162 |
|
56547 | 163 |
def complete(word: String): List[String] = |
164 |
{ |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
165 |
val m = dict.getClass.getSuperclass. getDeclaredMethod("searchSuggestions", classOf[String]) |
56547 | 166 |
m.setAccessible(true) |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
167 |
m.invoke(dict, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString) |
56547 | 168 |
} |
56549 | 169 |
|
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
170 |
def marked_words(text: String): List[Text.Info[String]] = |
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
171 |
Spell_Checker.marked_words(text, info => !check(info.info)) |
56547 | 172 |
} |
173 |
||
56558
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
174 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
175 |
class Spell_Checker_Variable |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
176 |
{ |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
177 |
private val no_spell_checker: (String, Option[Spell_Checker]) = ("", None) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
178 |
private var current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
179 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
180 |
def get: Option[Spell_Checker] = synchronized { current_spell_checker._2 } |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
181 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
182 |
def update(options: Options): Unit = synchronized { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
183 |
if (options.bool("spell_checker")) { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
184 |
val lang = options.string("spell_checker_language") |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
185 |
if (current_spell_checker._1 != lang) { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
186 |
Spell_Checker.dictionaries.find(_.lang == lang) match { |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
187 |
case Some(dictionary) => |
56558
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
188 |
val spell_checker = |
56564
94c55cc73747
added spell-checker completion dialog, without counting frequency of items due to empty name;
wenzelm
parents:
56563
diff
changeset
|
189 |
Exn.capture { Spell_Checker(dictionary) } match { |
56558
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
190 |
case Exn.Res(spell_checker) => Some(spell_checker) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
191 |
case Exn.Exn(_) => None |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
192 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
193 |
current_spell_checker = (lang, spell_checker) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
194 |
case None => |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
195 |
current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
196 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
197 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
198 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
199 |
else current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
200 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
201 |
} |