author | wenzelm |
Sun, 13 Apr 2014 16:42:44 +0200 | |
changeset 56559 | eece73c31e38 |
parent 56558 | 05c833d402bc |
child 56561 | 5b6c3d69942a |
permissions | -rw-r--r-- |
56547 | 1 |
/* Title: Tools/jEdit/src/spell_checker.scala |
2 |
Author: Makarius |
|
3 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
4 |
Spell checker based on JOrtho (see http://sourceforge.net/projects/jortho). |
56547 | 5 |
*/ |
6 |
||
7 |
package isabelle.jedit |
|
8 |
||
9 |
||
10 |
import isabelle._ |
|
11 |
||
12 |
import java.lang.Class |
|
56549 | 13 |
import java.util.Locale |
14 |
import java.text.BreakIterator |
|
15 |
||
16 |
import scala.collection.mutable |
|
56559 | 17 |
import scala.swing.ComboBox |
56547 | 18 |
|
19 |
||
20 |
object Spell_Checker |
|
21 |
{ |
|
56559 | 22 |
/* dictionary consisting of word list */ |
23 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
24 |
class Dictionary private [Spell_Checker](path: Path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
25 |
{ |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
26 |
val lang = path.split_ext._1.base.implode |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
27 |
override def toString: String = lang |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
28 |
|
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
29 |
val locale: Locale = |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
30 |
space_explode('_', lang) match { |
56559 | 31 |
case l :: _ => Locale.forLanguageTag(l) |
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
32 |
case Nil => Locale.ENGLISH |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
33 |
} |
56547 | 34 |
|
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
35 |
def load_words: List[String] = |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
36 |
path.split_ext._2 match { |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
37 |
case "gz" => split_lines(File.read_gzip(path)) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
38 |
case "" => split_lines(File.read(path)) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
39 |
case ext => error("Bad file extension for dictionary " + path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
40 |
} |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
41 |
} |
56547 | 42 |
|
56559 | 43 |
|
44 |
/* known dictionaries */ |
|
45 |
||
46 |
def dictionaries(): List[Dictionary] = |
|
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
47 |
for { |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
48 |
path <- Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES")) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
49 |
if path.is_file |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
50 |
} yield new Dictionary(path) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
51 |
|
56559 | 52 |
def dictionaries_selector(): Option_Component = |
53 |
{ |
|
54 |
Swing_Thread.require() |
|
55 |
||
56 |
val option_name = "spell_checker_language" |
|
57 |
val opt = PIDE.options.value.check_name(option_name) |
|
58 |
||
59 |
val entries = dictionaries() |
|
60 |
val component = new ComboBox(entries) with Option_Component { |
|
61 |
name = option_name |
|
62 |
val title = opt.title() |
|
63 |
def load: Unit = |
|
64 |
{ |
|
65 |
val lang = PIDE.options.string(option_name) |
|
66 |
entries.find(_.lang == lang) match { |
|
67 |
case Some(entry) => selection.item = entry |
|
68 |
case None => |
|
69 |
} |
|
70 |
} |
|
71 |
def save: Unit = PIDE.options.string(option_name) = selection.item.lang |
|
72 |
} |
|
73 |
||
74 |
component.load() |
|
75 |
component.tooltip = opt.print_default |
|
76 |
component |
|
77 |
} |
|
78 |
||
79 |
||
80 |
/* create spell checker */ |
|
81 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
82 |
def apply(dict: Dictionary): Spell_Checker = new Spell_Checker(dict) |
56547 | 83 |
} |
84 |
||
56559 | 85 |
|
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
86 |
class Spell_Checker private(dict: Spell_Checker.Dictionary) |
56547 | 87 |
{ |
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
88 |
override def toString: String = dict.toString |
56547 | 89 |
|
90 |
private val dictionary = |
|
91 |
{ |
|
92 |
val factory_class = Class.forName("com.inet.jortho.DictionaryFactory") |
|
93 |
val factory_cons = factory_class.getConstructor() |
|
94 |
factory_cons.setAccessible(true) |
|
95 |
val factory = factory_cons.newInstance() |
|
96 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
97 |
val add = factory_class.getDeclaredMethod("add", classOf[String]) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
98 |
add.setAccessible(true) |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
99 |
dict.load_words.foreach(add.invoke(factory, _)) |
56547 | 100 |
|
101 |
val create = factory_class.getDeclaredMethod("create") |
|
102 |
create.setAccessible(true) |
|
103 |
create.invoke(factory) |
|
104 |
} |
|
105 |
||
106 |
def add(word: String) |
|
107 |
{ |
|
108 |
val m = dictionary.getClass.getDeclaredMethod("add", classOf[String]) |
|
109 |
m.setAccessible(true) |
|
110 |
m.invoke(dictionary, word) |
|
111 |
} |
|
112 |
||
56552 | 113 |
def contains(word: String): Boolean = |
56547 | 114 |
{ |
115 |
val m = dictionary.getClass.getSuperclass.getDeclaredMethod("exist", classOf[String]) |
|
116 |
m.setAccessible(true) |
|
117 |
m.invoke(dictionary, word).asInstanceOf[java.lang.Boolean].booleanValue |
|
118 |
} |
|
119 |
||
56552 | 120 |
def check(word: String): Boolean = |
121 |
contains(word) || |
|
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
122 |
Library.is_all_caps(word) && contains(Library.lowercase(word, dict.locale)) || |
56552 | 123 |
Library.is_capitalized(word) && |
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
124 |
(contains(Library.lowercase(word, dict.locale)) || |
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
125 |
contains(Library.uppercase(word, dict.locale))) |
56552 | 126 |
|
56547 | 127 |
def complete(word: String): List[String] = |
128 |
{ |
|
129 |
val m = dictionary.getClass.getSuperclass. |
|
130 |
getDeclaredMethod("searchSuggestions", classOf[String]) |
|
131 |
m.setAccessible(true) |
|
132 |
m.invoke(dictionary, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString) |
|
133 |
} |
|
56549 | 134 |
|
135 |
def bad_words(text: String): List[Text.Range] = |
|
136 |
{ |
|
137 |
val result = new mutable.ListBuffer[Text.Range] |
|
138 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
139 |
val it = BreakIterator.getWordInstance(dict.locale) |
56549 | 140 |
it.setText(text) |
141 |
||
142 |
var i = 0 |
|
143 |
var j = it.next |
|
144 |
while (j != BreakIterator.DONE) { |
|
145 |
val word = text.substring(i, j) |
|
146 |
if (word.length >= 2 && Character.isLetter(word(0)) && !check(word)) |
|
147 |
result += Text.Range(i, j) |
|
148 |
i = j |
|
149 |
j = it.next |
|
150 |
} |
|
151 |
result.toList |
|
152 |
} |
|
56547 | 153 |
} |
154 |
||
56558
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
155 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
156 |
class Spell_Checker_Variable |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
157 |
{ |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
158 |
private val no_spell_checker: (String, Option[Spell_Checker]) = ("", None) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
159 |
private var current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
160 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
161 |
def get: Option[Spell_Checker] = synchronized { current_spell_checker._2 } |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
162 |
|
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
163 |
def update(options: Options): Unit = synchronized { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
164 |
if (options.bool("spell_checker")) { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
165 |
val lang = options.string("spell_checker_language") |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
166 |
if (current_spell_checker._1 != lang) { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
167 |
Spell_Checker.dictionaries.find(_.lang == lang) match { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
168 |
case Some(dict) => |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
169 |
val spell_checker = |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
170 |
Exn.capture { Spell_Checker(dict) } match { |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
171 |
case Exn.Res(spell_checker) => Some(spell_checker) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
172 |
case Exn.Exn(_) => None |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
173 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
174 |
current_spell_checker = (lang, spell_checker) |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
175 |
case None => |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
176 |
current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
177 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
178 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
179 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
180 |
else current_spell_checker = no_spell_checker |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
181 |
} |
05c833d402bc
tuned signature -- explicit Spell_Checker_Variable;
wenzelm
parents:
56557
diff
changeset
|
182 |
} |