author | wenzelm |
Sun, 13 Apr 2014 15:34:54 +0200 | |
changeset 56557 | 18d921496aa5 |
parent 56552 | 76cf86240cb7 |
child 56558 | 05c833d402bc |
permissions | -rw-r--r-- |
56547 | 1 |
/* Title: Tools/jEdit/src/spell_checker.scala |
2 |
Author: Makarius |
|
3 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
4 |
Spell checker based on JOrtho (see http://sourceforge.net/projects/jortho). |
56547 | 5 |
*/ |
6 |
||
7 |
package isabelle.jedit |
|
8 |
||
9 |
||
10 |
import isabelle._ |
|
11 |
||
12 |
import java.lang.Class |
|
56549 | 13 |
import java.util.Locale |
14 |
import java.text.BreakIterator |
|
15 |
||
16 |
import scala.collection.mutable |
|
56547 | 17 |
|
18 |
||
19 |
object Spell_Checker
{
  /* dictionary file, identified by its language name */

  class Dictionary private [Spell_Checker](path: Path)
  {
    // base name of the file, taken from the part of the path before the extension
    val lang = path.split_ext._1.base.implode
    override def toString: String = lang

    // locale from the first '_'-separated component of lang;
    // Locale.ENGLISH if there is no such component
    val locale: Locale =
      space_explode('_', lang).headOption.
        map(Locale.forLanguageTag(_)).getOrElse(Locale.ENGLISH)

    // content of the dictionary file, split into lines:
    // extension "gz" is read via File.read_gzip, no extension via File.read,
    // any other extension is an error
    def load_words: List[String] =
    {
      val ext = path.split_ext._2
      if (ext == "gz") split_lines(File.read_gzip(path))
      else if (ext == "") split_lines(File.read(path))
      else error("Bad file extension for dictionary " + path)
    }
  }


  /* dictionaries from the JORTHO_DICTIONARIES setting: existing files only */

  def dictionaries: List[Dictionary] =
    Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES")).
      filter(_.is_file).map(new Dictionary(_))


  /* spell checker instance for a given dictionary */

  def apply(dict: Dictionary): Spell_Checker = new Spell_Checker(dict)
}
48 |
||
56557
18d921496aa5
updated to jortho-1.0-1: dictionaries from SCOWL 7.1, with parameters like aspell;
wenzelm
parents:
56552
diff
changeset
|
49 |
class Spell_Checker private(dict: Spell_Checker.Dictionary)
{
  override def toString: String = dict.toString


  /* JOrtho dictionary, built via reflection on com.inet.jortho.DictionaryFactory */

  private val dictionary =
  {
    val factory_class = Class.forName("com.inet.jortho.DictionaryFactory")
    val factory_cons = factory_class.getConstructor()
    factory_cons.setAccessible(true)
    val factory = factory_cons.newInstance()

    // feed every entry of the word list into the factory
    val add = factory_class.getDeclaredMethod("add", classOf[String])
    add.setAccessible(true)
    dict.load_words.foreach(add.invoke(factory, _))

    val create = factory_class.getDeclaredMethod("create")
    create.setAccessible(true)
    create.invoke(factory)
  }


  /* reflective method handles */

  // declared method `name` with a single String parameter, made accessible
  private def string_method(c: Class[_], name: String): java.lang.reflect.Method =
  {
    val m = c.getDeclaredMethod(name, classOf[String])
    m.setAccessible(true)
    m
  }

  // Looked up once instead of on every call: contains is invoked several
  // times per word from bad_words.  Lazy, so that a failing lookup is still
  // reported on first use (and retried on later calls), not at construction.
  private lazy val add_method =
    string_method(dictionary.getClass, "add")
  private lazy val exist_method =
    string_method(dictionary.getClass.getSuperclass, "exist")
  private lazy val suggestions_method =
    string_method(dictionary.getClass.getSuperclass, "searchSuggestions")


  /* dictionary operations */

  // add word to the dictionary of this spell checker instance
  def add(word: String): Unit =
  {
    add_method.invoke(dictionary, word)
  }

  // literal lookup of word, without any change of case
  def contains(word: String): Boolean =
    exist_method.invoke(dictionary, word).asInstanceOf[java.lang.Boolean].booleanValue

  // word is accepted if it is contained literally, or it is all-caps and its
  // lowercase form is contained, or it is capitalized and its lowercase or
  // uppercase form is contained (case mapping wrt. the dictionary locale)
  def check(word: String): Boolean =
    contains(word) ||
    (Library.is_all_caps(word) && contains(Library.lowercase(word, dict.locale))) ||
    (Library.is_capitalized(word) &&
      (contains(Library.lowercase(word, dict.locale)) ||
       contains(Library.uppercase(word, dict.locale))))

  // suggestions for word, each rendered via toString
  def complete(word: String): List[String] =
    suggestions_method.invoke(dictionary, word).
      asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString)

  // ranges of text between consecutive word boundaries (wrt. the dictionary
  // locale) that have at least 2 characters, start with a letter, and fail check
  def bad_words(text: String): List[Text.Range] =
  {
    val result = new mutable.ListBuffer[Text.Range]

    val it = BreakIterator.getWordInstance(dict.locale)
    it.setText(text)

    var i = 0  // start of current segment
    var j = it.next()  // end of current segment: next boundary
    while (j != BreakIterator.DONE) {
      val word = text.substring(i, j)
      if (word.length >= 2 && Character.isLetter(word(0)) && !check(word))
        result += Text.Range(i, j)
      i = j
      j = it.next()
    }
    result.toList
  }
}
117 |