more elementary notion of "word" (similar to VoxSpell) -- treat hyphen as separator;
author wenzelm
Sun, 13 Apr 2014 19:20:03 +0200
changeset 56561 5b6c3d69942a
parent 56560 ac916ea744e4
child 56562 7f6f5665a96e
more elementary notion of "word" (similar to VoxSpell) -- treat hyphen as separator;
src/Tools/jEdit/src/spell_checker.scala
--- a/src/Tools/jEdit/src/spell_checker.scala	Sun Apr 13 19:18:30 2014 +0200
+++ b/src/Tools/jEdit/src/spell_checker.scala	Sun Apr 13 19:20:03 2014 +0200
@@ -15,6 +15,7 @@
 
 import scala.collection.mutable
 import scala.swing.ComboBox
+import scala.annotation.tailrec
 
 
 object Spell_Checker
@@ -135,18 +136,26 @@
   def bad_words(text: String): List[Text.Range] =
   {
     val result = new mutable.ListBuffer[Text.Range]
-
-    val it = BreakIterator.getWordInstance(dict.locale)
-    it.setText(text)
+    var offset = 0
 
-    var i = 0
-    var j = it.next
-    while (j != BreakIterator.DONE) {
-      val word = text.substring(i, j)
-      if (word.length >= 2 && Character.isLetter(word(0)) && !check(word))
-        result += Text.Range(i, j)
-      i = j
-      j = it.next
+    @tailrec def scan(pred: Int => Boolean)
+    {
+      if (offset < text.length) {
+        val c = text.codePointAt(offset)
+        if (pred(c)) {
+          offset += Character.charCount(c)
+          scan(pred)
+        }
+      }
+    }
+
+    while (offset < text.length) {
+      scan(c => !Character.isLetter(c))
+      val start = offset
+      scan(c => Character.isLetterOrDigit(c) || c == '\'')
+      val stop = offset
+      if (stop - start >= 2 && !check(text.substring(start, stop)))
+        result += Text.Range(start, stop)
     }
     result.toList
   }