--- a/src/Pure/General/scan.scala Tue Jun 16 15:25:32 2009 +0200
+++ b/src/Pure/General/scan.scala Tue Jun 16 18:47:45 2009 +0200
@@ -1,7 +1,7 @@
/* Title: Pure/General/scan.scala
Author: Makarius
-Efficient scanning of literals.
+Efficient scanning of keywords.
*/
package isabelle
@@ -12,71 +12,113 @@
object Scan
{
- /* class Lexicon -- position tree */
-
- case class Lexicon(val tab: Map[Char, (Boolean, Lexicon)])
+ /** Lexicon -- position tree **/
- val empty_lexicon = Lexicon(Map())
-
- def is_literal(lx: Lexicon, str: CharSequence): Boolean =
+ object Lexicon
{
- val len = str.length
- def is_lit(lexicon: Lexicon, i: Int): Boolean =
- i < len && {
- lexicon.tab.get(str.charAt(i)) match {
- case Some((tip, lex)) => tip && i + 1 == len || is_lit(lex, i + 1)
- case None => false
- }
+ private case class Tree(val branches: Map[Char, (String, Tree)])
+ private val empty_tree = Tree(Map())
+
+ private def make(tree: Tree, max: Int): Lexicon =
+ new Lexicon {
+ override val main_tree = tree
+ override val max_entry = max
}
- is_lit(lx, 0)
+
+ val empty: Lexicon = new Lexicon
+ def apply(strs: String*): Lexicon = (empty /: strs) ((lex, str) => lex + str)
}
- def extend_lexicon(lx: Lexicon, str: CharSequence): Lexicon =
+ class Lexicon extends scala.collection.immutable.Set[String] with RegexParsers
{
- val len = str.length
- def ext(lexicon: Lexicon, i: Int): Lexicon =
- if (i == len) lexicon
- else {
- val c = str.charAt(i)
- val is_last = (i + 1 == len)
- lexicon.tab.get(c) match {
- case Some((tip, lex)) => Lexicon(lexicon.tab + (c -> (tip || is_last, ext(lex, i + 1))))
- case None => Lexicon(lexicon.tab + (c -> (is_last, ext(empty_lexicon, i + 1))))
- }
- }
- if (is_literal(lx, str)) lx else ext(lx, 0)
- }
+ /* representation */
-}
+ import Lexicon.Tree
+ val main_tree: Tree = Lexicon.empty_tree
+ val max_entry = -1
-class Scan extends RegexParsers
-{
- override val whiteSpace = "".r
+ /* Set methods */
+
+ override def stringPrefix = "Lexicon"
+
+ override def isEmpty: Boolean = (max_entry < 0)
+
+ private def destruct: List[String] =
+ {
+ def dest(tree: Tree, result: List[String]): List[String] =
+ (result /: tree.branches.toList) ((res, entry) =>
+ entry match { case (_, (s, tr)) =>
+ if (s.isEmpty) dest(tr, res) else dest(tr, s :: res) })
+ dest(main_tree, Nil).sort(_ <= _)
+ }
+
+ def size: Int = destruct.length
+ def elements: Iterator[String] = destruct.elements
- def keyword(lx: Scan.Lexicon): Parser[String] = new Parser[String] {
- def apply(in: Input) =
+ def contains(str: String): Boolean =
{
- val source = in.source
- val offset = in.offset
- val len = source.length - offset
+ val len = str.length
+ def check(tree: Tree, i: Int): Boolean =
+ i < len && {
+ tree.branches.get(str.charAt(i)) match {
+ case Some((s, tr)) => !s.isEmpty && i + 1 == len || check(tr, i + 1)
+ case None => false
+ }
+ }
+ check(main_tree, 0)
+ }
- def scan(lexicon: Scan.Lexicon, i: Int, res: Int): Int =
+ def +(str: String): Lexicon =
+ {
+ val len = str.length
+ def extend(tree: Tree, i: Int): Tree =
{
if (i < len) {
- lexicon.tab.get(source.charAt(offset + i)) match {
- case Some((tip, lex)) => scan(lex, i + 1, if (tip) i + 1 else res)
- case None => res
+ val c = str.charAt(i)
+ val end = (i + 1 == len)
+ tree.branches.get(c) match {
+ case Some((s, tr)) =>
+ Tree(tree.branches + (c -> (if (end) str else s, extend(tr, i + 1))))
+ case None =>
+ Tree(tree.branches + (c -> (if (end) str else "", extend(Lexicon.empty_tree, i + 1))))
}
- } else res
+ } else tree
}
- scan(lx, 0, 0) match {
- case res if res > 0 =>
- Success(source.subSequence(offset, res).toString, in.drop(res))
- case _ => Failure("keyword expected", in)
+ if (contains(str)) this
+ else Lexicon.make(extend(main_tree, 0), max_entry max str.length)
+ }
+
+ def empty[A]: Set[A] = error("Undefined")
+ def -(str: String): Lexicon = error("Undefined")
+
+
+ /* RegexParsers methods */
+
+ override val whiteSpace = "".r
+
+ def keyword: Parser[String] = new Parser[String] {
+ def apply(in: Input) =
+ {
+ val source = in.source
+ val offset = in.offset
+ val len = source.length - offset
+
+ def scan(tree: Tree, i: Int, res: String): String =
+ {
+ if (i < len) {
+ tree.branches.get(source.charAt(offset + i)) match {
+ case Some((s, tr)) => scan(tr, i + 1, if (s.isEmpty) res else s)
+ case None => res
+ }
+ } else res
+ }
+ val res = scan(main_tree, 0, "")
+ if (res.isEmpty) Failure("keyword expected", in)
+ else Success(res, in.drop(res.length))
}
- }
- }.named("keyword")
+ }.named("keyword")
+ }
}