# HG changeset patch # User wenzelm # Date 1245170865 -7200 # Node ID a11ea667d676368ee86dc4b758e4b85d6180e71b # Parent 31b1f296515bcbf0606e263a33635e891447a1d9 reorganized and abstracted version, via Set trait; diff -r 31b1f296515b -r a11ea667d676 src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala Tue Jun 16 15:25:32 2009 +0200 +++ b/src/Pure/General/scan.scala Tue Jun 16 18:47:45 2009 +0200 @@ -1,7 +1,7 @@ /* Title: Pure/General/scan.scala Author: Makarius -Efficient scanning of literals. +Efficient scanning of keywords. */ package isabelle @@ -12,71 +12,113 @@ object Scan { - /* class Lexicon -- position tree */ - - case class Lexicon(val tab: Map[Char, (Boolean, Lexicon)]) + /** Lexicon -- position tree **/ - val empty_lexicon = Lexicon(Map()) - - def is_literal(lx: Lexicon, str: CharSequence): Boolean = + object Lexicon { - val len = str.length - def is_lit(lexicon: Lexicon, i: Int): Boolean = - i < len && { - lexicon.tab.get(str.charAt(i)) match { - case Some((tip, lex)) => tip && i + 1 == len || is_lit(lex, i + 1) - case None => false - } + private case class Tree(val branches: Map[Char, (String, Tree)]) + private val empty_tree = Tree(Map()) + + private def make(tree: Tree, max: Int): Lexicon = + new Lexicon { + override val main_tree = tree + override val max_entry = max } - is_lit(lx, 0) + + val empty: Lexicon = new Lexicon + def apply(strs: String*): Lexicon = (empty /: strs) ((lex, str) => lex + str) } - def extend_lexicon(lx: Lexicon, str: CharSequence): Lexicon = + class Lexicon extends scala.collection.immutable.Set[String] with RegexParsers { - val len = str.length - def ext(lexicon: Lexicon, i: Int): Lexicon = - if (i == len) lexicon - else { - val c = str.charAt(i) - val is_last = (i + 1 == len) - lexicon.tab.get(c) match { - case Some((tip, lex)) => Lexicon(lexicon.tab + (c -> (tip || is_last, ext(lex, i + 1)))) - case None => Lexicon(lexicon.tab + (c -> (is_last, ext(empty_lexicon, i + 1)))) - } - } - if (is_literal(lx, str)) lx else ext(lx, 0) - } + /* representation */ -} + import Lexicon.Tree + val main_tree: Tree = Lexicon.empty_tree + val max_entry = -1 -class Scan extends RegexParsers -{ - override val whiteSpace = "".r + /* Set methods */ + + override def stringPrefix = "Lexicon" + + override def isEmpty: Boolean = (max_entry < 0) + + private def destruct: List[String] = + { + def dest(tree: Tree, result: List[String]): List[String] = + (result /: tree.branches.toList) ((res, entry) => + entry match { case (_, (s, tr)) => + if (s.isEmpty) dest(tr, res) else dest(tr, s :: res) }) + dest(main_tree, Nil).sort(_ <= _) + } + + def size: Int = destruct.length + def elements: Iterator[String] = destruct.elements - def keyword(lx: Scan.Lexicon): Parser[String] = new Parser[String] { - def apply(in: Input) = + def contains(str: String): Boolean = { - val source = in.source - val offset = in.offset - val len = source.length - offset + val len = str.length + def check(tree: Tree, i: Int): Boolean = + i < len && { + tree.branches.get(str.charAt(i)) match { + case Some((s, tr)) => !s.isEmpty && i + 1 == len || check(tr, i + 1) + case None => false + } + } + check(main_tree, 0) + } - def scan(lexicon: Scan.Lexicon, i: Int, res: Int): Int = + def +(str: String): Lexicon = + { + val len = str.length + def extend(tree: Tree, i: Int): Tree = { if (i < len) { - lexicon.tab.get(source.charAt(offset + i)) match { - case Some((tip, lex)) => scan(lex, i + 1, if (tip) i + 1 else res) - case None => res + val c = str.charAt(i) + val end = (i + 1 == len) + tree.branches.get(c) match { + case Some((s, tr)) => + Tree(tree.branches + (c -> (if (end) str else s, extend(tr, i + 1)))) + case None => + Tree(tree.branches + (c -> (if (end) str else "", extend(Lexicon.empty_tree, i + 1)))) } - } else res + } else tree } - scan(lx, 0, 0) match { - case res if res > 0 => - Success(source.subSequence(offset, res).toString, in.drop(res)) - case _ => Failure("keyword expected", in) + if (contains(str)) this + else Lexicon.make(extend(main_tree, 0), max_entry max str.length) + } + + def empty[A]: Set[A] = error("Undefined") + def -(str: String): Lexicon = error("Undefined") + + + /* RegexParsers methods */ + + override val whiteSpace = "".r + + def keyword: Parser[String] = new Parser[String] { + def apply(in: Input) = + { + val source = in.source + val offset = in.offset + val len = source.length - offset + + def scan(tree: Tree, i: Int, res: String): String = + { + if (i < len) { + tree.branches.get(source.charAt(offset + i)) match { + case Some((s, tr)) => scan(tr, i + 1, if (s.isEmpty) res else s) + case None => res + } + } else res + } + val res = scan(main_tree, 0, "") + if (res.isEmpty) Failure("keyword expected", in) + else Success(res, in.drop(res.length)) } - } - }.named("keyword") + }.named("keyword") + } }