src/Pure/General/scan.scala
changeset 31649 a11ea667d676
parent 31648 31b1f296515b
child 31650 cfaed41ee2c5
--- a/src/Pure/General/scan.scala	Tue Jun 16 15:25:32 2009 +0200
+++ b/src/Pure/General/scan.scala	Tue Jun 16 18:47:45 2009 +0200
@@ -1,7 +1,7 @@
 /*  Title:      Pure/General/scan.scala
     Author:     Makarius
 
-Efficient scanning of literals.
+Efficient scanning of keywords.
 */
 
 package isabelle
@@ -12,71 +12,113 @@
 object Scan
 {
 
-  /* class Lexicon -- position tree */
-
-  case class Lexicon(val tab: Map[Char, (Boolean, Lexicon)])
+  /** Lexicon -- position tree **/
 
-  val empty_lexicon = Lexicon(Map())
-
-  def is_literal(lx: Lexicon, str: CharSequence): Boolean =
+  object Lexicon
   {
-    val len = str.length
-    def is_lit(lexicon: Lexicon, i: Int): Boolean =
-      i < len && {
-        lexicon.tab.get(str.charAt(i)) match {
-          case Some((tip, lex)) => tip && i + 1 == len || is_lit(lex, i + 1)
-          case None => false
-        }
+    private case class Tree(val branches: Map[Char, (String, Tree)])
+    private val empty_tree = Tree(Map())
+
+    private def make(tree: Tree, max: Int): Lexicon =
+      new Lexicon {
+        override val main_tree = tree
+        override val max_entry = max
       }
-    is_lit(lx, 0)
+
+    val empty: Lexicon = new Lexicon
+    def apply(strs: String*): Lexicon = (empty /: strs) ((lex, str) => lex + str)
   }
 
-  def extend_lexicon(lx: Lexicon, str: CharSequence): Lexicon =
+  class Lexicon extends scala.collection.immutable.Set[String] with RegexParsers
   {
-    val len = str.length
-    def ext(lexicon: Lexicon, i: Int): Lexicon =
-      if (i == len) lexicon
-      else {
-        val c = str.charAt(i)
-        val is_last = (i + 1 == len)
-        lexicon.tab.get(c) match {
-          case Some((tip, lex)) => Lexicon(lexicon.tab + (c -> (tip || is_last, ext(lex, i + 1))))
-          case None => Lexicon(lexicon.tab + (c -> (is_last, ext(empty_lexicon, i + 1))))
-        }
-      }
-    if (is_literal(lx, str)) lx else ext(lx, 0)
-  }
+    /* representation */
 
-}
+    import Lexicon.Tree
+    val main_tree: Tree = Lexicon.empty_tree
+    val max_entry = -1
 
 
-class Scan extends RegexParsers
-{
-  override val whiteSpace = "".r
+    /* Set methods */
+
+    override def stringPrefix = "Lexicon"
+
+    override def isEmpty: Boolean = (max_entry < 0)
+
+    private def destruct: List[String] =
+    {
+      def dest(tree: Tree, result: List[String]): List[String] =
+        (result /: tree.branches.toList) ((res, entry) =>
+          entry match { case (_, (s, tr)) =>
+            if (s.isEmpty) dest(tr, res) else dest(tr, s :: res) })
+      dest(main_tree, Nil).sort(_ <= _)
+    }
+
+    def size: Int = destruct.length
+    def elements: Iterator[String] = destruct.elements
 
-  def keyword(lx: Scan.Lexicon): Parser[String] = new Parser[String] {
-    def apply(in: Input) =
+    def contains(str: String): Boolean =
     {
-      val source = in.source
-      val offset = in.offset
-      val len = source.length - offset
+      val len = str.length
+      def check(tree: Tree, i: Int): Boolean =
+        i < len && {
+          tree.branches.get(str.charAt(i)) match {
+            case Some((s, tr)) => !s.isEmpty && i + 1 == len || check(tr, i + 1)
+            case None => false
+          }
+        }
+      check(main_tree, 0)
+    }
 
-      def scan(lexicon: Scan.Lexicon, i: Int, res: Int): Int =
+    def +(str: String): Lexicon =
+    {
+      val len = str.length
+      def extend(tree: Tree, i: Int): Tree =
       {
         if (i < len) {
-          lexicon.tab.get(source.charAt(offset + i)) match {
-            case Some((tip, lex)) => scan(lex, i + 1, if (tip) i + 1 else res)
-            case None => res
+          val c = str.charAt(i)
+          val end = (i + 1 == len)
+          tree.branches.get(c) match {
+            case Some((s, tr)) =>
+              Tree(tree.branches + (c -> (if (end) str else s, extend(tr, i + 1))))
+            case None =>
+              Tree(tree.branches + (c -> (if (end) str else "", extend(Lexicon.empty_tree, i + 1))))
           }
-        } else res
+        } else tree
       }
-      scan(lx, 0, 0) match {
-        case res if res > 0 =>
-          Success(source.subSequence(offset, res).toString, in.drop(res))
-        case _ => Failure("keyword expected", in)
+      if (contains(str)) this
+      else Lexicon.make(extend(main_tree, 0), max_entry max str.length)
+    }
+
+    def empty[A]: Set[A] = error("Undefined")
+    def -(str: String): Lexicon = error("Undefined")
+
+
+    /* RegexParsers methods */
+
+    override val whiteSpace = "".r
+
+    def keyword: Parser[String] = new Parser[String] {
+      def apply(in: Input) =
+      {
+        val source = in.source
+        val offset = in.offset
+        val len = source.length - offset
+
+        def scan(tree: Tree, i: Int, res: String): String =
+        {
+          if (i < len) {
+            tree.branches.get(source.charAt(offset + i)) match {
+              case Some((s, tr)) => scan(tr, i + 1, if (s.isEmpty) res else s)
+              case None => res
+            }
+          } else res
+        }
+        val res = scan(main_tree, 0, "")
+        if (res.isEmpty) Failure("keyword expected", in)
+        else Success(res, in.drop(res.length))
       }
-    }
-  }.named("keyword")
+    }.named("keyword")
 
+  }
 }