src/Pure/General/scan.scala
author wenzelm
Tue, 16 Jun 2009 15:25:32 +0200
changeset 31648 31b1f296515b
child 31649 a11ea667d676
permissions -rw-r--r--
Efficient scanning of literals.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
31648
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/scan.scala
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     3
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     4
Efficient scanning of literals.
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     5
*/
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     6
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     7
package isabelle
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     8
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
     9
import scala.util.parsing.combinator.RegexParsers
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    10
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    11
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    12
object Scan
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    13
{
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    14
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    15
  /* class Lexicon -- position tree */
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    16
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    17
  case class Lexicon(val tab: Map[Char, (Boolean, Lexicon)])
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    18
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    19
  val empty_lexicon = Lexicon(Map())
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    20
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    21
  def is_literal(lx: Lexicon, str: CharSequence): Boolean =
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    22
  {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    23
    val len = str.length
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    24
    def is_lit(lexicon: Lexicon, i: Int): Boolean =
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    25
      i < len && {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    26
        lexicon.tab.get(str.charAt(i)) match {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    27
          case Some((tip, lex)) => tip && i + 1 == len || is_lit(lex, i + 1)
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    28
          case None => false
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    29
        }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    30
      }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    31
    is_lit(lx, 0)
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    32
  }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    33
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    34
  def extend_lexicon(lx: Lexicon, str: CharSequence): Lexicon =
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    35
  {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    36
    val len = str.length
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    37
    def ext(lexicon: Lexicon, i: Int): Lexicon =
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    38
      if (i == len) lexicon
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    39
      else {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    40
        val c = str.charAt(i)
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    41
        val is_last = (i + 1 == len)
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    42
        lexicon.tab.get(c) match {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    43
          case Some((tip, lex)) => Lexicon(lexicon.tab + (c -> (tip || is_last, ext(lex, i + 1))))
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    44
          case None => Lexicon(lexicon.tab + (c -> (is_last, ext(empty_lexicon, i + 1))))
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    45
        }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    46
      }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    47
    if (is_literal(lx, str)) lx else ext(lx, 0)
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    48
  }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    49
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    50
}
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    51
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    52
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    53
class Scan extends RegexParsers
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    54
{
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    55
  override val whiteSpace = "".r
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    56
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    57
  def keyword(lx: Scan.Lexicon): Parser[String] = new Parser[String] {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    58
    def apply(in: Input) =
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    59
    {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    60
      val source = in.source
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    61
      val offset = in.offset
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    62
      val len = source.length - offset
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    63
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    64
      def scan(lexicon: Scan.Lexicon, i: Int, res: Int): Int =
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    65
      {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    66
        if (i < len) {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    67
          lexicon.tab.get(source.charAt(offset + i)) match {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    68
            case Some((tip, lex)) => scan(lex, i + 1, if (tip) i + 1 else res)
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    69
            case None => res
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    70
          }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    71
        } else res
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    72
      }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    73
      scan(lx, 0, 0) match {
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    74
        case res if res > 0 =>
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    75
          Success(source.subSequence(offset, res).toString, in.drop(res))
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    76
        case _ => Failure("keyword expected", in)
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    77
      }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    78
    }
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    79
  }.named("keyword")
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    80
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    81
}
31b1f296515b Efficient scanning of literals.
wenzelm
parents:
diff changeset
    82