--- a/src/Pure/General/scan.scala Fri Feb 14 20:58:48 2014 +0100
+++ b/src/Pure/General/scan.scala Fri Feb 14 21:06:20 2014 +0100
@@ -43,9 +43,9 @@
p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)
- /* symbol range */
+ /* repeated symbols */
- def symbol_range(pred: Symbol.Symbol => Boolean, min_count: Int, max_count: Int): Parser[String] =
+ def repeated(pred: Symbol.Symbol => Boolean, min_count: Int, max_count: Int): Parser[String] =
new Parser[String]
{
def apply(in: Input) =
@@ -66,16 +66,22 @@
if (count < min_count) Failure("bad input", in)
else Success(in.source.subSequence(start, i).toString, in.drop(i - start))
}
- }.named("symbol_range")
+ }.named("repeated")
def one(pred: Symbol.Symbol => Boolean): Parser[String] =
- symbol_range(pred, 1, 1)
+ repeated(pred, 1, 1)
def many(pred: Symbol.Symbol => Boolean): Parser[String] =
- symbol_range(pred, 0, Integer.MAX_VALUE)
+ repeated(pred, 0, Integer.MAX_VALUE)
def many1(pred: Symbol.Symbol => Boolean): Parser[String] =
- symbol_range(pred, 1, Integer.MAX_VALUE)
+ repeated(pred, 1, Integer.MAX_VALUE)
+
+
+ /* character */
+
+ def character(pred: Char => Boolean): Symbol.Symbol => Boolean =
+ (s: Symbol.Symbol) => s.length == 1 && pred(s.charAt(0))
/* quoted strings */
@@ -279,7 +285,7 @@
/* keyword */
- def keyword(lexicon: Lexicon): Parser[String] = new Parser[String]
+ def literal(lexicon: Lexicon): Parser[String] = new Parser[String]
{
def apply(in: Input) =
{
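
The renamed combinators compose as before; a minimal usage sketch
(hypothetical, not part of the changeset) of "repeated" together with the
new "character" adapter, which lifts a Char predicate to single-character
symbols:

  // parse a run of 2..4 ASCII digits (assumes an "import isabelle._" context)
  val digits = Scan.Parsers.repeated(Scan.Parsers.character(Symbol.is_ascii_digit), 2, 4)
  Scan.Parsers.parseAll(digits, "2014") match {
    case Scan.Parsers.Success(s, _) => println(s)  // prints "2014"
    case _ => println("no parse")
  }
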
--- a/src/Pure/General/symbol.scala Fri Feb 14 20:58:48 2014 +0100
+++ b/src/Pure/General/symbol.scala Fri Feb 14 21:06:20 2014 +0100
@@ -19,9 +19,16 @@
/* ASCII characters */
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
+
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
+
+ def is_ascii_hex(c: Char): Boolean =
+ '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
+
def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
+ def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)
+
def is_ascii_letdig(c: Char): Boolean =
is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
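
The added predicates are plain character tests; a few illustrative checks
(hypothetical, not part of the changeset):

  // is_ascii_hex accepts digits and both letter cases
  assert(Symbol.is_ascii_hex('7') && Symbol.is_ascii_hex('a') && Symbol.is_ascii_hex('F'))
  assert(!Symbol.is_ascii_hex('g'))
  // is_ascii_blank includes vertical tab (\u000b) among the blank characters
  assert(Symbol.is_ascii_blank('\t') && Symbol.is_ascii_blank('\u000b'))
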
--- a/src/Pure/Isar/completion.scala Fri Feb 14 20:58:48 2014 +0100
+++ b/src/Pure/Isar/completion.scala Fri Feb 14 21:06:20 2014 +0100
@@ -185,7 +185,7 @@
line: CharSequence): Option[Completion.Result] =
{
val raw_result =
- Scan.Parsers.parse(Scan.Parsers.keyword(abbrevs_lex), new Library.Reverse(line)) match {
+ Scan.Parsers.parse(Scan.Parsers.literal(abbrevs_lex), new Library.Reverse(line)) match {
case Scan.Parsers.Success(reverse_a, _) =>
val abbrevs = abbrevs_map.get_list(reverse_a)
abbrevs match {
--- a/src/Pure/Isar/token.scala Fri Feb 14 20:58:48 2014 +0100
+++ b/src/Pure/Isar/token.scala Fri Feb 14 21:06:20 2014 +0100
@@ -81,7 +81,7 @@
(x => Token(Token.Kind.SYM_IDENT, x))
val command_keyword =
- keyword(lexicon) ^^
+ literal(lexicon) ^^
(x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/ML/ml_lex.scala Fri Feb 14 21:06:20 2014 +0100
@@ -0,0 +1,162 @@
+/* Title: Pure/ML/ml_lex.scala
+ Author: Makarius
+
+Lexical syntax for SML.
+*/
+
+package isabelle
+
+import scala.util.parsing.input.{Reader, CharSequenceReader}
+
+
+object ML_Lex
+{
+ /** tokens **/
+
+ object Kind extends Enumeration
+ {
+ val KEYWORD = Value("keyword")
+ val IDENT = Value("identifier")
+ val LONG_IDENT = Value("long identifier")
+ val TYPE_VAR = Value("type variable")
+ val WORD = Value("word")
+ val INT = Value("integer")
+ val REAL = Value("real")
+ val CHAR = Value("character")
+ val STRING = Value("quoted string")
+ val SPACE = Value("white space")
+ val COMMENT = Value("comment text")
+ val ERROR = Value("bad input")
+ }
+
+ sealed case class Token(val kind: Kind.Value, val source: String)
+
+
+
+ /** parsers **/
+
+ private val lexicon =
+ Scan.Lexicon("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
+ "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
+ "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn",
+ "fun", "functor", "handle", "if", "in", "include", "infix", "infixr",
+ "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec",
+ "sharing", "sig", "signature", "struct", "structure", "then", "type",
+ "val", "where", "while", "with", "withtype")
+
+ private object Parsers extends Scan.Parsers
+ {
+ /* string material */
+
+ private val blanks1 = many1(character(Symbol.is_ascii_blank))
+
+ private val escape =
+ one(character("\"\\abtnvfr".contains(_))) |
+ "^" ~ one(character(c => '@' <= c && c <= '_')) ^^ { case x ~ y => x + y } |
+ repeated(character(Symbol.is_ascii_digit), 3, 3)
+
+ private val str =
+ one(character(c => c != '"' && c != '\\' && ' ' <= c && c <= '~')) |
+ "\\" ~ escape ^^ { case x ~ y => x + y }
+
+ private val gap = "\\" ~ blanks1 ~ "\\" ^^ { case x ~ y ~ z => x + y + z }
+ private val gaps = rep(gap) ^^ (_.mkString)
+
+
+ /* delimited token */
+
+ private def delimited_token: Parser[Token] =
+ {
+ val char =
+ "#\"" ~ gaps ~ str ~ gaps ~ "\"" ^^
+ { case u ~ v ~ x ~ y ~ z => Token(Kind.CHAR, u + v + x + y + z) }
+
+ val string =
+ "\"" ~ (rep(gap | str) ^^ (_.mkString)) ~ "\"" ^^
+ { case x ~ y ~ z => Token(Kind.STRING, x + y + z) }
+
+ val cmt = comment ^^ (x => Token(Kind.COMMENT, x))
+
+ char | (string | cmt)
+ }
+
+ private def other_token: Parser[Token] =
+ {
+ /* identifiers */
+
+ val letdigs = many(character(Symbol.is_ascii_letdig))
+
+ val alphanumeric =
+ one(character(Symbol.is_ascii_letter)) ~ letdigs ^^ { case x ~ y => x + y }
+
+ val symbolic = many1(character("!#$%&*+-/:<=>?@\\^`|~".contains(_)))
+
+ val ident = (alphanumeric | symbolic) ^^ (x => Token(Kind.IDENT, x))
+
+ val long_ident =
+ rep1(alphanumeric ~ "." ^^ { case x ~ y => x + y }) ~
+ (alphanumeric | (symbolic | "=")) ^^
+ { case x ~ y => Token(Kind.LONG_IDENT, x.mkString + y) }
+
+ val type_var = "'" ~ letdigs ^^ { case x ~ y => Token(Kind.TYPE_VAR, x + y) }
+
+
+ /* numerals */
+
+ val dec = many1(character(Symbol.is_ascii_digit))
+ val hex = many1(character(Symbol.is_ascii_hex))
+ val sign = opt("~") ^^ { case Some(x) => x case None => "" }
+ val decint = sign ~ dec ^^ { case x ~ y => x + y }
+ val exp = ("E" | "e") ~ decint ^^ { case x ~ y => x + y }
+
+ val word =
+ ("0wx" ~ hex ^^ { case x ~ y => x + y } | "0w" ~ dec ^^ { case x ~ y => x + y }) ^^
+ (x => Token(Kind.WORD, x))
+
+ val int =
+ sign ~ ("0x" ~ hex ^^ { case x ~ y => x + y } | dec) ^^
+ { case x ~ y => Token(Kind.INT, x + y) }
+
+ val real =
+ (decint ~ "." ~ dec ~ (opt(exp) ^^ { case Some(x) => x case None => "" }) ^^
+ { case x ~ y ~ z ~ w => x + y + z + w } |
+ decint ~ exp ^^ { case x ~ y => x + y }) ^^ (x => Token(Kind.REAL, x))
+
+
+ /* recover delimited */
+
+ val recover_char =
+ "#\"" ~ gaps ~ (opt(str ~ gaps) ^^ { case Some(x ~ y) => x + y case None => "" }) ^^
+ { case x ~ y ~ z => x + y + z }
+
+ val recover_string =
+ "\"" ~ (rep(gap | str) ^^ (_.mkString)) ^^ { case x ~ y => x + y }
+
+ val recover_delimited =
+ (recover_char | (recover_string | recover_comment)) ^^ (x => Token(Kind.ERROR, x))
+
+
+ /* token */
+
+ val space = blanks1 ^^ (x => Token(Kind.SPACE, x))
+
+ val keyword = literal(lexicon) ^^ (x => Token(Kind.KEYWORD, x))
+
+ val bad = one(_ => true) ^^ (x => Token(Kind.ERROR, x))
+
+ space | (recover_delimited |
+ (((word | (real | (int | (long_ident | (ident | type_var))))) ||| keyword) | bad))
+ }
+
+ def token: Parser[Token] = delimited_token | other_token
+ }
+
+ def tokenize(input: CharSequence): List[Token] =
+ {
+ Parsers.parseAll(Parsers.rep(Parsers.token), new CharSequenceReader(input)) match {
+ case Parsers.Success(tokens, _) => tokens
+ case _ => error("Unexpected failure of tokenizing input:\n" + input.toString)
+ }
+ }
+}
+
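
A usage sketch for the new lexer (hypothetical, not part of the changeset;
assumes an "import isabelle._" context). Note that tokenize always produces
a token list, since the "bad" fallback turns any otherwise unparsable symbol
into a single ERROR token:

  // tokenize a small SML fragment and show the non-space tokens
  for (t <- ML_Lex.tokenize("fun double x = 2 * x (* twice *)")
       if t.kind != ML_Lex.Kind.SPACE)
    println(t.kind.toString + ": " + t.source)
  // e.g. "keyword: fun", "identifier: double", "integer: 2",
  // "comment text: (* twice *)"
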
--- a/src/Pure/build-jars Fri Feb 14 20:58:48 2014 +0100
+++ b/src/Pure/build-jars Fri Feb 14 21:06:20 2014 +0100
@@ -43,6 +43,7 @@
Isar/outer_syntax.scala
Isar/parse.scala
Isar/token.scala
+ ML/ml_lex.scala
PIDE/command.scala
PIDE/document.scala
PIDE/document_id.scala