# HG changeset patch # User wenzelm # Date 1509053463 -7200 # Node ID 5a476a87a535ee0e09dcb68a096687dadaec9d0c # Parent 3d3bd0718ef22326f1d61a118b82a472be0f5f84 separate JSON lexer; diff -r 3d3bd0718ef2 -r 5a476a87a535 src/Pure/General/json.scala --- a/src/Pure/General/json.scala Thu Oct 26 15:08:53 2017 +0200 +++ b/src/Pure/General/json.scala Thu Oct 26 23:31:03 2017 +0200 @@ -7,14 +7,107 @@ package isabelle +import scala.util.parsing.combinator.lexical.Scanners +import scala.util.parsing.input.CharArrayReader.EofCh import scala.util.parsing.json.{JSONObject, JSONArray} + object JSON { type T = Any type S = String + /* lexer */ + + object Kind extends Enumeration + { + val KEYWORD, STRING, NUMBER, ERROR = Value + } + + sealed case class Token(kind: Kind.Value, text: String) + { + def is_keyword: Boolean = kind == Kind.KEYWORD + def is_string: Boolean = kind == Kind.STRING + def is_number: Boolean = kind == Kind.NUMBER + def is_error: Boolean = kind == Kind.ERROR + } + + object Lexer extends Scanners with Scan.Parsers + { + override type Elem = Char + type Token = JSON.Token + + def errorToken(msg: String): Token = Token(Kind.ERROR, msg) + + override val whiteSpace = """\s+""".r + def whitespace: Parser[Any] = many(character(Symbol.is_ascii_blank(_))) + + val letter: Parser[String] = one(character(Symbol.is_ascii_letter(_))) + val letters1: Parser[String] = many1(character(Symbol.is_ascii_letter(_))) + + def digits: Parser[String] = many(character(Symbol.is_ascii_digit(_))) + def digits1: Parser[String] = many1(character(Symbol.is_ascii_digit(_))) + + + /* keyword */ + + def keyword: Parser[Token] = + elem("keyword", "{}[],:".contains(_)) ^^ (c => Token(Kind.KEYWORD, c.toString)) | + letters1 ^^ (s => + if (s == "true" || s == "false" || s == "null") Token(Kind.KEYWORD, s) + else errorToken("Bad keyword: " + quote(s))) + + + /* string */ + + def string: Parser[Token] = + '\"' ~> rep(string_body) <~ '\"' ^^ (cs => Token(Kind.STRING, cs.mkString)) + + def string_body: Parser[Char] = + elem("", c => c != '\"' && c != '\\' && c != EofCh) | '\\' ~> string_escape + + def string_escape: Parser[Char] = + elem("", "\"\\/".contains(_)) | + elem("", "bfnrt".contains(_)) ^^ + { case 'b' => '\b' case 'f' => '\f' case 'n' => '\n' case 'r' => '\r' case 't' => '\t' } | + 'u' ~> repeated(character("0123456789abcdefABCDEF".contains(_)), 4, 4) ^^ + (s => Integer.parseInt(s, 16).toChar) + + def string_failure: Parser[Token] = '\"' ~> failure("Unterminated string") + + + /* number */ + + def number: Parser[Token] = + opt("-" <~ whitespace) ~ number_body ~ opt(letter) ^^ { + case a ~ b ~ None => Token(Kind.NUMBER, a.getOrElse("") + b) + case a ~ b ~ Some(c) => + errorToken("Invalid number format: " + quote(a.getOrElse("") + b + c)) + } + + def number_body: Parser[String] = + (zero | positive) ~ opt(number_fract) ~ opt(number_exp) ^^ + { case a ~ b ~ c => a + b.getOrElse("") + c.getOrElse("") } + + def number_fract: Parser[String] = "." ~ digits1 ^^ { case a ~ b => a + b } + + def number_exp: Parser[String] = + one(character("eE".contains(_))) ~ maybe(character("-+".contains(_))) ~ digits1 ^^ + { case a ~ b ~ c => a + b + c } + + def zero = one(character(c => c == '0')) + def nonzero = one(character(c => c != '0' && Symbol.is_ascii_digit(c))) + def positive: Parser[String] = nonzero ~ digits ^^ { case a ~ b => a + b } + + + /* token */ + + def token: Parser[Token] = + keyword | (string | (string_failure | (number | failure("Illegal character")))) + } + + /* standard format */ def parse(s: S): T = Format.unapply(s) getOrElse error("Malformed JSON") diff -r 3d3bd0718ef2 -r 5a476a87a535 src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala Thu Oct 26 15:08:53 2017 +0200 +++ b/src/Pure/General/scan.scala Thu Oct 26 23:31:03 2017 +0200 @@ -74,6 +74,9 @@ def one(pred: Symbol.Symbol => Boolean): Parser[String] = repeated(pred, 1, 1) + def maybe(pred: Symbol.Symbol => Boolean): Parser[String] = + repeated(pred, 0, 1) + def many(pred: Symbol.Symbol => Boolean): Parser[String] = repeated(pred, 0, Integer.MAX_VALUE)