separate JSON lexer;
authorwenzelm
Thu Oct 26 23:31:03 2017 +0200 (19 months ago)
changeset 669225a476a87a535
parent 66921 3d3bd0718ef2
child 66923 914935f8a462
separate JSON lexer;
src/Pure/General/json.scala
src/Pure/General/scan.scala
     1.1 --- a/src/Pure/General/json.scala	Thu Oct 26 15:08:53 2017 +0200
     1.2 +++ b/src/Pure/General/json.scala	Thu Oct 26 23:31:03 2017 +0200
     1.3 @@ -7,14 +7,107 @@
     1.4  package isabelle
     1.5  
     1.6  
     1.7 +import scala.util.parsing.combinator.lexical.Scanners
     1.8 +import scala.util.parsing.input.CharArrayReader.EofCh
     1.9  import scala.util.parsing.json.{JSONObject, JSONArray}
    1.10  
    1.11 +
    1.12  object JSON
    1.13  {
    1.14    type T = Any
    1.15    type S = String
    1.16  
    1.17  
    1.18 +  /* lexer */
    1.19 +
    1.20 +  object Kind extends Enumeration
    1.21 +  {
    1.22 +    val KEYWORD, STRING, NUMBER, ERROR = Value
    1.23 +  }
    1.24 +
    1.25 +  sealed case class Token(kind: Kind.Value, text: String)
    1.26 +  {
    1.27 +    def is_keyword: Boolean = kind == Kind.KEYWORD
    1.28 +    def is_string: Boolean = kind == Kind.STRING
    1.29 +    def is_number: Boolean = kind == Kind.NUMBER
    1.30 +    def is_error: Boolean = kind == Kind.ERROR
    1.31 +  }
    1.32 +
    1.33 +  object Lexer extends Scanners with Scan.Parsers
    1.34 +  {
    1.35 +    override type Elem = Char
    1.36 +    type Token = JSON.Token
    1.37 +
    1.38 +    def errorToken(msg: String): Token = Token(Kind.ERROR, msg)
    1.39 +
    1.40 +    override val whiteSpace = """\s+""".r
    1.41 +    def whitespace: Parser[Any] = many(character(Symbol.is_ascii_blank(_)))
    1.42 +
    1.43 +    val letter: Parser[String] = one(character(Symbol.is_ascii_letter(_)))
    1.44 +    val letters1: Parser[String] = many1(character(Symbol.is_ascii_letter(_)))
    1.45 +
    1.46 +    def digits: Parser[String] = many(character(Symbol.is_ascii_digit(_)))
    1.47 +    def digits1: Parser[String] = many1(character(Symbol.is_ascii_digit(_)))
    1.48 +
    1.49 +
    1.50 +    /* keyword */
    1.51 +
    1.52 +    def keyword: Parser[Token] =
    1.53 +      elem("keyword", "{}[],:".contains(_)) ^^ (c => Token(Kind.KEYWORD, c.toString)) |
    1.54 +      letters1 ^^ (s =>
    1.55 +        if (s == "true" || s == "false" || s == "null") Token(Kind.KEYWORD, s)
    1.56 +        else errorToken("Bad keyword: " + quote(s)))
    1.57 +
    1.58 +
    1.59 +    /* string */
    1.60 +
    1.61 +    def string: Parser[Token] =
    1.62 +      '\"' ~> rep(string_body) <~ '\"' ^^ (cs => Token(Kind.STRING, cs.mkString))
    1.63 +
    1.64 +    def string_body: Parser[Char] =
    1.65 +      elem("", c => c != '\"' && c != '\\' && c != EofCh) | '\\' ~> string_escape
    1.66 +
    1.67 +    def string_escape: Parser[Char] =
    1.68 +      elem("", "\"\\/".contains(_)) |
    1.69 +      elem("", "bfnrt".contains(_)) ^^
    1.70 +        { case 'b' => '\b' case 'f' => '\f' case 'n' => '\n' case 'r' => '\r' case 't' => '\t' } |
    1.71 +      'u' ~> repeated(character("0123456789abcdefABCDEF".contains(_)), 4, 4) ^^
    1.72 +        (s => Integer.parseInt(s, 16).toChar)
    1.73 +
    1.74 +    def string_failure: Parser[Token] = '\"' ~> failure("Unterminated string")
    1.75 +
    1.76 +
    1.77 +    /* number */
    1.78 +
    1.79 +    def number: Parser[Token] =
    1.80 +      opt("-" <~ whitespace) ~ number_body ~ opt(letter) ^^ {
    1.81 +        case a ~ b ~ None => Token(Kind.NUMBER, a.getOrElse("") + b)
    1.82 +        case a ~ b ~ Some(c) =>
    1.83 +          errorToken("Invalid number format: " + quote(a.getOrElse("") + b + c))
    1.84 +      }
    1.85 +
    1.86 +    def number_body: Parser[String] =
    1.87 +      (zero | positive) ~ opt(number_fract) ~ opt(number_exp) ^^
    1.88 +        { case a ~ b ~ c => a + b.getOrElse("") + c.getOrElse("") }
    1.89 +
    1.90 +    def number_fract: Parser[String] = "." ~ digits1 ^^ { case a ~ b => a + b }
    1.91 +
    1.92 +    def number_exp: Parser[String] =
    1.93 +      one(character("eE".contains(_))) ~ maybe(character("-+".contains(_))) ~ digits1 ^^
    1.94 +        { case a ~ b ~ c => a + b + c }
    1.95 +
    1.96 +    def zero = one(character(c => c == '0'))
    1.97 +    def nonzero = one(character(c => c != '0' && Symbol.is_ascii_digit(c)))
    1.98 +    def positive: Parser[String] = nonzero ~ digits ^^ { case a ~ b => a + b }
    1.99 +
   1.100 +
   1.101 +    /* token */
   1.102 +
   1.103 +    def token: Parser[Token] =
   1.104 +      keyword | (string | (string_failure | (number | failure("Illegal character"))))
   1.105 +  }
   1.106 +
   1.107 +
   1.108    /* standard format */
   1.109  
   1.110    def parse(s: S): T = Format.unapply(s) getOrElse error("Malformed JSON")
     2.1 --- a/src/Pure/General/scan.scala	Thu Oct 26 15:08:53 2017 +0200
     2.2 +++ b/src/Pure/General/scan.scala	Thu Oct 26 23:31:03 2017 +0200
     2.3 @@ -74,6 +74,9 @@
     2.4      def one(pred: Symbol.Symbol => Boolean): Parser[String] =
     2.5        repeated(pred, 1, 1)
     2.6  
     2.7 +    def maybe(pred: Symbol.Symbol => Boolean): Parser[String] =
     2.8 +      repeated(pred, 0, 1)
     2.9 +
    2.10      def many(pred: Symbol.Symbol => Boolean): Parser[String] =
    2.11        repeated(pred, 0, Integer.MAX_VALUE)
    2.12