src/Pure/General/scan.scala
changeset 40523 1050315f6ee2
parent 40290 47f572aff50a
child 42441 781c622af16a
equal deleted inserted replaced
40522:37b79d789d91 40523:1050315f6ee2
     1 /*  Title:      Pure/General/scan.scala
     1 /*  Title:      Pure/General/scan.scala
     2     Author:     Makarius
     2     Author:     Makarius
     3 
     3 
     4 Efficient scanning of keywords.
     4 Efficient scanning of keywords and tokens.
     5 */
     5 */
     6 
     6 
     7 package isabelle
     7 package isabelle
     8 
     8 
     9 
     9 
   179     /* quoted strings */
   179     /* quoted strings */
   180 
   180 
   181     private def quoted(quote: String): Parser[String] =
   181     private def quoted(quote: String): Parser[String] =
   182     {
   182     {
   183       quote ~
   183       quote ~
   184         rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
   184         rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
   185           "\\" + quote | "\\\\" |
       
   186           (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
   185           (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
   187       quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
   186       quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
   188     }.named("quoted")
   187     }.named("quoted")
   189 
   188 
   190     def quoted_content(quote: String, source: String): String =
   189     def quoted_content(quote: String, source: String): String =
   191     {
   190     {
   192       require(parseAll(quoted(quote), source).successful)
   191       require(parseAll(quoted(quote), source).successful)
   193       val body = source.substring(1, source.length - 1)
   192       val body = source.substring(1, source.length - 1)
   194       if (body.exists(_ == '\\')) {
   193       if (body.exists(_ == '\\')) {
   195         val content =
   194         val content =
   196           rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
   195           rep(many1(sym => sym != quote && sym != "\\") |
   197               "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
   196               "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
   198         parseAll(content ^^ (_.mkString), body).get
   197         parseAll(content ^^ (_.mkString), body).get
   199       }
   198       }
   200       else body
   199       else body
   201     }
   200     }
   203 
   202 
   204     /* verbatim text */
   203     /* verbatim text */
   205 
   204 
   206     private def verbatim: Parser[String] =
   205     private def verbatim: Parser[String] =
   207     {
   206     {
   208       "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_wellformed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
   207       "{*" ~ rep(many1(sym => sym != "*") | """\*(?!\})""".r) ~ "*}" ^^
   209         { case x ~ ys ~ z => x + ys.mkString + z }
   208         { case x ~ ys ~ z => x + ys.mkString + z }
   210     }.named("verbatim")
   209     }.named("verbatim")
   211 
   210 
   212     def verbatim_content(source: String): String =
   211     def verbatim_content(source: String): String =
   213     {
   212     {
   219     /* nested comments */
   218     /* nested comments */
   220 
   219 
   221     def comment: Parser[String] = new Parser[String]
   220     def comment: Parser[String] = new Parser[String]
   222     {
   221     {
   223       val comment_text =
   222       val comment_text =
   224         rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_wellformed(sym)) |
   223         rep(many1(sym => sym != "*" && sym != "(") | """\*(?!\))|\((?!\*)""".r)
   225           """\*(?!\))|\((?!\*)""".r)
       
   226       val comment_open = "(*" ~ comment_text ^^^ ()
   224       val comment_open = "(*" ~ comment_text ^^^ ()
   227       val comment_close = comment_text ~ "*)" ^^^ ()
   225       val comment_close = comment_text ~ "*)" ^^^ ()
   228 
   226 
   229       def apply(in: Input) =
   227       def apply(in: Input) =
   230       {
   228       {
   275       val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x))
   273       val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x))
   276       val float =
   274       val float =
   277         ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x))
   275         ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x))
   278 
   276 
   279       val sym_ident =
   277       val sym_ident =
   280         (many1(symbols.is_symbolic_char) |
   278         (many1(symbols.is_symbolic_char) | one(sym => symbols.is_symbolic(sym))) ^^
   281           one(sym => symbols.is_symbolic(sym) & Symbol.is_wellformed(sym))) ^^
       
   282         (x => Token(Token.Kind.SYM_IDENT, x))
   279         (x => Token(Token.Kind.SYM_IDENT, x))
   283 
   280 
   284       val space = many1(symbols.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
   281       val space = many1(symbols.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
   285 
   282 
   286       val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
   283       val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))