src/Pure/General/scan.scala
changeset 34143 ded454429df3
parent 34140 31be1235d0fb
child 34144 bd7b3b91abab
equal deleted inserted replaced
34142:b6686c21a065 34143:ded454429df3
   166     def one(pred: String => Boolean): Parser[String] = symbols(pred, 1, 1)
   166     def one(pred: String => Boolean): Parser[String] = symbols(pred, 1, 1)
   167     def many(pred: String => Boolean): Parser[String] = symbols(pred, 0, Integer.MAX_VALUE)
   167     def many(pred: String => Boolean): Parser[String] = symbols(pred, 0, Integer.MAX_VALUE)
   168     def many1(pred: String => Boolean): Parser[String] = symbols(pred, 1, Integer.MAX_VALUE)
   168     def many1(pred: String => Boolean): Parser[String] = symbols(pred, 1, Integer.MAX_VALUE)
   169 
   169 
   170 
   170 
   171     /* delimited text */
   171     /* quoted strings */
   172 
   172 
   173     def quoted(quote: String) =
   173     private def quoted(quote: String): Parser[String] =
       
   174     {
   174       quote ~
   175       quote ~
   175         rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
   176         rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
   176           "\\" + quote | "\\\\" |
   177           "\\" + quote | "\\\\" |
   177           (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
   178           (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
   178       quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
   179       quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
       
   180     }.named("quoted")
   179 
   181 
   180     def quoted_content(quote: String, source: String): String =
   182     def quoted_content(quote: String, source: String): String =
   181     {
   183     {
   182       require(parseAll(quoted(quote), source).successful)
   184       require(parseAll(quoted(quote), source).successful)
   183       source.substring(1, source.length - 1)  // FIXME proper escapes, recode utf8 (!?)
   185       source.substring(1, source.length - 1)  // FIXME proper escapes, recode utf8 (!?)
   184     }
   186     }
   185 
   187 
   186 
   188 
   187     def verbatim =
   189     /* verbatim text */
       
   190 
       
   191     private def verbatim: Parser[String] =
       
   192     {
   188       "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
   193       "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
   189         { case x ~ ys ~ z => x + ys.mkString + z }
   194         { case x ~ ys ~ z => x + ys.mkString + z }
       
   195     }.named("verbatim")
   190 
   196 
   191     def verbatim_content(source: String): String =
   197     def verbatim_content(source: String): String =
   192     {
   198     {
   193       require(parseAll(verbatim, source).successful)
   199       require(parseAll(verbatim, source).successful)
       
   200       source.substring(2, source.length - 2)
       
   201     }
       
   202 
       
   203 
       
   204     /* nested comments */
       
   205 
       
   206     def comment: Parser[String] = new Parser[String]
       
   207     {
       
   208       val comment_text =
       
   209         rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_closed(sym)) |
       
   210           """\*(?!\))|\((?!\*)""".r)
       
   211       val comment_open = "(*" ~ comment_text ^^ (_ => ())
       
   212       val comment_close = comment_text ~ "*)" ^^ (_ => ())
       
   213 
       
   214       def apply(in: Input) =
       
   215       {
       
   216         var rest = in
       
   217         def try_parse(p: Parser[Unit]): Boolean =
       
   218         {
       
   219           parse(p, rest) match {
       
   220             case Success(_, next) => { rest = next; true }
       
   221             case _ => false
       
   222           }
       
   223         }
       
   224         var depth = 0
       
   225         var finished = false
       
   226         while (!finished) {
       
   227           if (try_parse(comment_open)) depth += 1
       
   228           else if (depth > 0 && try_parse(comment_close)) depth -= 1
       
   229           else finished = true
       
   230         }
       
   231         if (in.offset < rest.offset && depth == 0)
       
   232           Success(in.source.subSequence(in.offset, rest.offset).toString, rest)
       
   233         else Failure("comment expected", in)
       
   234       }
       
   235     }.named("comment")
       
   236 
       
   237     def comment_content(source: String): String =
       
   238     {
       
   239       require(parseAll(comment, source).successful)
   194       source.substring(2, source.length - 2)
   240       source.substring(2, source.length - 2)
   195     }
   241     }
   196 
   242 
   197 
   243 
   198     /* outer syntax tokens */
   244     /* outer syntax tokens */
   222       val space = many1(symbols.is_blank) ^^ Space
   268       val space = many1(symbols.is_blank) ^^ Space
   223 
   269 
   224       val string = quoted("\"") ^^ String_Token
   270       val string = quoted("\"") ^^ String_Token
   225       val alt_string = quoted("`") ^^ Alt_String_Token
   271       val alt_string = quoted("`") ^^ Alt_String_Token
   226 
   272 
   227       val comment: Parser[Token] = failure("FIXME")
       
   228 
       
   229       val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^ Bad_Input
   273       val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^ Bad_Input
   230 
   274 
   231 
   275 
   232       /* tokens */
   276       /* tokens */
   233 
   277 
   234       // FIXME right-assoc !?
   278       // FIXME right-assoc !?
   235 
   279 
   236       (string | alt_string | verbatim ^^ Verbatim | comment | space) |
   280       (string | alt_string | verbatim ^^ Verbatim | comment ^^ Comment | space) |
   237       ((ident | var_ | type_ident | type_var | nat_ | sym_ident) |||
   281       ((ident | var_ | type_ident | type_var | nat_ | sym_ident) |||
   238         keyword ^^ (x => if (is_command(x)) Command(x) else Keyword(x))) |
   282         keyword ^^ (x => if (is_command(x)) Command(x) else Keyword(x))) |
   239       bad_input
   283       bad_input
   240     }
   284     }
   241   }
   285   }
   242 }
   286 }
   243