src/Pure/ML/ml_lex.scala
changeset 75393 87ebf5a50283
parent 75384 20093a63d03b
child 75420 73a2f3fe0e8c
equal deleted inserted replaced
75388:b3ca4a6ed74b 75393:87ebf5a50283
     9 
     9 
    10 import scala.collection.mutable
    10 import scala.collection.mutable
    11 import scala.util.parsing.input.Reader
    11 import scala.util.parsing.input.Reader
    12 
    12 
    13 
    13 
    14 object ML_Lex
    14 object ML_Lex {
    15 {
       
    16   /** keywords **/
    15   /** keywords **/
    17 
    16 
    18   val keywords: Set[String] =
    17   val keywords: Set[String] =
    19     Set("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", "=>",
    18     Set("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", "=>",
    20       "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
    19       "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
    36 
    35 
    37 
    36 
    38 
    37 
    39   /** tokens **/
    38   /** tokens **/
    40 
    39 
    41   object Kind extends Enumeration
    40   object Kind extends Enumeration {
    42   {
       
    43     val KEYWORD = Value("keyword")
    41     val KEYWORD = Value("keyword")
    44     val IDENT = Value("identifier")
    42     val IDENT = Value("identifier")
    45     val LONG_IDENT = Value("long identifier")
    43     val LONG_IDENT = Value("long identifier")
    46     val TYPE_VAR = Value("type variable")
    44     val TYPE_VAR = Value("type variable")
    47     val WORD = Value("word")
    45     val WORD = Value("word")
    61     val ANTIQ_ALT_STRING = Value("antiquotation: back-quoted string")
    59     val ANTIQ_ALT_STRING = Value("antiquotation: back-quoted string")
    62     val ANTIQ_CARTOUCHE = Value("antiquotation: text cartouche")
    60     val ANTIQ_CARTOUCHE = Value("antiquotation: text cartouche")
    63     val ERROR = Value("bad input")
    61     val ERROR = Value("bad input")
    64   }
    62   }
    65 
    63 
    66   sealed case class Token(kind: Kind.Value, source: String)
    64   sealed case class Token(kind: Kind.Value, source: String) {
    67   {
       
    68     def is_keyword: Boolean = kind == Kind.KEYWORD
    65     def is_keyword: Boolean = kind == Kind.KEYWORD
    69     def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)
    66     def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)
    70     def is_space: Boolean = kind == Kind.SPACE
    67     def is_space: Boolean = kind == Kind.SPACE
    71     def is_comment: Boolean = kind == Kind.INFORMAL_COMMENT || kind == Kind.FORMAL_COMMENT
    68     def is_comment: Boolean = kind == Kind.INFORMAL_COMMENT || kind == Kind.FORMAL_COMMENT
    72   }
    69   }
    76   /** parsers **/
    73   /** parsers **/
    77 
    74 
    78   case object ML_String extends Scan.Line_Context
    75   case object ML_String extends Scan.Line_Context
    79   case class Antiq(ctxt: Scan.Line_Context) extends Scan.Line_Context
    76   case class Antiq(ctxt: Scan.Line_Context) extends Scan.Line_Context
    80 
    77 
    81   private object Parsers extends Scan.Parsers with Antiquote.Parsers with Comment.Parsers
    78   private object Parsers extends Scan.Parsers with Antiquote.Parsers with Comment.Parsers {
    82   {
       
    83     /* string material */
    79     /* string material */
    84 
    80 
    85     private val blanks = many(character(Symbol.is_ascii_blank))
    81     private val blanks = many(character(Symbol.is_ascii_blank))
    86     private val blanks1 = many1(character(Symbol.is_ascii_blank))
    82     private val blanks1 = many1(character(Symbol.is_ascii_blank))
    87 
    83 
   118       "\"" ~ ml_string_body ^^ { case x ~ y => x + y }
   114       "\"" ~ ml_string_body ^^ { case x ~ y => x + y }
   119 
   115 
   120     private val ml_string: Parser[Token] =
   116     private val ml_string: Parser[Token] =
   121       "\"" ~ ml_string_body ~ "\"" ^^ { case x ~ y ~ z => Token(Kind.STRING, x + y + z) }
   117       "\"" ~ ml_string_body ~ "\"" ^^ { case x ~ y ~ z => Token(Kind.STRING, x + y + z) }
   122 
   118 
   123     private def ml_string_line(ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] =
   119     private def ml_string_line(ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] = {
   124     {
       
   125       def result(x: String, c: Scan.Line_Context) = (Token(Kind.STRING, x), c)
   120       def result(x: String, c: Scan.Line_Context) = (Token(Kind.STRING, x), c)
   126 
   121 
   127       ctxt match {
   122       ctxt match {
   128         case Scan.Finished =>
   123         case Scan.Finished =>
   129           "\"" ~ ml_string_body ~ ("\"" | gap_start) ^^
   124           "\"" ~ ml_string_body ~ ("\"" | gap_start) ^^
   183       }
   178       }
   184 
   179 
   185 
   180 
   186     /* token */
   181     /* token */
   187 
   182 
   188     private def other_token: Parser[Token] =
   183     private def other_token: Parser[Token] = {
   189     {
       
   190       /* identifiers */
   184       /* identifiers */
   191 
   185 
   192       val letdigs = many(character(Symbol.is_ascii_letdig))
   186       val letdigs = many(character(Symbol.is_ascii_letdig))
   193 
   187 
   194       val alphanumeric =
   188       val alphanumeric =
   253     }
   247     }
   254 
   248 
   255     def token: Parser[Token] =
   249     def token: Parser[Token] =
   256       ml_char | (ml_string | (ml_comment | (ml_formal_comment | other_token)))
   250       ml_char | (ml_string | (ml_comment | (ml_formal_comment | other_token)))
   257 
   251 
   258     def token_line(SML: Boolean, ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] =
   252     def token_line(SML: Boolean, ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] = {
   259     {
       
   260       val other = (ml_char | other_token) ^^ (x => (x, Scan.Finished))
   253       val other = (ml_char | other_token) ^^ (x => (x, Scan.Finished))
   261 
   254 
   262       if (SML) ml_string_line(ctxt) | (ml_comment_line(ctxt) | other)
   255       if (SML) ml_string_line(ctxt) | (ml_comment_line(ctxt) | other)
   263       else {
   256       else {
   264         ml_string_line(ctxt) |
   257         ml_string_line(ctxt) |
   277     Parsers.parseAll(Parsers.rep(Parsers.token), Scan.char_reader(input)) match {
   270     Parsers.parseAll(Parsers.rep(Parsers.token), Scan.char_reader(input)) match {
   278       case Parsers.Success(tokens, _) => tokens
   271       case Parsers.Success(tokens, _) => tokens
   279       case _ => error("Unexpected failure of tokenizing input:\n" + input.toString)
   272       case _ => error("Unexpected failure of tokenizing input:\n" + input.toString)
   280     }
   273     }
   281 
   274 
   282   def tokenize_line(SML: Boolean, input: CharSequence, context: Scan.Line_Context)
   275   def tokenize_line(
   283     : (List[Token], Scan.Line_Context) =
   276     SML: Boolean,
   284   {
   277     input: CharSequence,
       
   278     context: Scan.Line_Context
       
   279   ) : (List[Token], Scan.Line_Context) = {
   285     var in: Reader[Char] = Scan.char_reader(input)
   280     var in: Reader[Char] = Scan.char_reader(input)
   286     val toks = new mutable.ListBuffer[Token]
   281     val toks = new mutable.ListBuffer[Token]
   287     var ctxt = context
   282     var ctxt = context
   288     while (!in.atEnd) {
   283     while (!in.atEnd) {
   289       Parsers.parse(Parsers.token_line(SML, ctxt), in) match {
   284       Parsers.parse(Parsers.token_line(SML, ctxt), in) match {