changeset 75393 | 87ebf5a50283 |
parent 75384 | 20093a63d03b |
child 75420 | 73a2f3fe0e8c |
75388:b3ca4a6ed74b | 75393:87ebf5a50283 |
---|---|
9 |
9 |
10 import scala.collection.mutable |
10 import scala.collection.mutable |
11 import scala.util.parsing.input.Reader |
11 import scala.util.parsing.input.Reader |
12 |
12 |
13 |
13 |
14 object ML_Lex |
14 object ML_Lex { |
15 { |
|
16 /** keywords **/ |
15 /** keywords **/ |
17 |
16 |
18 val keywords: Set[String] = |
17 val keywords: Set[String] = |
19 Set("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", "=>", |
18 Set("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", "=>", |
20 "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as", |
19 "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as", |
36 |
35 |
37 |
36 |
38 |
37 |
39 /** tokens **/ |
38 /** tokens **/ |
40 |
39 |
41 object Kind extends Enumeration |
40 object Kind extends Enumeration { |
42 { |
|
43 val KEYWORD = Value("keyword") |
41 val KEYWORD = Value("keyword") |
44 val IDENT = Value("identifier") |
42 val IDENT = Value("identifier") |
45 val LONG_IDENT = Value("long identifier") |
43 val LONG_IDENT = Value("long identifier") |
46 val TYPE_VAR = Value("type variable") |
44 val TYPE_VAR = Value("type variable") |
47 val WORD = Value("word") |
45 val WORD = Value("word") |
61 val ANTIQ_ALT_STRING = Value("antiquotation: back-quoted string") |
59 val ANTIQ_ALT_STRING = Value("antiquotation: back-quoted string") |
62 val ANTIQ_CARTOUCHE = Value("antiquotation: text cartouche") |
60 val ANTIQ_CARTOUCHE = Value("antiquotation: text cartouche") |
63 val ERROR = Value("bad input") |
61 val ERROR = Value("bad input") |
64 } |
62 } |
65 |
63 |
66 sealed case class Token(kind: Kind.Value, source: String) |
64 sealed case class Token(kind: Kind.Value, source: String) { |
67 { |
|
68 def is_keyword: Boolean = kind == Kind.KEYWORD |
65 def is_keyword: Boolean = kind == Kind.KEYWORD |
69 def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source) |
66 def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source) |
70 def is_space: Boolean = kind == Kind.SPACE |
67 def is_space: Boolean = kind == Kind.SPACE |
71 def is_comment: Boolean = kind == Kind.INFORMAL_COMMENT || kind == Kind.FORMAL_COMMENT |
68 def is_comment: Boolean = kind == Kind.INFORMAL_COMMENT || kind == Kind.FORMAL_COMMENT |
72 } |
69 } |
76 /** parsers **/ |
73 /** parsers **/ |
77 |
74 |
78 case object ML_String extends Scan.Line_Context |
75 case object ML_String extends Scan.Line_Context |
79 case class Antiq(ctxt: Scan.Line_Context) extends Scan.Line_Context |
76 case class Antiq(ctxt: Scan.Line_Context) extends Scan.Line_Context |
80 |
77 |
81 private object Parsers extends Scan.Parsers with Antiquote.Parsers with Comment.Parsers |
78 private object Parsers extends Scan.Parsers with Antiquote.Parsers with Comment.Parsers { |
82 { |
|
83 /* string material */ |
79 /* string material */ |
84 |
80 |
85 private val blanks = many(character(Symbol.is_ascii_blank)) |
81 private val blanks = many(character(Symbol.is_ascii_blank)) |
86 private val blanks1 = many1(character(Symbol.is_ascii_blank)) |
82 private val blanks1 = many1(character(Symbol.is_ascii_blank)) |
87 |
83 |
118 "\"" ~ ml_string_body ^^ { case x ~ y => x + y } |
114 "\"" ~ ml_string_body ^^ { case x ~ y => x + y } |
119 |
115 |
120 private val ml_string: Parser[Token] = |
116 private val ml_string: Parser[Token] = |
121 "\"" ~ ml_string_body ~ "\"" ^^ { case x ~ y ~ z => Token(Kind.STRING, x + y + z) } |
117 "\"" ~ ml_string_body ~ "\"" ^^ { case x ~ y ~ z => Token(Kind.STRING, x + y + z) } |
122 |
118 |
123 private def ml_string_line(ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] = |
119 private def ml_string_line(ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] = { |
124 { |
|
125 def result(x: String, c: Scan.Line_Context) = (Token(Kind.STRING, x), c) |
120 def result(x: String, c: Scan.Line_Context) = (Token(Kind.STRING, x), c) |
126 |
121 |
127 ctxt match { |
122 ctxt match { |
128 case Scan.Finished => |
123 case Scan.Finished => |
129 "\"" ~ ml_string_body ~ ("\"" | gap_start) ^^ |
124 "\"" ~ ml_string_body ~ ("\"" | gap_start) ^^ |
183 } |
178 } |
184 |
179 |
185 |
180 |
186 /* token */ |
181 /* token */ |
187 |
182 |
188 private def other_token: Parser[Token] = |
183 private def other_token: Parser[Token] = { |
189 { |
|
190 /* identifiers */ |
184 /* identifiers */ |
191 |
185 |
192 val letdigs = many(character(Symbol.is_ascii_letdig)) |
186 val letdigs = many(character(Symbol.is_ascii_letdig)) |
193 |
187 |
194 val alphanumeric = |
188 val alphanumeric = |
253 } |
247 } |
254 |
248 |
255 def token: Parser[Token] = |
249 def token: Parser[Token] = |
256 ml_char | (ml_string | (ml_comment | (ml_formal_comment | other_token))) |
250 ml_char | (ml_string | (ml_comment | (ml_formal_comment | other_token))) |
257 |
251 |
258 def token_line(SML: Boolean, ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] = |
252 def token_line(SML: Boolean, ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] = { |
259 { |
|
260 val other = (ml_char | other_token) ^^ (x => (x, Scan.Finished)) |
253 val other = (ml_char | other_token) ^^ (x => (x, Scan.Finished)) |
261 |
254 |
262 if (SML) ml_string_line(ctxt) | (ml_comment_line(ctxt) | other) |
255 if (SML) ml_string_line(ctxt) | (ml_comment_line(ctxt) | other) |
263 else { |
256 else { |
264 ml_string_line(ctxt) | |
257 ml_string_line(ctxt) | |
277 Parsers.parseAll(Parsers.rep(Parsers.token), Scan.char_reader(input)) match { |
270 Parsers.parseAll(Parsers.rep(Parsers.token), Scan.char_reader(input)) match { |
278 case Parsers.Success(tokens, _) => tokens |
271 case Parsers.Success(tokens, _) => tokens |
279 case _ => error("Unexpected failure of tokenizing input:\n" + input.toString) |
272 case _ => error("Unexpected failure of tokenizing input:\n" + input.toString) |
280 } |
273 } |
281 |
274 |
282 def tokenize_line(SML: Boolean, input: CharSequence, context: Scan.Line_Context) |
275 def tokenize_line( |
283 : (List[Token], Scan.Line_Context) = |
276 SML: Boolean, |
284 { |
277 input: CharSequence, |
278 context: Scan.Line_Context |
|
279 ) : (List[Token], Scan.Line_Context) = { |
|
285 var in: Reader[Char] = Scan.char_reader(input) |
280 var in: Reader[Char] = Scan.char_reader(input) |
286 val toks = new mutable.ListBuffer[Token] |
281 val toks = new mutable.ListBuffer[Token] |
287 var ctxt = context |
282 var ctxt = context |
288 while (!in.atEnd) { |
283 while (!in.atEnd) { |
289 Parsers.parse(Parsers.token_line(SML, ctxt), in) match { |
284 Parsers.parse(Parsers.token_line(SML, ctxt), in) match { |