9 |
9 |
10 import scala.collection.mutable |
10 import scala.collection.mutable |
11 import scala.util.parsing.input |
11 import scala.util.parsing.input |
12 |
12 |
13 |
13 |
14 object Token |
14 object Token { |
15 { |
|
16 /* tokens */ |
15 /* tokens */ |
17 |
16 |
18 object Kind extends Enumeration |
17 object Kind extends Enumeration { |
19 { |
|
20 /*immediate source*/ |
18 /*immediate source*/ |
21 val COMMAND = Value("command") |
19 val COMMAND = Value("command") |
22 val KEYWORD = Value("keyword") |
20 val KEYWORD = Value("keyword") |
23 val IDENT = Value("identifier") |
21 val IDENT = Value("identifier") |
24 val LONG_IDENT = Value("long identifier") |
22 val LONG_IDENT = Value("long identifier") |
44 |
42 |
45 /* parsers */ |
43 /* parsers */ |
46 |
44 |
47 object Parsers extends Parsers |
45 object Parsers extends Parsers |
48 |
46 |
49 trait Parsers extends Scan.Parsers with Comment.Parsers |
47 trait Parsers extends Scan.Parsers with Comment.Parsers { |
50 { |
48 private def delimited_token: Parser[Token] = { |
51 private def delimited_token: Parser[Token] = |
|
52 { |
|
53 val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x)) |
49 val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x)) |
54 val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x)) |
50 val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x)) |
55 val cmt = comment ^^ (x => Token(Token.Kind.INFORMAL_COMMENT, x)) |
51 val cmt = comment ^^ (x => Token(Token.Kind.INFORMAL_COMMENT, x)) |
56 val formal_cmt = comment_cartouche ^^ (x => Token(Token.Kind.FORMAL_COMMENT, x)) |
52 val formal_cmt = comment_cartouche ^^ (x => Token(Token.Kind.FORMAL_COMMENT, x)) |
57 val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x)) |
53 val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x)) |
58 val ctrl = control_cartouche ^^ (x => Token(Token.Kind.CONTROL, x)) |
54 val ctrl = control_cartouche ^^ (x => Token(Token.Kind.CONTROL, x)) |
59 |
55 |
60 string | (alt_string | (cmt | (formal_cmt | (cart | ctrl)))) |
56 string | (alt_string | (cmt | (formal_cmt | (cart | ctrl)))) |
61 } |
57 } |
62 |
58 |
63 private def other_token(keywords: Keyword.Keywords): Parser[Token] = |
59 private def other_token(keywords: Keyword.Keywords): Parser[Token] = { |
64 { |
|
65 val letdigs1 = many1(Symbol.is_letdig) |
60 val letdigs1 = many1(Symbol.is_letdig) |
66 val sub = one(s => s == Symbol.sub_decoded || s == Symbol.sub) |
61 val sub = one(s => s == Symbol.sub_decoded || s == Symbol.sub) |
67 val id = |
62 val id = |
68 one(Symbol.is_letter) ~ |
63 one(Symbol.is_letter) ~ |
69 (rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^ |
64 (rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^ |
107 } |
102 } |
108 |
103 |
109 def token(keywords: Keyword.Keywords): Parser[Token] = |
104 def token(keywords: Keyword.Keywords): Parser[Token] = |
110 delimited_token | other_token(keywords) |
105 delimited_token | other_token(keywords) |
111 |
106 |
112 def token_line(keywords: Keyword.Keywords, ctxt: Scan.Line_Context) |
107 def token_line( |
113 : Parser[(Token, Scan.Line_Context)] = |
108 keywords: Keyword.Keywords, |
114 { |
109 ctxt: Scan.Line_Context |
|
110 ) : Parser[(Token, Scan.Line_Context)] = { |
115 val string = |
111 val string = |
116 quoted_line("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) } |
112 quoted_line("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) } |
117 val alt_string = |
113 val alt_string = |
118 quoted_line("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) } |
114 quoted_line("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) } |
119 val cart = cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) } |
115 val cart = cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) } |
133 Parsers.parseAll(Parsers.rep(Parsers.token(keywords)), Scan.char_reader(inp)) match { |
129 Parsers.parseAll(Parsers.rep(Parsers.token(keywords)), Scan.char_reader(inp)) match { |
134 case Parsers.Success(tokens, _) => tokens |
130 case Parsers.Success(tokens, _) => tokens |
135 case _ => error("Unexpected failure of tokenizing input:\n" + inp.toString) |
131 case _ => error("Unexpected failure of tokenizing input:\n" + inp.toString) |
136 } |
132 } |
137 |
133 |
138 def explode_line(keywords: Keyword.Keywords, inp: CharSequence, context: Scan.Line_Context) |
134 def explode_line( |
139 : (List[Token], Scan.Line_Context) = |
135 keywords: Keyword.Keywords, |
140 { |
136 inp: CharSequence, |
|
137 context: Scan.Line_Context |
|
138 ) : (List[Token], Scan.Line_Context) = { |
141 var in: input.Reader[Char] = Scan.char_reader(inp) |
139 var in: input.Reader[Char] = Scan.char_reader(inp) |
142 val toks = new mutable.ListBuffer[Token] |
140 val toks = new mutable.ListBuffer[Token] |
143 var ctxt = context |
141 var ctxt = context |
144 while (!in.atEnd) { |
142 while (!in.atEnd) { |
145 Parsers.parse(Parsers.token_line(keywords, ctxt), in) match { |
143 Parsers.parse(Parsers.token_line(keywords, ctxt), in) match { |
195 } |
193 } |
196 |
194 |
197 |
195 |
198 /* token reader */ |
196 /* token reader */ |
199 |
197 |
200 object Pos |
198 object Pos { |
201 { |
|
202 val none: Pos = new Pos(0, 0, "", "") |
199 val none: Pos = new Pos(0, 0, "", "") |
203 val start: Pos = new Pos(1, 1, "", "") |
200 val start: Pos = new Pos(1, 1, "", "") |
204 def file(file: String): Pos = new Pos(1, 1, file, "") |
201 def file(file: String): Pos = new Pos(1, 1, file, "") |
205 def id(id: String): Pos = new Pos(0, 1, "", id) |
202 def id(id: String): Pos = new Pos(0, 1, "", id) |
206 val command: Pos = id(Markup.COMMAND) |
203 val command: Pos = id(Markup.COMMAND) |
209 final class Pos private[Token]( |
206 final class Pos private[Token]( |
210 val line: Int, |
207 val line: Int, |
211 val offset: Symbol.Offset, |
208 val offset: Symbol.Offset, |
212 val file: String, |
209 val file: String, |
213 val id: String) |
210 val id: String) |
214 extends input.Position |
211 extends input.Position { |
215 { |
|
216 def column = 0 |
212 def column = 0 |
217 def lineContents = "" |
213 def lineContents = "" |
218 |
214 |
219 def advance(token: Token): Pos = advance(token.source) |
215 def advance(token: Token): Pos = advance(token.source) |
220 def advance(source: String): Pos = |
216 def advance(source: String): Pos = { |
221 { |
|
222 var line1 = line |
217 var line1 = line |
223 var offset1 = offset |
218 var offset1 = offset |
224 for (s <- Symbol.iterator(source)) { |
219 for (s <- Symbol.iterator(source)) { |
225 if (line1 > 0 && Symbol.is_newline(s)) line1 += 1 |
220 if (line1 > 0 && Symbol.is_newline(s)) line1 += 1 |
226 if (offset1 > 0) offset1 += 1 |
221 if (offset1 > 0) offset1 += 1 |
243 override def toString: String = Position.here(position(), delimited = false) |
238 override def toString: String = Position.here(position(), delimited = false) |
244 } |
239 } |
245 |
240 |
246 abstract class Reader extends input.Reader[Token] |
241 abstract class Reader extends input.Reader[Token] |
247 |
242 |
248 private class Token_Reader(tokens: List[Token], val pos: Pos) extends Reader |
243 private class Token_Reader(tokens: List[Token], val pos: Pos) extends Reader { |
249 { |
|
250 def first: Token = tokens.head |
244 def first: Token = tokens.head |
251 def rest: Token_Reader = new Token_Reader(tokens.tail, pos.advance(first)) |
245 def rest: Token_Reader = new Token_Reader(tokens.tail, pos.advance(first)) |
252 def atEnd: Boolean = tokens.isEmpty |
246 def atEnd: Boolean = tokens.isEmpty |
253 } |
247 } |
254 |
248 |
255 def reader(tokens: List[Token], start: Token.Pos): Reader = |
249 def reader(tokens: List[Token], start: Token.Pos): Reader = |
256 new Token_Reader(tokens, start) |
250 new Token_Reader(tokens, start) |
257 } |
251 } |
258 |
252 |
259 |
253 |
260 sealed case class Token(kind: Token.Kind.Value, source: String) |
254 sealed case class Token(kind: Token.Kind.Value, source: String) { |
261 { |
|
262 def is_command: Boolean = kind == Token.Kind.COMMAND |
255 def is_command: Boolean = kind == Token.Kind.COMMAND |
263 def is_command(name: String): Boolean = kind == Token.Kind.COMMAND && source == name |
256 def is_command(name: String): Boolean = kind == Token.Kind.COMMAND && source == name |
264 def is_keyword: Boolean = kind == Token.Kind.KEYWORD |
257 def is_keyword: Boolean = kind == Token.Kind.KEYWORD |
265 def is_keyword(name: String): Boolean = kind == Token.Kind.KEYWORD && source == name |
258 def is_keyword(name: String): Boolean = kind == Token.Kind.KEYWORD && source == name |
266 def is_keyword(name: Char): Boolean = |
259 def is_keyword(name: Char): Boolean = |
316 else if (kind == Token.Kind.CARTOUCHE) Scan.Parsers.cartouche_content(source) |
309 else if (kind == Token.Kind.CARTOUCHE) Scan.Parsers.cartouche_content(source) |
317 else if (kind == Token.Kind.INFORMAL_COMMENT) Scan.Parsers.comment_content(source) |
310 else if (kind == Token.Kind.INFORMAL_COMMENT) Scan.Parsers.comment_content(source) |
318 else if (kind == Token.Kind.FORMAL_COMMENT) Comment.content(source) |
311 else if (kind == Token.Kind.FORMAL_COMMENT) Comment.content(source) |
319 else source |
312 else source |
320 |
313 |
321 def is_system_name: Boolean = |
314 def is_system_name: Boolean = { |
322 { |
|
323 val s = content |
315 val s = content |
324 is_name && Path.is_wellformed(s) && |
316 is_name && Path.is_wellformed(s) && |
325 !s.exists(Symbol.is_ascii_blank) && |
317 !s.exists(Symbol.is_ascii_blank) && |
326 !Path.is_reserved(s) |
318 !Path.is_reserved(s) |
327 } |
319 } |