121 { |
122 { |
122 def is_space: Boolean = kind == Token.Kind.SPACE |
123 def is_space: Boolean = kind == Token.Kind.SPACE |
123 def is_error: Boolean = kind == Token.Kind.ERROR |
124 def is_error: Boolean = kind == Token.Kind.ERROR |
124 } |
125 } |
125 |
126 |
126 abstract class Chunk { def size: Int; def kind: String = "" } |
127 abstract class Chunk |
127 case class Ignored(source: String) extends Chunk { def size: Int = source.size } |
128 { |
128 case class Malformed(source: String) extends Chunk { def size: Int = source.size } |
129 def size: Int |
129 case class Item(tokens: List[Token]) extends Chunk |
130 def kind: String |
|
131 } |
|
132 |
|
133 case class Ignored(source: String) extends Chunk |
|
134 { |
|
135 def size: Int = source.size |
|
136 def kind: String = "" |
|
137 } |
|
138 |
|
139 case class Item(kind: String, tokens: List[Token]) extends Chunk |
130 { |
140 { |
131 def size: Int = (0 /: tokens)({ case (n, token) => n + token.source.size }) |
141 def size: Int = (0 /: tokens)({ case (n, token) => n + token.source.size }) |
132 |
142 |
133 private val content: (String, List[Token]) = |
143 private val wellformed_content: Option[List[Token]] = |
134 tokens match { |
144 tokens match { |
135 case Token(Token.Kind.KEYWORD, "@") :: body |
145 case Token(Token.Kind.KEYWORD, "@") :: body |
136 if !body.isEmpty && !body.exists(_.is_error) => |
146 if !body.isEmpty && !body.exists(_.is_error) => |
137 (body.init.filterNot(_.is_space), body.last) match { |
147 (body.init.filterNot(_.is_space), body.last) match { |
138 case (Token(Token.Kind.IDENT, id) :: Token(Token.Kind.KEYWORD, "{") :: toks, |
148 case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "{") :: toks, |
139 Token(Token.Kind.KEYWORD, "}")) => (id, toks) |
149 Token(Token.Kind.KEYWORD, "}")) => Some(toks) |
140 case (Token(Token.Kind.IDENT, id) :: Token(Token.Kind.KEYWORD, "(") :: toks, |
150 case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "(") :: toks, |
141 Token(Token.Kind.KEYWORD, ")")) => (id, toks) |
151 Token(Token.Kind.KEYWORD, ")")) => Some(toks) |
142 case _ => ("", Nil) |
152 case _ => None |
143 } |
153 } |
144 case _ => ("", Nil) |
154 case _ => None |
145 } |
155 } |
146 override def kind: String = content._1 |
156 def is_wellformed: Boolean = kind != "" && wellformed_content.isDefined |
147 def content_tokens: List[Token] = content._2 |
157 def content_tokens: List[Token] = wellformed_content getOrElse Nil |
148 |
|
149 def is_wellformed: Boolean = kind != "" |
|
150 |
158 |
151 def name: String = |
159 def name: String = |
152 content_tokens match { |
160 content_tokens match { |
153 case Token(Token.Kind.IDENT, name) :: _ if is_wellformed => name |
161 case Token(Token.Kind.IDENT, id) :: _ if is_wellformed => id |
154 case _ => "" |
162 case _ => "" |
155 } |
163 } |
156 } |
164 } |
157 |
165 |
158 |
166 |
159 |
167 |
160 /** parsing **/ |
168 /** parsing **/ |
161 |
169 |
162 // context of partial line-oriented scans |
170 // context of partial line-oriented scans |
163 abstract class Line_Context |
171 abstract class Line_Context |
164 case class Delimited(quoted: Boolean, depth: Int) extends Line_Context |
172 case object Ignored_Context extends Line_Context |
165 val Finished = Delimited(false, 0) |
173 case class Item_Context(kind: String, delim: Delimited, right: String) extends Line_Context |
|
174 case class Delimited(quoted: Boolean, depth: Int) |
|
175 val Closed = Delimited(false, 0) |
166 |
176 |
167 private def token(kind: Token.Kind.Value)(source: String): Token = Token(kind, source) |
177 private def token(kind: Token.Kind.Value)(source: String): Token = Token(kind, source) |
168 private def keyword(source: String): Token = Token(Token.Kind.KEYWORD, source) |
178 private def keyword(source: String): Token = Token(Token.Kind.KEYWORD, source) |
169 |
179 |
170 |
180 |
178 override val whiteSpace = "".r |
188 override val whiteSpace = "".r |
179 |
189 |
180 private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE) |
190 private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE) |
181 private val strict_space = """[ \t]+""".r ^^ token(Token.Kind.SPACE) |
191 private val strict_space = """[ \t]+""".r ^^ token(Token.Kind.SPACE) |
182 |
192 |
183 private val ignored = |
193 |
184 rep1("""(?mi)([^@]+|@[ \t\n\r]*comment)""".r) ^^ { case ss => Ignored(ss.mkString) } |
194 /* ignored material outside items */ |
|
195 |
|
196 private val ignored: Parser[Chunk] = |
|
197 rep1("""(?mi)([^@]+|@[ \t]*comment)""".r) ^^ { case ss => Ignored(ss.mkString) } |
|
198 |
|
199 private def ignored_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] = |
|
200 ctxt match { |
|
201 case Ignored_Context => ignored ^^ { case a => (a, ctxt) } |
|
202 case _ => failure("") |
|
203 } |
185 |
204 |
186 |
205 |
187 /* delimited string: outermost "..." or {...} and body with balanced {...} */ |
206 /* delimited string: outermost "..." or {...} and body with balanced {...} */ |
188 |
207 |
189 private def delimited_depth(delim: Delimited): Parser[(String, Delimited)] = |
208 private def delimited_depth(delim: Delimited): Parser[(String, Delimited)] = |
208 else if (c == '}' && d > 0) { i += 1; d -= 1; if (d == 0) finished = true } |
227 else if (c == '}' && d > 0) { i += 1; d -= 1; if (d == 0) finished = true } |
209 else if (d > 0) i += 1 |
228 else if (d > 0) i += 1 |
210 else finished = true |
229 else finished = true |
211 } |
230 } |
212 if (i == start) Failure("bad input", in) |
231 if (i == start) Failure("bad input", in) |
213 else |
232 else { |
214 Success((in.source.subSequence(start, i).toString, |
233 val s = in.source.subSequence(start, i).toString |
215 Delimited(q, d)), in.drop(i - start)) |
234 Success((s, Delimited(q, d)), in.drop(i - start)) |
|
235 } |
216 } |
236 } |
217 }.named("delimited_depth") |
237 }.named("delimited_depth") |
218 |
238 |
219 private def delimited: Parser[String] = |
239 private def delimited: Parser[Token] = |
220 delimited_depth(Finished) ^? { case (x, delim) if delim == Finished => x } |
240 delimited_depth(Closed) ^? |
221 |
241 { case (s, delim) if delim == Closed => Token(Token.Kind.STRING, s) } |
222 private def delimited_line(ctxt: Line_Context): Parser[(String, Line_Context)] = |
242 |
|
243 private def delimited_line(ctxt: Line_Context): Parser[(Item, Line_Context)] = |
223 { |
244 { |
224 ctxt match { |
245 ctxt match { |
225 case delim: Delimited => delimited_depth(delim) |
246 case Item_Context(kind, delim, right) => |
|
247 delimited_depth(delim) ^^ { case (s, delim1) => |
|
248 (Item(kind, List(Token(Token.Kind.STRING, s))), Item_Context(kind, delim1, right)) } |
226 case _ => failure("") |
249 case _ => failure("") |
227 } |
250 } |
228 } |
251 } |
229 |
252 |
230 private val recover_delimited: Parser[String] = |
253 private def recover_delimited: Parser[Token] = |
231 delimited_depth(Finished) ^^ (_._1) |
254 """(?m)["{][^@]+""".r ^^ token(Token.Kind.ERROR) |
232 |
|
233 private val delimited_token = |
|
234 delimited ^^ token(Token.Kind.STRING) | |
|
235 recover_delimited ^^ token(Token.Kind.ERROR) |
|
236 |
255 |
237 |
256 |
238 /* other tokens */ |
257 /* other tokens */ |
239 |
258 |
240 private val at = "@" ^^ keyword |
259 private val at = "@" ^^ keyword |
246 private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT) |
265 private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT) |
247 |
266 |
248 private val ident = |
267 private val ident = |
249 """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r ^^ token(Token.Kind.IDENT) |
268 """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r ^^ token(Token.Kind.IDENT) |
250 |
269 |
|
270 val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space)) |
|
271 |
|
272 |
|
273 /* items */ |
|
274 |
|
275 private val item_start: Parser[(String, List[Token])] = |
|
276 at ~ rep(strict_space) ~ ident ~ rep(strict_space) ^^ |
|
277 { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) } |
|
278 |
|
279 private val item_body = |
|
280 delimited | (recover_delimited | other_token) |
|
281 |
|
282 private val item: Parser[Item] = |
|
283 (item_start ~ left_brace ~ rep(item_body) ~ opt(right_brace) | |
|
284 item_start ~ left_paren ~ rep(item_body) ~ opt(right_paren)) ^^ |
|
285 { case (kind, a) ~ b ~ c ~ d => Item(kind, a ::: List(b) ::: c ::: d.toList) } |
|
286 |
|
287 private val recover_item: Parser[Item] = |
|
288 at ~ "(?m)[^@]+".r ^^ { case a ~ b => Item("", List(a, Token(Token.Kind.ERROR, b))) } |
|
289 |
|
290 def item_line(ctxt: Line_Context): Parser[(Item, Line_Context)] = |
|
291 { |
|
292 ctxt match { |
|
293 case Ignored_Context => |
|
294 item_start ~ (left_brace | left_paren) ^^ |
|
295 { case (kind, a) ~ b => |
|
296 val right = if (b.source == "{") "}" else ")" |
|
297 (Item(kind, a ::: List(b)), Item_Context(kind, Closed, right)) } |
|
298 case Item_Context(kind, delim, right) => |
|
299 if (delim.depth > 0) |
|
300 delimited_line(ctxt) |
|
301 else { |
|
302 delimited_line(ctxt) | |
|
303 other_token ^^ { case a => (Item(kind, List(a)), ctxt) } | |
|
304 right ^^ { case a => (Item(kind, List(keyword(a))), Ignored_Context) } |
|
305 } |
|
306 case _ => failure("") |
|
307 } |
|
308 } |
|
309 |
251 |
310 |
252 /* chunks */ |
311 /* chunks */ |
253 |
312 |
254 private val item_start = |
313 val chunk: Parser[Chunk] = ignored | (item | recover_item) |
255 at ~ rep(strict_space) ~ ident ~ rep(strict_space) ^^ |
314 |
256 { case a ~ b ~ c ~ d => List(a) ::: b ::: List(c) ::: d } |
315 def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] = |
257 |
316 ignored_line(ctxt) | item_line(ctxt) |
258 private val body_token = delimited_token | ("[=#,]".r ^^ keyword | (nat | (ident | space))) |
317 } |
259 |
318 |
260 private val item = |
319 |
261 (item_start ~ left_brace ~ rep(body_token) ~ opt(right_brace) | |
320 /* parse */ |
262 item_start ~ left_paren ~ rep(body_token) ~ opt(right_paren)) ^^ |
|
263 { case a ~ b ~ c ~ d => Item(a ::: List(b) ::: c ::: d.toList) } |
|
264 |
|
265 private val recover_item = "(?m)@[^@]+".r ^^ (s => Malformed(s)) |
|
266 |
|
267 val chunks: Parser[List[Chunk]] = rep(ignored | (item | recover_item)) |
|
268 } |
|
269 |
321 |
270 def parse(input: CharSequence): List[Chunk] = |
322 def parse(input: CharSequence): List[Chunk] = |
271 { |
323 { |
272 val in: Reader[Char] = new CharSequenceReader(input) |
324 val in: Reader[Char] = new CharSequenceReader(input) |
273 Parsers.parseAll(Parsers.chunks, in) match { |
325 Parsers.parseAll(Parsers.rep(Parsers.chunk), in) match { |
274 case Parsers.Success(result, _) => result |
326 case Parsers.Success(result, _) => result |
275 case _ => error("Unexpected failure to parse input:\n" + input.toString) |
327 case _ => error("Unexpected failure to parse input:\n" + input.toString) |
276 } |
328 } |
277 } |
329 } |
|
330 |
|
331 def parse_line(input: CharSequence, context: Line_Context): (List[Chunk], Line_Context) = |
|
332 { |
|
333 var in: Reader[Char] = new CharSequenceReader(input) |
|
334 val chunks = new mutable.ListBuffer[Chunk] |
|
335 var ctxt = context |
|
336 while (!in.atEnd) { |
|
337 Parsers.parse(Parsers.chunk_line(ctxt), in) match { |
|
338 case Parsers.Success((x, c), rest) => { chunks += x; ctxt = c; in = rest } |
|
339 case Parsers.NoSuccess(_, rest) => |
|
340 error("Unepected failure to parse input:\n" + rest.source.toString) |
|
341 } |
|
342 } |
|
343 (chunks.toList, ctxt) |
|
344 } |
278 } |
345 } |
279 |
346 |