29 val CARTOUCHE = Value("cartouche") |
29 val CARTOUCHE = Value("cartouche") |
30 val SPACE = Value("white space") |
30 val SPACE = Value("white space") |
31 val COMMENT = Value("comment text") |
31 val COMMENT = Value("comment text") |
32 val ERROR = Value("bad input") |
32 val ERROR = Value("bad input") |
33 val UNPARSED = Value("unparsed input") |
33 val UNPARSED = Value("unparsed input") |
|
34 } |
|
35 |
|
36 |
|
37 /* parsers */ |
|
38 |
|
39 object Parsers extends Parsers |
|
40 |
|
41 trait Parsers extends Scan.Parsers |
|
42 { |
|
43 private def delimited_token: Parser[Token] = |
|
44 { |
|
45 val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x)) |
|
46 val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x)) |
|
47 val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x)) |
|
48 val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x)) |
|
49 val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x)) |
|
50 |
|
51 string | (alt_string | (verb | (cart | cmt))) |
|
52 } |
|
53 |
|
54 private def other_token(lexicon: Scan.Lexicon, is_command: String => Boolean) |
|
55 : Parser[Token] = |
|
56 { |
|
57 val letdigs1 = many1(Symbol.is_letdig) |
|
58 val sub = one(s => s == Symbol.sub_decoded || s == "\\<^sub>") |
|
59 val id = |
|
60 one(Symbol.is_letter) ~ |
|
61 (rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^ |
|
62 { case x ~ y => x + y } |
|
63 |
|
64 val nat = many1(Symbol.is_digit) |
|
65 val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z } |
|
66 val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x } |
|
67 |
|
68 val ident = id ~ rep("." ~> id) ^^ |
|
69 { case x ~ Nil => Token(Token.Kind.IDENT, x) |
|
70 case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) } |
|
71 |
|
72 val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) } |
|
73 val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) } |
|
74 val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) } |
|
75 val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x)) |
|
76 val float = |
|
77 ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x)) |
|
78 |
|
79 val sym_ident = |
|
80 (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^ |
|
81 (x => Token(Token.Kind.SYM_IDENT, x)) |
|
82 |
|
83 val command_keyword = |
|
84 keyword(lexicon) ^^ |
|
85 (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x)) |
|
86 |
|
87 val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x)) |
|
88 |
|
89 val recover_delimited = |
|
90 (recover_quoted("\"") | |
|
91 (recover_quoted("`") | |
|
92 (recover_verbatim | |
|
93 (recover_cartouche | recover_comment)))) ^^ (x => Token(Token.Kind.ERROR, x)) |
|
94 |
|
95 val bad = one(_ => true) ^^ (x => Token(Token.Kind.ERROR, x)) |
|
96 |
|
97 space | (recover_delimited | |
|
98 (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) ||| |
|
99 command_keyword) | bad)) |
|
100 } |
|
101 |
|
102 def token(lexicon: Scan.Lexicon, is_command: String => Boolean): Parser[Token] = |
|
103 delimited_token | other_token(lexicon, is_command) |
|
104 |
|
105 def token_context(lexicon: Scan.Lexicon, is_command: String => Boolean, ctxt: Scan.Context) |
|
106 : Parser[(Token, Scan.Context)] = |
|
107 { |
|
108 val string = |
|
109 quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) } |
|
110 val alt_string = |
|
111 quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) } |
|
112 val verb = verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) } |
|
113 val cart = cartouche_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) } |
|
114 val cmt = comment_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) } |
|
115 val other = other_token(lexicon, is_command) ^^ { case x => (x, Scan.Finished) } |
|
116 |
|
117 string | (alt_string | (verb | (cart | (cmt | other)))) |
|
118 } |
34 } |
119 } |
35 |
120 |
36 |
121 |
37 /* token reader */ |
122 /* token reader */ |
38 |
123 |