author  wenzelm 
Sun, 20 Dec 2009 17:47:59 +0100  
changeset 34143  ded454429df3 
parent 34140  31be1235d0fb 
child 34144  bd7b3b91abab 
permissions  rwrr 
31648  1 
/* Title: Pure/General/scan.scala 
2 
Author: Makarius 

3 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

4 
Efficient scanning of keywords. 
31648  5 
*/ 
6 

7 
package isabelle 

8 

9 
import scala.util.parsing.combinator.RegexParsers 

10 

11 

12 
object Scan 

13 
{ 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

14 
/** Lexicon  position tree **/ 
31648  15 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

16 
object Lexicon 
31648  17 
{ 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

18 
private case class Tree(val branches: Map[Char, (String, Tree)]) 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

19 
private val empty_tree = Tree(Map()) 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

20 

a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

21 
val empty: Lexicon = new Lexicon 
31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

22 
def apply(elems: String*): Lexicon = empty ++ elems 
31648  23 
} 
24 

31764  25 
class Lexicon extends scala.collection.Set[String] with RegexParsers 
31648  26 
{ 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

27 
/* representation */ 
31648  28 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

29 
import Lexicon.Tree 
31764  30 
protected val main_tree: Tree = Lexicon.empty_tree 
31648  31 

32 

31650  33 
/* auxiliary operations */ 
34 

35 
private def content(tree: Tree, result: List[String]): List[String] = 

36 
(result /: tree.branches.toList) ((res, entry) => 

37 
entry match { case (_, (s, tr)) => 

38 
if (s.isEmpty) content(tr, res) else content(tr, s :: res) }) 

39 

40 
private def lookup(str: CharSequence): Option[(Boolean, Tree)] = 

41 
{ 

42 
val len = str.length 

43 
def look(tree: Tree, tip: Boolean, i: Int): Option[(Boolean, Tree)] = 

44 
{ 

45 
if (i < len) { 

46 
tree.branches.get(str.charAt(i)) match { 

47 
case Some((s, tr)) => look(tr, !s.isEmpty, i + 1) 

48 
case None => None 

49 
} 

50 
} else Some(tip, tree) 

51 
} 

52 
look(main_tree, false, 0) 

53 
} 

54 

55 
def completions(str: CharSequence): List[String] = 

31764  56 
lookup(str) match { 
31650  57 
case Some((true, tree)) => content(tree, List(str.toString)) 
58 
case Some((false, tree)) => content(tree, Nil) 

59 
case None => Nil 

31764  60 
} 
31650  61 

62 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

63 
/* Set methods */ 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

64 

a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

65 
override def stringPrefix = "Lexicon" 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

66 

31753  67 
override def isEmpty: Boolean = { main_tree.branches.isEmpty } 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

68 

31650  69 
def size: Int = content(main_tree, Nil).length 
70 
def elements: Iterator[String] = content(main_tree, Nil).sort(_ <= _).elements 

31648  71 

31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

72 
def contains(elem: String): Boolean = 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

73 
lookup(elem) match { 
31650  74 
case Some((tip, _)) => tip 
75 
case _ => false 

76 
} 

31648  77 

31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

78 
def + (elem: String): Lexicon = 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

79 
if (contains(elem)) this 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

80 
else { 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

81 
val len = elem.length 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

82 
def extend(tree: Tree, i: Int): Tree = 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

83 
if (i < len) { 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

84 
val c = elem.charAt(i) 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

85 
val end = (i + 1 == len) 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

86 
tree.branches.get(c) match { 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

87 
case Some((s, tr)) => 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

88 
Tree(tree.branches + 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

89 
(c > (if (end) elem else s, extend(tr, i + 1)))) 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

90 
case None => 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

91 
Tree(tree.branches + 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

92 
(c > (if (end) elem else "", extend(Lexicon.empty_tree, i + 1)))) 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

93 
} 
34135  94 
} 
95 
else tree 

31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

96 
val old = this 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

97 
new Lexicon { override val main_tree = extend(old.main_tree, 0) } 
31648  98 
} 
31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

99 

31764  100 
def + (elem1: String, elem2: String, elems: String*): Lexicon = 
31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

101 
this + elem1 + elem2 ++ elems 
31764  102 
def ++ (elems: Iterable[String]): Lexicon = (this /: elems) ((s, elem) => s + elem) 
103 
def ++ (elems: Iterator[String]): Lexicon = (this /: elems) ((s, elem) => s + elem) 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

104 

a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

105 

34137  106 
/** RegexParsers methods **/ 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

107 

a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

108 
override val whiteSpace = "".r 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

109 

34137  110 

111 
/* keywords from lexicon */ 

112 

34140  113 
def keyword: Parser[String] = new Parser[String] 
34135  114 
{ 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

115 
def apply(in: Input) = 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

116 
{ 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

117 
val source = in.source 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

118 
val offset = in.offset 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

119 
val len = source.length  offset 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

120 

34140  121 
def scan(tree: Tree, result: String, i: Int): String = 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

122 
{ 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

123 
if (i < len) { 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

124 
tree.branches.get(source.charAt(offset + i)) match { 
34140  125 
case Some((s, tr)) => scan(tr, if (s.isEmpty) result else s, i + 1) 
34135  126 
case None => result 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

127 
} 
34135  128 
} 
129 
else result 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

130 
} 
34140  131 
val result = scan(main_tree, "", 0) 
132 
if (result.isEmpty) Failure("keyword expected", in) 

133 
else Success(result, in.drop(result.length)) 

31648  134 
} 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

135 
}.named("keyword") 
31648  136 

34137  137 

138 
/* symbols */ 

139 

34140  140 
def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[String] = 
141 
new Parser[String] 

34135  142 
{ 
34137  143 
def apply(in: Input) = 
144 
{ 

145 
val start = in.offset 

146 
val end = in.source.length 

147 
val matcher = new Symbol.Matcher(in.source) 

148 

149 
var i = start 

150 
var count = 0 

151 
var finished = false 

152 
while (!finished) { 

153 
if (i < end && count < max_count) { 

154 
val n = matcher(i, end) 

155 
val sym = in.source.subSequence(i, i + n).toString 

156 
if (pred(sym)) { i += n; count += 1 } 

157 
else finished = true 

158 
} 

159 
else finished = true 

160 
} 

34140  161 
if (count < min_count) Failure("bad input", in) 
162 
else Success(in.source.subSequence(start, i).toString, in.drop(i  start)) 

34137  163 
} 
164 
}.named("symbols") 

165 

34140  166 
def one(pred: String => Boolean): Parser[String] = symbols(pred, 1, 1) 
167 
def many(pred: String => Boolean): Parser[String] = symbols(pred, 0, Integer.MAX_VALUE) 

168 
def many1(pred: String => Boolean): Parser[String] = symbols(pred, 1, Integer.MAX_VALUE) 

169 

170 

34143  171 
/* quoted strings */ 
34140  172 

34143  173 
private def quoted(quote: String): Parser[String] = 
174 
{ 

34140  175 
quote ~ 
176 
rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym))  

177 
"\\" + quote  "\\\\"  

178 
(("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~ 

179 
quote ^^ { case x ~ ys ~ z => x + ys.mkString + z } 

34143  180 
}.named("quoted") 
34140  181 

182 
def quoted_content(quote: String, source: String): String = 

183 
{ 

184 
require(parseAll(quoted(quote), source).successful) 

185 
source.substring(1, source.length  1) // FIXME proper escapes, recode utf8 (!?) 

186 
} 

187 

188 

34143  189 
/* verbatim text */ 
190 

191 
private def verbatim: Parser[String] = 

192 
{ 

34140  193 
"{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym))  """\*(?!\})""".r) ~ "*}" ^^ 
194 
{ case x ~ ys ~ z => x + ys.mkString + z } 

34143  195 
}.named("verbatim") 
34140  196 

197 
def verbatim_content(source: String): String = 

198 
{ 

199 
require(parseAll(verbatim, source).successful) 

200 
source.substring(2, source.length  2) 

201 
} 

202 

203 

34143  204 
/* nested comments */ 
205 

206 
def comment: Parser[String] = new Parser[String] 

207 
{ 

208 
val comment_text = 

209 
rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_closed(sym))  

210 
"""\*(?!\))\((?!\*)""".r) 

211 
val comment_open = "(*" ~ comment_text ^^ (_ => ()) 

212 
val comment_close = comment_text ~ "*)" ^^ (_ => ()) 

213 

214 
def apply(in: Input) = 

215 
{ 

216 
var rest = in 

217 
def try_parse(p: Parser[Unit]): Boolean = 

218 
{ 

219 
parse(p, rest) match { 

220 
case Success(_, next) => { rest = next; true } 

221 
case _ => false 

222 
} 

223 
} 

224 
var depth = 0 

225 
var finished = false 

226 
while (!finished) { 

227 
if (try_parse(comment_open)) depth += 1 

228 
else if (depth > 0 && try_parse(comment_close)) depth = 1 

229 
else finished = true 

230 
} 

231 
if (in.offset < rest.offset && depth == 0) 

232 
Success(in.source.subSequence(in.offset, rest.offset).toString, rest) 

233 
else Failure("comment expected", in) 

234 
} 

235 
}.named("comment") 

236 

237 
def comment_content(source: String): String = 

238 
{ 

239 
require(parseAll(comment, source).successful) 

240 
source.substring(2, source.length  2) 

241 
} 

242 

243 

34140  244 
/* outer syntax tokens */ 
245 

246 
def token(symbols: Symbol.Interpretation, is_command: String => Boolean): 

247 
Parser[Outer_Lex.Token] = 

248 
{ 

249 
import Outer_Lex._ 

250 

251 
val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y } 

252 
val nat = many1(symbols.is_digit) 

253 
val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x } 

254 

255 
val ident = id ~ rep("." ~> id) ^^ 

256 
{ case x ~ Nil => Ident(x) 

257 
case x ~ ys => Long_Ident((x :: ys).mkString(".")) } 

258 

259 
val var_ = "?" ~ id_nat ^^ { case x ~ y => Var(x + y) } 

260 
val type_ident = "'" ~ id ^^ { case x ~ y => Type_Ident(x + y) } 

261 
val type_var = "?'" ~ id_nat ^^ { case x ~ y => Type_Var(x + y) } 

262 
val nat_ = nat ^^ Nat 

263 

264 
val sym_ident = 

265 
(many1(symbols.is_symbolic_char)  

266 
one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^ Sym_Ident 

267 

268 
val space = many1(symbols.is_blank) ^^ Space 

269 

270 
val string = quoted("\"") ^^ String_Token 

271 
val alt_string = quoted("`") ^^ Alt_String_Token 

272 

273 
val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^ Bad_Input 

274 

275 

276 
/* tokens */ 

277 

278 
// FIXME rightassoc !? 

279 

34143  280 
(string  alt_string  verbatim ^^ Verbatim  comment ^^ Comment  space)  
34140  281 
((ident  var_  type_ident  type_var  nat_  sym_ident)  
282 
keyword ^^ (x => if (is_command(x)) Command(x) else Keyword(x)))  

283 
bad_input 

284 
} 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

285 
} 
31648  286 
} 