author  wenzelm 
Sun, 20 Dec 2009 15:42:40 +0100  
changeset 34140  31be1235d0fb 
parent 34137  6cc9a0cbaf55 
child 34143  ded454429df3 
permissions  rwrr 
31648  1 
/* Title: Pure/General/scan.scala 
2 
Author: Makarius 

3 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

4 
Efficient scanning of keywords. 
31648  5 
*/ 
6 

7 
package isabelle 

8 

9 
import scala.util.parsing.combinator.RegexParsers 

10 

11 

12 
object Scan 

13 
{ 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

14 
/** Lexicon  position tree **/ 
31648  15 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

16 
object Lexicon 
31648  17 
{ 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

18 
private case class Tree(val branches: Map[Char, (String, Tree)]) 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

19 
private val empty_tree = Tree(Map()) 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

20 

a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

21 
val empty: Lexicon = new Lexicon 
31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

22 
def apply(elems: String*): Lexicon = empty ++ elems 
31648  23 
} 
24 

31764  25 
class Lexicon extends scala.collection.Set[String] with RegexParsers 
31648  26 
{ 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

27 
/* representation */ 
31648  28 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

29 
import Lexicon.Tree 
31764  30 
protected val main_tree: Tree = Lexicon.empty_tree 
31648  31 

32 

31650  33 
/* auxiliary operations */ 
34 

35 
private def content(tree: Tree, result: List[String]): List[String] = 

36 
(result /: tree.branches.toList) ((res, entry) => 

37 
entry match { case (_, (s, tr)) => 

38 
if (s.isEmpty) content(tr, res) else content(tr, s :: res) }) 

39 

40 
private def lookup(str: CharSequence): Option[(Boolean, Tree)] = 

41 
{ 

42 
val len = str.length 

43 
def look(tree: Tree, tip: Boolean, i: Int): Option[(Boolean, Tree)] = 

44 
{ 

45 
if (i < len) { 

46 
tree.branches.get(str.charAt(i)) match { 

47 
case Some((s, tr)) => look(tr, !s.isEmpty, i + 1) 

48 
case None => None 

49 
} 

50 
} else Some(tip, tree) 

51 
} 

52 
look(main_tree, false, 0) 

53 
} 

54 

55 
def completions(str: CharSequence): List[String] = 

31764  56 
lookup(str) match { 
31650  57 
case Some((true, tree)) => content(tree, List(str.toString)) 
58 
case Some((false, tree)) => content(tree, Nil) 

59 
case None => Nil 

31764  60 
} 
31650  61 

62 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

63 
/* Set methods */ 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

64 

a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

65 
override def stringPrefix = "Lexicon" 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

66 

31753  67 
override def isEmpty: Boolean = { main_tree.branches.isEmpty } 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

68 

31650  69 
def size: Int = content(main_tree, Nil).length 
70 
def elements: Iterator[String] = content(main_tree, Nil).sort(_ <= _).elements 

31648  71 

31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

72 
def contains(elem: String): Boolean = 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

73 
lookup(elem) match { 
31650  74 
case Some((tip, _)) => tip 
75 
case _ => false 

76 
} 

31648  77 

31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

78 
def + (elem: String): Lexicon = 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

79 
if (contains(elem)) this 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

80 
else { 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

81 
val len = elem.length 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

82 
def extend(tree: Tree, i: Int): Tree = 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

83 
if (i < len) { 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

84 
val c = elem.charAt(i) 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

85 
val end = (i + 1 == len) 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

86 
tree.branches.get(c) match { 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

87 
case Some((s, tr)) => 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

88 
Tree(tree.branches + 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

89 
(c > (if (end) elem else s, extend(tr, i + 1)))) 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

90 
case None => 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

91 
Tree(tree.branches + 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

92 
(c > (if (end) elem else "", extend(Lexicon.empty_tree, i + 1)))) 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

93 
} 
34135  94 
} 
95 
else tree 

31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

96 
val old = this 
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

97 
new Lexicon { override val main_tree = extend(old.main_tree, 0) } 
31648  98 
} 
31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

99 

31764  100 
def + (elem1: String, elem2: String, elems: String*): Lexicon = 
31762
20745ab5b79a
more precise implementation of trait methods  oddly this seems to require copy/paste for +, ++;
wenzelm
parents:
31753
diff
changeset

101 
this + elem1 + elem2 ++ elems 
31764  102 
def ++ (elems: Iterable[String]): Lexicon = (this /: elems) ((s, elem) => s + elem) 
103 
def ++ (elems: Iterator[String]): Lexicon = (this /: elems) ((s, elem) => s + elem) 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

104 

a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

105 

34137  106 
/** RegexParsers methods **/ 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

107 

a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

108 
override val whiteSpace = "".r 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

109 

34137  110 

111 
/* keywords from lexicon */ 

112 

34140  113 
def keyword: Parser[String] = new Parser[String] 
34135  114 
{ 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

115 
def apply(in: Input) = 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

116 
{ 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

117 
val source = in.source 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

118 
val offset = in.offset 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

119 
val len = source.length  offset 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

120 

34140  121 
def scan(tree: Tree, result: String, i: Int): String = 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

122 
{ 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

123 
if (i < len) { 
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

124 
tree.branches.get(source.charAt(offset + i)) match { 
34140  125 
case Some((s, tr)) => scan(tr, if (s.isEmpty) result else s, i + 1) 
34135  126 
case None => result 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

127 
} 
34135  128 
} 
129 
else result 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

130 
} 
34140  131 
val result = scan(main_tree, "", 0) 
132 
if (result.isEmpty) Failure("keyword expected", in) 

133 
else Success(result, in.drop(result.length)) 

31648  134 
} 
31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

135 
}.named("keyword") 
31648  136 

34137  137 

138 
/* symbols */ 

139 

34140  140 
def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[String] = 
141 
new Parser[String] 

34135  142 
{ 
34137  143 
def apply(in: Input) = 
144 
{ 

145 
val start = in.offset 

146 
val end = in.source.length 

147 
val matcher = new Symbol.Matcher(in.source) 

148 

149 
var i = start 

150 
var count = 0 

151 
var finished = false 

152 
while (!finished) { 

153 
if (i < end && count < max_count) { 

154 
val n = matcher(i, end) 

155 
val sym = in.source.subSequence(i, i + n).toString 

156 
if (pred(sym)) { i += n; count += 1 } 

157 
else finished = true 

158 
} 

159 
else finished = true 

160 
} 

34140  161 
if (count < min_count) Failure("bad input", in) 
162 
else Success(in.source.subSequence(start, i).toString, in.drop(i  start)) 

34137  163 
} 
164 
}.named("symbols") 

165 

34140  166 
def one(pred: String => Boolean): Parser[String] = symbols(pred, 1, 1) 
167 
def many(pred: String => Boolean): Parser[String] = symbols(pred, 0, Integer.MAX_VALUE) 

168 
def many1(pred: String => Boolean): Parser[String] = symbols(pred, 1, Integer.MAX_VALUE) 

169 

170 

171 
/* delimited text */ 

172 

173 
def quoted(quote: String) = 

174 
quote ~ 

175 
rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym))  

176 
"\\" + quote  "\\\\"  

177 
(("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~ 

178 
quote ^^ { case x ~ ys ~ z => x + ys.mkString + z } 

179 

180 
def quoted_content(quote: String, source: String): String = 

181 
{ 

182 
require(parseAll(quoted(quote), source).successful) 

183 
source.substring(1, source.length  1) // FIXME proper escapes, recode utf8 (!?) 

184 
} 

185 

186 

187 
def verbatim = 

188 
"{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym))  """\*(?!\})""".r) ~ "*}" ^^ 

189 
{ case x ~ ys ~ z => x + ys.mkString + z } 

190 

191 
def verbatim_content(source: String): String = 

192 
{ 

193 
require(parseAll(verbatim, source).successful) 

194 
source.substring(2, source.length  2) 

195 
} 

196 

197 

198 
/* outer syntax tokens */ 

199 

200 
def token(symbols: Symbol.Interpretation, is_command: String => Boolean): 

201 
Parser[Outer_Lex.Token] = 

202 
{ 

203 
import Outer_Lex._ 

204 

205 
val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y } 

206 
val nat = many1(symbols.is_digit) 

207 
val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x } 

208 

209 
val ident = id ~ rep("." ~> id) ^^ 

210 
{ case x ~ Nil => Ident(x) 

211 
case x ~ ys => Long_Ident((x :: ys).mkString(".")) } 

212 

213 
val var_ = "?" ~ id_nat ^^ { case x ~ y => Var(x + y) } 

214 
val type_ident = "'" ~ id ^^ { case x ~ y => Type_Ident(x + y) } 

215 
val type_var = "?'" ~ id_nat ^^ { case x ~ y => Type_Var(x + y) } 

216 
val nat_ = nat ^^ Nat 

217 

218 
val sym_ident = 

219 
(many1(symbols.is_symbolic_char)  

220 
one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^ Sym_Ident 

221 

222 
val space = many1(symbols.is_blank) ^^ Space 

223 

224 
val string = quoted("\"") ^^ String_Token 

225 
val alt_string = quoted("`") ^^ Alt_String_Token 

226 

227 
val comment: Parser[Token] = failure("FIXME") 

228 

229 
val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^ Bad_Input 

230 

231 

232 
/* tokens */ 

233 

234 
// FIXME rightassoc !? 

235 

236 
(string  alt_string  verbatim ^^ Verbatim  comment  space)  

237 
((ident  var_  type_ident  type_var  nat_  sym_ident)  

238 
keyword ^^ (x => if (is_command(x)) Command(x) else Keyword(x)))  

239 
bad_input 

240 
} 

31649
a11ea667d676
reorganized and abstracted version, via Set trait;
wenzelm
parents:
31648
diff
changeset

241 
} 
31648  242 
} 
243 