| author | wenzelm | 
| Sun, 03 Apr 2016 23:28:48 +0200 | |
| changeset 62839 | ea9f12e422c7 | 
| parent 60215 | 5fb4990dfc73 | 
| child 63579 | 73939a9b70a3 | 
| permissions | -rw-r--r-- | 
| 31648 | 1 | /* Title: Pure/General/scan.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40290diff
changeset | 4 | Efficient scanning of keywords and tokens. | 
| 31648 | 5 | */ | 
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 34187 
7b659c1561f1
added byte_reader, which works without decoding and enables efficient length operation (for scala.util.parsing.input.Reader);
 wenzelm parents: 
34157diff
changeset | 9 | |
| 55980 | 10 | import scala.annotation.tailrec | 
| 46627 
fbe2cb05bdb3
avoid trait Addable, which is deprecated in scala-2.9.x;
 wenzelm parents: 
45900diff
changeset | 11 | import scala.collection.{IndexedSeq, TraversableOnce}
 | 
| 34187 
7b659c1561f1
added byte_reader, which works without decoding and enables efficient length operation (for scala.util.parsing.input.Reader);
 wenzelm parents: 
34157diff
changeset | 12 | import scala.collection.immutable.PagedSeq | 
| 
7b659c1561f1
added byte_reader, which works without decoding and enables efficient length operation (for scala.util.parsing.input.Reader);
 wenzelm parents: 
34157diff
changeset | 13 | import scala.util.parsing.input.{OffsetPosition, Position => InputPosition, Reader}
 | 
| 31648 | 14 | import scala.util.parsing.combinator.RegexParsers | 
| 15 | ||
| 56822 | 16 | import java.io.{File => JFile, BufferedInputStream, FileInputStream, InputStream}
 | 
| 17 | import java.net.URL | |
| 34187 
7b659c1561f1
added byte_reader, which works without decoding and enables efficient length operation (for scala.util.parsing.input.Reader);
 wenzelm parents: 
34157diff
changeset | 18 | |
| 31648 | 19 | |
| 20 | object Scan | |
| 21 | {
 | |
| 55510 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 22 | /** context of partial line-oriented scans **/ | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 23 | |
| 55510 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 24 | abstract class Line_Context | 
| 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 25 | case object Finished extends Line_Context | 
| 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 26 | case class Quoted(quote: String) extends Line_Context | 
| 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 27 | case object Verbatim extends Line_Context | 
| 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 28 | case class Cartouche(depth: Int) extends Line_Context | 
| 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 29 | case class Comment(depth: Int) extends Line_Context | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 30 | |
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 31 | |
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 32 | |
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 33 | /** parser combinators **/ | 
| 31650 | 34 | |
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 35 | object Parsers extends Parsers | 
| 36011 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 wenzelm parents: 
34316diff
changeset | 36 | |
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 37 | trait Parsers extends RegexParsers | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 38 |   {
 | 
| 31649 
a11ea667d676
reorganized and abstracted version, via Set trait;
 wenzelm parents: 
31648diff
changeset | 39 | override val whiteSpace = "".r | 
| 
a11ea667d676
reorganized and abstracted version, via Set trait;
 wenzelm parents: 
31648diff
changeset | 40 | |
| 34137 | 41 | |
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 42 | /* optional termination */ | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 43 | |
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 44 | def opt_term[T](p: => Parser[T]): Parser[Option[T]] = | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 45 | p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None) | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 46 | |
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 47 | |
| 55497 | 48 | /* repeated symbols */ | 
| 34137 | 49 | |
| 55497 | 50 | def repeated(pred: Symbol.Symbol => Boolean, min_count: Int, max_count: Int): Parser[String] = | 
| 34140 | 51 | new Parser[String] | 
| 34135 | 52 |       {
 | 
| 34137 | 53 | def apply(in: Input) = | 
| 54 |         {
 | |
| 55 | val start = in.offset | |
| 56 | val end = in.source.length | |
| 57 | val matcher = new Symbol.Matcher(in.source) | |
| 58 | ||
| 59 | var i = start | |
| 60 | var count = 0 | |
| 61 | var finished = false | |
| 55034 | 62 |           while (!finished && i < end && count < max_count) {
 | 
| 63 | val n = matcher(i, end) | |
| 64 | val sym = in.source.subSequence(i, i + n).toString | |
| 65 |             if (pred(sym)) { i += n; count += 1 }
 | |
| 34137 | 66 | else finished = true | 
| 67 | } | |
| 34140 | 68 |           if (count < min_count) Failure("bad input", in)
 | 
| 69 | else Success(in.source.subSequence(start, i).toString, in.drop(i - start)) | |
| 34137 | 70 | } | 
| 55497 | 71 |       }.named("repeated")
 | 
| 34137 | 72 | |
| 43696 | 73 | def one(pred: Symbol.Symbol => Boolean): Parser[String] = | 
| 55497 | 74 | repeated(pred, 1, 1) | 
| 43696 | 75 | |
| 76 | def many(pred: Symbol.Symbol => Boolean): Parser[String] = | |
| 55497 | 77 | repeated(pred, 0, Integer.MAX_VALUE) | 
| 43696 | 78 | |
| 79 | def many1(pred: Symbol.Symbol => Boolean): Parser[String] = | |
| 55497 | 80 | repeated(pred, 1, Integer.MAX_VALUE) | 
| 81 | ||
| 82 | ||
| 83 | /* character */ | |
| 84 | ||
| 85 | def character(pred: Char => Boolean): Symbol.Symbol => Boolean = | |
| 86 | (s: Symbol. Symbol) => s.length == 1 && pred(s.charAt(0)) | |
| 34140 | 87 | |
| 88 | ||
| 34143 | 89 | /* quoted strings */ | 
| 34140 | 90 | |
| 43696 | 91 | private def quoted_body(quote: Symbol.Symbol): Parser[String] = | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 92 |     {
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 93 | rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" | | 
| 60215 | 94 |         ("""\\\d\d\d""".r ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString)
 | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 95 | } | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 96 | |
| 55494 | 97 | def quoted(quote: Symbol.Symbol): Parser[String] = | 
| 34143 | 98 |     {
 | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 99 |       quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z }
 | 
| 34143 | 100 |     }.named("quoted")
 | 
| 34140 | 101 | |
| 43696 | 102 | def quoted_content(quote: Symbol.Symbol, source: String): String = | 
| 34140 | 103 |     {
 | 
| 104 | require(parseAll(quoted(quote), source).successful) | |
| 34189 | 105 | val body = source.substring(1, source.length - 1) | 
| 106 |       if (body.exists(_ == '\\')) {
 | |
| 107 | val content = | |
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40290diff
changeset | 108 | rep(many1(sym => sym != quote && sym != "\\") | | 
| 34189 | 109 |               "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
 | 
| 110 | parseAll(content ^^ (_.mkString), body).get | |
| 111 | } | |
| 112 | else body | |
| 34140 | 113 | } | 
| 114 | ||
| 55510 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 115 | def quoted_line(quote: Symbol.Symbol, ctxt: Line_Context): Parser[(String, Line_Context)] = | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 116 |     {
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 117 |       ctxt match {
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 118 | case Finished => | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 119 | quote ~ quoted_body(quote) ~ opt_term(quote) ^^ | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 120 |             { case x ~ y ~ Some(z) => (x + y + z, Finished)
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 121 | case x ~ y ~ None => (x + y, Quoted(quote)) } | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 122 | case Quoted(q) if q == quote => | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 123 | quoted_body(quote) ~ opt_term(quote) ^^ | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 124 |             { case x ~ Some(y) => (x + y, Finished)
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 125 | case x ~ None => (x, ctxt) } | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 126 |         case _ => failure("")
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 127 | } | 
| 55510 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 128 |     }.named("quoted_line")
 | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 129 | |
| 48763 
de68fc11c245
more precise recover_quoted, recover_verbatim, recover_comment (cf. ML version) -- NB: context parsers expect explicit termination;
 wenzelm parents: 
48754diff
changeset | 130 | def recover_quoted(quote: Symbol.Symbol): Parser[String] = | 
| 
de68fc11c245
more precise recover_quoted, recover_verbatim, recover_comment (cf. ML version) -- NB: context parsers expect explicit termination;
 wenzelm parents: 
48754diff
changeset | 131 |       quote ~ quoted_body(quote) ^^ { case x ~ y => x + y }
 | 
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48409diff
changeset | 132 | |
| 34140 | 133 | |
| 34143 | 134 | /* verbatim text */ | 
| 135 | ||
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 136 | private def verbatim_body: Parser[String] = | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 137 | rep(many1(sym => sym != "*") | """\*(?!\})""".r) ^^ (_.mkString) | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 138 | |
| 55494 | 139 | def verbatim: Parser[String] = | 
| 34143 | 140 |     {
 | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 141 |       "{*" ~ verbatim_body ~ "*}" ^^ { case x ~ y ~ z => x + y + z }
 | 
| 34143 | 142 |     }.named("verbatim")
 | 
| 34140 | 143 | |
| 144 | def verbatim_content(source: String): String = | |
| 145 |     {
 | |
| 146 | require(parseAll(verbatim, source).successful) | |
| 147 | source.substring(2, source.length - 2) | |
| 148 | } | |
| 149 | ||
| 55510 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 150 | def verbatim_line(ctxt: Line_Context): Parser[(String, Line_Context)] = | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 151 |     {
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 152 |       ctxt match {
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 153 | case Finished => | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 154 |           "{*" ~ verbatim_body ~ opt_term("*}") ^^
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 155 |             { case x ~ y ~ Some(z) => (x + y + z, Finished)
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 156 | case x ~ y ~ None => (x + y, Verbatim) } | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 157 | case Verbatim => | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 158 |           verbatim_body ~ opt_term("*}") ^^
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 159 |             { case x ~ Some(y) => (x + y, Finished)
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 160 | case x ~ None => (x, Verbatim) } | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 161 |         case _ => failure("")
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 162 | } | 
| 55510 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 163 |     }.named("verbatim_line")
 | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
42718diff
changeset | 164 | |
| 48763 
de68fc11c245
more precise recover_quoted, recover_verbatim, recover_comment (cf. ML version) -- NB: context parsers expect explicit termination;
 wenzelm parents: 
48754diff
changeset | 165 | val recover_verbatim: Parser[String] = | 
| 
de68fc11c245
more precise recover_quoted, recover_verbatim, recover_comment (cf. ML version) -- NB: context parsers expect explicit termination;
 wenzelm parents: 
48754diff
changeset | 166 |       "{*" ~ verbatim_body ^^ { case x ~ y => x + y }
 | 
| 48743 
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
 wenzelm parents: 
48409diff
changeset | 167 | |
| 34140 | 168 | |
| 55033 | 169 | /* nested text cartouches */ | 
| 170 | ||
| 171 | private def cartouche_depth(depth: Int): Parser[(String, Int)] = new Parser[(String, Int)] | |
| 172 |     {
 | |
| 173 | require(depth >= 0) | |
| 174 | ||
| 175 | def apply(in: Input) = | |
| 176 |       {
 | |
| 177 | val start = in.offset | |
| 178 | val end = in.source.length | |
| 179 | val matcher = new Symbol.Matcher(in.source) | |
| 180 | ||
| 181 | var i = start | |
| 182 | var d = depth | |
| 183 | var finished = false | |
| 55034 | 184 |         while (!finished && i < end) {
 | 
| 185 | val n = matcher(i, end) | |
| 186 | val sym = in.source.subSequence(i, i + n).toString | |
| 187 |           if (Symbol.is_open(sym)) { i += n; d += 1 }
 | |
| 58505 
d1fe47fe5401
actually finish after closing, e.g. relevant for consecutive (**)(**);
 wenzelm parents: 
56827diff
changeset | 188 |           else if (Symbol.is_close(sym) && d > 0) { i += n; d -= 1; if (d == 0) finished = true }
 | 
| 
d1fe47fe5401
actually finish after closing, e.g. relevant for consecutive (**)(**);
 wenzelm parents: 
56827diff
changeset | 189 | else if (d > 0) i += n | 
| 55033 | 190 | else finished = true | 
| 191 | } | |
| 192 |         if (i == start) Failure("bad input", in)
 | |
| 193 | else Success((in.source.subSequence(start, i).toString, d), in.drop(i - start)) | |
| 194 | } | |
| 195 |     }.named("cartouche_depth")
 | |
| 196 | ||
| 197 | def cartouche: Parser[String] = | |
| 198 |       cartouche_depth(0) ^? { case (x, d) if d == 0 => x }
 | |
| 199 | ||
| 55510 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 200 | def cartouche_line(ctxt: Line_Context): Parser[(String, Line_Context)] = | 
| 55033 | 201 |     {
 | 
| 202 | val depth = | |
| 203 |         ctxt match {
 | |
| 204 | case Finished => 0 | |
| 205 | case Cartouche(d) => d | |
| 206 | case _ => -1 | |
| 207 | } | |
| 208 | if (depth >= 0) | |
| 209 | cartouche_depth(depth) ^^ | |
| 210 |           { case (x, 0) => (x, Finished)
 | |
| 211 | case (x, d) => (x, Cartouche(d)) } | |
| 212 |       else failure("")
 | |
| 213 | } | |
| 214 | ||
| 215 | val recover_cartouche: Parser[String] = | |
| 216 | cartouche_depth(0) ^^ (_._1) | |
| 217 | ||
| 218 | def cartouche_content(source: String): String = | |
| 219 |     {
 | |
| 220 |       def err(): Nothing = error("Malformed text cartouche: " + quote(source))
 | |
| 221 | val source1 = | |
| 222 | Library.try_unprefix(Symbol.open_decoded, source) orElse | |
| 223 | Library.try_unprefix(Symbol.open, source) getOrElse err() | |
| 224 | Library.try_unsuffix(Symbol.close_decoded, source1) orElse | |
| 225 | Library.try_unsuffix(Symbol.close, source1) getOrElse err() | |
| 226 | } | |
| 227 | ||
| 228 | ||
| 34143 | 229 | /* nested comments */ | 
| 230 | ||
| 43412 | 231 | private def comment_depth(depth: Int): Parser[(String, Int)] = new Parser[(String, Int)] | 
| 34143 | 232 |     {
 | 
| 43412 | 233 | require(depth >= 0) | 
| 234 | ||
| 34143 | 235 | val comment_text = | 
| 42441 
781c622af16a
more robust scanning of iterated comments, such as "(* (**) (**) *)";
 wenzelm parents: 
40523diff
changeset | 236 |         rep1(many1(sym => sym != "*" && sym != "(") | """\*(?!\))|\((?!\*)""".r)
 | 
| 34143 | 237 | |
| 238 | def apply(in: Input) = | |
| 239 |       {
 | |
| 240 | var rest = in | |
| 42441 
781c622af16a
more robust scanning of iterated comments, such as "(* (**) (**) *)";
 wenzelm parents: 
40523diff
changeset | 241 | def try_parse[A](p: Parser[A]): Boolean = | 
| 34143 | 242 |         {
 | 
| 56663 
2d09b437c168
avoid "Adaptation of argument list by inserting ()" -- deprecated in scala-2.11.0;
 wenzelm parents: 
55980diff
changeset | 243 |           parse(p ^^^ (()), rest) match {
 | 
| 34143 | 244 |             case Success(_, next) => { rest = next; true }
 | 
| 245 | case _ => false | |
| 246 | } | |
| 247 | } | |
| 43412 | 248 | var d = depth | 
| 34143 | 249 | var finished = false | 
| 250 |         while (!finished) {
 | |
| 43412 | 251 |           if (try_parse("(*")) d += 1
 | 
| 58505 
d1fe47fe5401
actually finish after closing, e.g. relevant for consecutive (**)(**);
 wenzelm parents: 
56827diff
changeset | 252 |           else if (d > 0 && try_parse("*)")) { d -= 1; if (d == 0) finished = true }
 | 
| 43412 | 253 | else if (d == 0 || !try_parse(comment_text)) finished = true | 
| 34143 | 254 | } | 
| 43412 | 255 | if (in.offset < rest.offset) | 
| 256 | Success((in.source.subSequence(in.offset, rest.offset).toString, d), rest) | |
| 34143 | 257 |         else Failure("comment expected", in)
 | 
| 258 | } | |
| 43412 | 259 |     }.named("comment_depth")
 | 
| 260 | ||
| 261 | def comment: Parser[String] = | |
| 262 |       comment_depth(0) ^? { case (x, d) if d == 0 => x }
 | |
| 263 | ||
| 55510 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 wenzelm parents: 
55499diff
changeset | 264 | def comment_line(ctxt: Line_Context): Parser[(String, Line_Context)] = | 
| 43412 | 265 |     {
 | 
| 266 | val depth = | |
| 267 |         ctxt match {
 | |
| 268 | case Finished => 0 | |
| 269 | case Comment(d) => d | |
| 270 | case _ => -1 | |
| 271 | } | |
| 272 | if (depth >= 0) | |
| 273 | comment_depth(depth) ^^ | |
| 274 |           { case (x, 0) => (x, Finished)
 | |
| 275 | case (x, d) => (x, Comment(d)) } | |
| 276 |       else failure("")
 | |
| 277 | } | |
| 34143 | 278 | |
| 48763 
de68fc11c245
more precise recover_quoted, recover_verbatim, recover_comment (cf. ML version) -- NB: context parsers expect explicit termination;
 wenzelm parents: 
48754diff
changeset | 279 | val recover_comment: Parser[String] = | 
| 
de68fc11c245
more precise recover_quoted, recover_verbatim, recover_comment (cf. ML version) -- NB: context parsers expect explicit termination;
 wenzelm parents: 
48754diff
changeset | 280 | comment_depth(0) ^^ (_._1) | 
| 34143 | 281 | |
| 55033 | 282 | def comment_content(source: String): String = | 
| 283 |     {
 | |
| 284 | require(parseAll(comment, source).successful) | |
| 285 | source.substring(2, source.length - 2) | |
| 286 | } | |
| 287 | ||
| 34143 | 288 | |
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 289 | /* keyword */ | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 290 | |
| 55497 | 291 | def literal(lexicon: Lexicon): Parser[String] = new Parser[String] | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 292 |     {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 293 | def apply(in: Input) = | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 294 |       {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 295 | val result = lexicon.scan(in) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 296 |         if (result.isEmpty) Failure("keyword expected", in)
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 297 | else Success(result, in.drop(result.length)) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 298 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 299 |     }.named("keyword")
 | 
| 31649 
a11ea667d676
reorganized and abstracted version, via Set trait;
 wenzelm parents: 
31648diff
changeset | 300 | } | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 301 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 302 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 303 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 304 | /** Lexicon -- position tree **/ | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 305 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 306 | object Lexicon | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 307 |   {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 308 | /* representation */ | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 309 | |
| 60215 | 310 | private sealed case class Tree(branches: Map[Char, (String, Tree)]) | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 311 | private val empty_tree = Tree(Map()) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 312 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 313 | val empty: Lexicon = new Lexicon(empty_tree) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 314 | def apply(elems: String*): Lexicon = empty ++ elems | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 315 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 316 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 317 | final class Lexicon private(rep: Lexicon.Tree) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 318 |   {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 319 | /* auxiliary operations */ | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 320 | |
| 59073 | 321 | private def dest(tree: Lexicon.Tree, result: List[String]): List[String] = | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 322 | (result /: tree.branches.toList) ((res, entry) => | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 323 |         entry match { case (_, (s, tr)) =>
 | 
| 59073 | 324 | if (s.isEmpty) dest(tr, res) else dest(tr, s :: res) }) | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 325 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 326 | private def lookup(str: CharSequence): Option[(Boolean, Lexicon.Tree)] = | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 327 |     {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 328 | val len = str.length | 
| 55980 | 329 | @tailrec def look(tree: Lexicon.Tree, tip: Boolean, i: Int): Option[(Boolean, Lexicon.Tree)] = | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 330 |       {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 331 |         if (i < len) {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 332 |           tree.branches.get(str.charAt(i)) match {
 | 
| 59319 | 333 | case Some((s, tr)) => look(tr, s.nonEmpty, i + 1) | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 334 | case None => None | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 335 | } | 
| 55980 | 336 | } | 
| 337 | else Some(tip, tree) | |
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 338 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 339 | look(rep, false, 0) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 340 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 341 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 342 | def completions(str: CharSequence): List[String] = | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 343 |       lookup(str) match {
 | 
| 59073 | 344 | case Some((true, tree)) => dest(tree, List(str.toString)) | 
| 345 | case Some((false, tree)) => dest(tree, Nil) | |
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 346 | case None => Nil | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 347 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 348 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 349 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 350 | /* pseudo Set methods */ | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 351 | |
| 59073 | 352 | def raw_iterator: Iterator[String] = dest(rep, Nil).iterator | 
| 353 | def iterator: Iterator[String] = dest(rep, Nil).sorted.iterator | |
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 354 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 355 |     override def toString: String = iterator.mkString("Lexicon(", ", ", ")")
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 356 | |
| 59073 | 357 | def is_empty: Boolean = rep.branches.isEmpty | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 358 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 359 | def contains(elem: String): Boolean = | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 360 |       lookup(elem) match {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 361 | case Some((tip, _)) => tip | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 362 | case _ => false | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 363 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 364 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 365 | |
| 59073 | 366 | /* build lexicon */ | 
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 367 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 368 | def + (elem: String): Lexicon = | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 369 | if (contains(elem)) this | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 370 |       else {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 371 | val len = elem.length | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 372 | def extend(tree: Lexicon.Tree, i: Int): Lexicon.Tree = | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 373 |           if (i < len) {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 374 | val c = elem.charAt(i) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 375 | val end = (i + 1 == len) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 376 |             tree.branches.get(c) match {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 377 | case Some((s, tr)) => | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 378 | Lexicon.Tree(tree.branches + | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 379 | (c -> (if (end) elem else s, extend(tr, i + 1)))) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 380 | case None => | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 381 | Lexicon.Tree(tree.branches + | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 382 | (c -> (if (end) elem else "", extend(Lexicon.empty_tree, i + 1)))) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 383 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 384 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 385 | else tree | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 386 | new Lexicon(extend(rep, 0)) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 387 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 388 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 389 | def ++ (elems: TraversableOnce[String]): Lexicon = (this /: elems)(_ + _) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 390 | |
| 59073 | 391 | def ++ (other: Lexicon): Lexicon = | 
| 392 | if (this eq other) this | |
| 393 | else if (is_empty) other | |
| 394 | else this ++ other.raw_iterator | |
| 395 | ||
| 55492 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 396 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 397 | /* scan */ | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 398 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 399 | def scan(in: Reader[Char]): String = | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 400 |     {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 401 | val source = in.source | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 402 | val offset = in.offset | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 403 | val len = source.length - offset | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 404 | |
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 405 | def scan_tree(tree: Lexicon.Tree, result: String, i: Int): String = | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 406 |       {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 407 |         if (i < len) {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 408 |           tree.branches.get(source.charAt(offset + i)) match {
 | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 409 | case Some((s, tr)) => scan_tree(tr, if (s.isEmpty) result else s, i + 1) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 410 | case None => result | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 411 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 412 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 413 | else result | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 414 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 415 | scan_tree(rep, "", 0) | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 416 | } | 
| 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 wenzelm parents: 
55034diff
changeset | 417 | } | 
| 56821 | 418 | |
| 419 | ||
| 420 | ||
| 56822 | 421 | /** read stream without decoding: efficient length operation **/ | 
| 56821 | 422 | |
| 423 | private class Restricted_Seq(seq: IndexedSeq[Char], start: Int, end: Int) | |
| 424 | extends CharSequence | |
| 425 |   {
 | |
| 426 | def charAt(i: Int): Char = | |
| 427 | if (0 <= i && i < length) seq(start + i) | |
| 428 | else throw new IndexOutOfBoundsException | |
| 429 | ||
| 56822 | 430 | def length: Int = end - start // avoid expensive seq.length | 
| 56821 | 431 | |
| 432 | def subSequence(i: Int, j: Int): CharSequence = | |
| 433 | if (0 <= i && i <= j && j <= length) new Restricted_Seq(seq, start + i, start + j) | |
| 434 | else throw new IndexOutOfBoundsException | |
| 435 | ||
| 436 | override def toString: String = | |
| 437 |     {
 | |
| 438 | val buf = new StringBuilder(length) | |
| 439 | for (offset <- start until end) buf.append(seq(offset)) | |
| 440 | buf.toString | |
| 441 | } | |
| 442 | } | |
| 443 | ||
| 444 |   abstract class Byte_Reader extends Reader[Char] { def close: Unit }
 | |
| 445 | ||
| 56822 | 446 | private def make_byte_reader(stream: InputStream, stream_length: Int): Byte_Reader = | 
| 56821 | 447 |   {
 | 
| 56822 | 448 | val buffered_stream = new BufferedInputStream(stream) | 
| 56821 | 449 | val seq = new PagedSeq( | 
| 450 | (buf: Array[Char], offset: Int, length: Int) => | |
| 451 |         {
 | |
| 452 | var i = 0 | |
| 453 | var c = 0 | |
| 454 | var eof = false | |
| 455 |           while (!eof && i < length) {
 | |
| 56822 | 456 | c = buffered_stream.read | 
| 56821 | 457 | if (c == -1) eof = true | 
| 458 |             else { buf(offset + i) = c.toChar; i += 1 }
 | |
| 459 | } | |
| 460 | if (i > 0) i else -1 | |
| 461 | }) | |
| 56822 | 462 | val restricted_seq = new Restricted_Seq(seq, 0, stream_length) | 
| 56821 | 463 | |
| 464 | class Paged_Reader(override val offset: Int) extends Byte_Reader | |
| 465 |     {
 | |
| 466 | override lazy val source: CharSequence = restricted_seq | |
| 56827 | 467 | def first: Char = if (seq.isDefinedAt(offset)) seq(offset) else '\u001a' | 
| 56821 | 468 | def rest: Paged_Reader = if (seq.isDefinedAt(offset)) new Paged_Reader(offset + 1) else this | 
| 469 | def pos: InputPosition = new OffsetPosition(source, offset) | |
| 470 | def atEnd: Boolean = !seq.isDefinedAt(offset) | |
| 471 | override def drop(n: Int): Paged_Reader = new Paged_Reader(offset + n) | |
| 56822 | 472 |       def close { buffered_stream.close }
 | 
| 56821 | 473 | } | 
| 474 | new Paged_Reader(0) | |
| 475 | } | |
| 56822 | 476 | |
| 477 | def byte_reader(file: JFile): Byte_Reader = | |
| 478 | make_byte_reader(new FileInputStream(file), file.length.toInt) | |
| 479 | ||
| 480 | def byte_reader(url: URL): Byte_Reader = | |
| 481 |   {
 | |
| 482 | val connection = url.openConnection | |
| 483 | val stream = connection.getInputStream | |
| 484 | val stream_length = connection.getContentLength | |
| 485 | make_byte_reader(stream, stream_length) | |
| 486 | } | |
| 31648 | 487 | } |