| author | wenzelm | 
| Sun, 22 Oct 2023 12:18:23 +0200 | |
| changeset 78813 | 1829ba610c36 | 
| parent 78609 | 67492b2a3a62 | 
| child 78917 | 30e0c15a71f7 | 
| permissions | -rw-r--r-- | 
| 78316 | 1 | /* Title: Pure/General/toml.scala | 
| 78317 | 2 | Author: Fabian Huch, TU Muenchen | 
| 78316 | 3 | |
| 4 | Support for TOML: https://toml.io/en/v1.0.0 | |
| 78317 | 5 | */ | 
| 6 | ||
| 78316 | 7 | package isabelle | 
| 8 | ||
| 9 | ||
| 10 | import TOML.Parsers.Keys | |
| 11 | import TOML.Parse_Context.Seen | |
| 12 | ||
| 13 | import java.lang.Long.parseLong | |
| 14 | import java.lang.{String => Str}
 | |
| 15 | import java.time.{LocalDate, LocalDateTime, LocalTime, OffsetDateTime}
 | |
| 16 | ||
| 17 | import scala.{Boolean => Bool}
 | |
| 18 | import scala.collection.immutable.ListMap | |
| 19 | import scala.reflect.{ClassTag, classTag}
 | |
| 20 | import scala.util.Try | |
| 21 | import scala.util.matching.Regex | |
| 22 | import scala.util.parsing.combinator | |
| 23 | import scala.util.parsing.combinator.lexical.Scanners | |
| 24 | import scala.util.parsing.input.CharArrayReader.EofCh | |
| 25 | ||
| 26 | ||
| 27 | object TOML {
 | |
| 28 | /* typed representation and access */ | |
| 29 | ||
| 30 | type Key = Str | |
| 31 | ||
| 32 | sealed trait T | |
| 33 | case class String(rep: Str) extends T | |
| 34 | case class Integer(rep: Long) extends T | |
| 35 | case class Float(rep: Double) extends T | |
| 36 | case class Boolean(rep: Bool) extends T | |
| 37 | case class Offset_Date_Time(rep: OffsetDateTime) extends T | |
| 38 | case class Local_Date_Time(rep: LocalDateTime) extends T | |
| 39 | case class Local_Date(rep: LocalDate) extends T | |
| 40 | case class Local_Time(rep: LocalTime) extends T | |
| 41 | ||
| 42 |   class Array private(private val rep: List[T]) extends T {
 | |
| 43 | override def hashCode(): Int = rep.hashCode() | |
| 44 |     override def equals(that: Any): Bool = that match {
 | |
| 45 | case other: Array => rep == other.rep | |
| 46 | case _ => false | |
| 47 | } | |
| 48 | ||
| 49 |     class Values[A](pf: PartialFunction[T, A]) { def values: List[A] = rep.collect(pf).reverse }
 | |
| 50 |     lazy val string = new Values({ case s: String => s })
 | |
| 51 |     lazy val integer = new Values({ case i: Integer => i })
 | |
| 52 |     lazy val float = new Values({ case f: Float => f })
 | |
| 53 |     lazy val boolean = new Values({ case b: Boolean => b })
 | |
| 54 |     lazy val offset_date_time = new Values({ case o: Offset_Date_Time => o })
 | |
| 55 |     lazy val local_date_time = new Values({ case l: Local_Date_Time => l })
 | |
| 56 |     lazy val local_date = new Values({ case l: Local_Date => l })
 | |
| 57 |     lazy val local_time = new Values({ case l: Local_Time => l })
 | |
| 58 |     lazy val array = new Values({ case a: Array => a })
 | |
| 59 |     lazy val table = new Values({ case t: Table => t })
 | |
| 60 |     lazy val any = new Values({ case t => t })
 | |
| 61 | ||
| 62 | def +(elem: T): Array = new Array(elem :: rep) | |
| 63 | def ++(other: Array): Array = new Array(other.rep ::: rep) | |
| 64 | def length: Int = rep.length | |
| 65 | def is_empty: Bool = rep.isEmpty | |
| 66 | } | |
| 67 | ||
| 68 |   object Array {
 | |
| 69 | def apply(elems: Iterable[T]): Array = new Array(elems.toList.reverse) | |
| 70 | def apply(elems: T*): Array = Array(elems) | |
| 71 | } | |
| 72 | ||
| 73 |   class Table private(private val rep: Map[Key, T]) extends T {
 | |
| 74 | override def hashCode(): Int = rep.hashCode() | |
| 75 | override def equals(that: Any): Bool = | |
| 76 |       that match {
 | |
| 77 | case other: Table => rep == other.rep | |
| 78 | case _ => false | |
| 79 | } | |
| 80 | ||
| 81 |     class Value[A: ClassTag](pf: PartialFunction[T, A]) {
 | |
| 82 | def values: List[(Key, A)] = | |
| 83 |         rep.toList.collect { case (k, v) if pf.isDefinedAt(v) => k -> pf(v) }
 | |
| 84 | def get(k: Key): Option[A] = rep.get(k).flatMap(v => PartialFunction.condOpt(v)(pf)) | |
| 85 | def apply(k: Key): A = | |
| 86 |         rep.get(k) match {
 | |
| 87 |           case Some(v) => PartialFunction.condOpt(v)(pf) match {
 | |
| 88 | case Some(value) => value | |
| 89 | case None => | |
| 90 |               error("Expected" + classTag[A].runtimeClass.getName +
 | |
| 78327 | 91 | ", got " + v.getClass.getSimpleName + " for key " + Format.key(k)) | 
| 78316 | 92 | } | 
| 78327 | 93 |           case None => error("Key " + Format.key(k) + " does not exist")
 | 
| 78316 | 94 | } | 
| 95 | } | |
| 96 | ||
| 97 |     lazy val string = new Value({ case s: String => s })
 | |
| 98 |     lazy val integer = new Value({ case i: Integer => i })
 | |
| 99 |     lazy val float = new Value({ case f: Float => f })
 | |
| 100 |     lazy val boolean = new Value({ case b: Boolean => b })
 | |
| 101 |     lazy val offset_date_time = new Value({ case o: Offset_Date_Time => o })
 | |
| 102 |     lazy val local_date_time = new Value({ case l: Local_Date_Time => l })
 | |
| 103 |     lazy val local_date = new Value({ case l: Local_Date => l })
 | |
| 104 |     lazy val local_time = new Value({ case l: Local_Time => l })
 | |
| 105 |     lazy val array = new Value({ case a: Array => a })
 | |
| 106 |     lazy val table = new Value({ case t: Table => t })
 | |
| 107 |     lazy val any = new Value({ case t => t })
 | |
| 108 | ||
| 109 |     def +(elem: (Key, T)): Table = {
 | |
| 110 | val (k, v) = elem | |
| 111 |       val v1 = rep.get(k) match {
 | |
| 112 | case None => v | |
| 113 | case Some(v0) => | |
| 114 |           (v0, v) match {
 | |
| 115 | case (t0: Table, t: Table) => t0 ++ t | |
| 116 | case (a0: Array, a: Array) => a0 ++ a | |
| 78327 | 117 |             case _ => error("Key already present: " + Format.key(k))
 | 
| 78316 | 118 | } | 
| 119 | } | |
| 120 | new Table(rep + (k -> v1)) | |
| 121 | } | |
| 122 | def -(k: Key): Table = new Table(rep - k) | |
| 123 | def ++(other: Table): Table = other.rep.foldLeft(this)(_ + _) | |
| 124 | def domain: Set[Key] = rep.keySet | |
| 125 | def is_empty: Bool = rep.isEmpty | |
| 126 | } | |
| 127 | ||
| 128 |   object Table {
 | |
| 129 | def apply(elems: Iterable[(Key, T)]): Table = elems.foldLeft(new Table(ListMap.empty))(_ + _) | |
| 130 | def apply(elems: (Key, T)*): Table = Table(elems) | |
| 131 | } | |
| 132 | ||
| 133 | ||
| 134 | /* lexer */ | |
| 135 | ||
| 78609 | 136 |   enum Kind { case KEYWORD, VALUE, STRING, MULTILINE_STRING, LINE_SEP, ERROR }
 | 
| 78316 | 137 | |
| 78609 | 138 |   sealed case class Token(kind: Kind, text: Str) {
 | 
| 78316 | 139 | def is_keyword(name: Str): Bool = kind == Kind.KEYWORD && text == name | 
| 140 | def is_value: Bool = kind == Kind.VALUE | |
| 141 | def is_string: Bool = kind == Kind.STRING | |
| 142 | def is_multiline_string: Bool = kind == Kind.MULTILINE_STRING | |
| 143 | def is_line_sep: Bool = kind == Kind.LINE_SEP | |
| 144 | } | |
| 145 | ||
| 146 |   object Lexer extends Scanners with Scan.Parsers {
 | |
| 147 | override type Elem = Char | |
| 148 | type Token = TOML.Token | |
| 149 | ||
| 150 | def errorToken(msg: Str): Token = Token(Kind.ERROR, msg) | |
| 151 | ||
| 152 | val white_space: Str = " \t" | |
| 153 |     override val whiteSpace: Regex = ("[" + white_space + "]+").r
 | |
| 154 | override def whitespace: Parser[Any] = rep(comment | many1(character(white_space.contains(_)))) | |
| 155 | ||
| 156 |     def line_sep: Parser[Str] = rep1("\n" | s"\r\n" | EofCh) ^^ (cs => cs.mkString)
 | |
| 157 | def line_sep_token: Parser[Token] = line_sep ^^ (s => Token(Kind.LINE_SEP, s)) | |
| 158 | ||
| 159 | def is_control(e: Elem): Bool = | |
| 160 |       e <= '\u0008' || ('\u000A' <= e && e <= '\u001F') || e == '\u007F'
 | |
| 161 | ||
| 162 | override def comment: Parser[Str] = '#' ~>! many(character(c => !is_control(c))) | |
| 163 | ||
| 164 |     def keyword: Parser[Token] = one(character("{}[],=.".contains)) ^^ (s => Token(Kind.KEYWORD, s))
 | |
| 165 | ||
| 166 | def is_value(c: Elem): Bool = | |
| 167 | Symbol.is_ascii_letter(c) || Symbol.is_ascii_digit(c) || "_-:+".contains(c) | |
| 168 | def value: Parser[Token] = | |
| 169 | many1(character(is_value)) ~ | |
| 170 |         opt(' ' ~ many1(character(is_value)) ^^ { case ws ~ s => ws.toString + s }) ~
 | |
| 171 |         opt('.' ~ many1(character(is_value)) ^^ { case dot ~ s => dot.toString + s}) ^^
 | |
| 172 |         { case s ~ ss ~ sd => Token(Kind.VALUE, s + ss.getOrElse("") + sd.getOrElse("")) }
 | |
| 173 | ||
| 174 | def string: Parser[Token] = | |
| 175 | multiline_basic_string | basic_string | multiline_literal_string | literal_string | |
| 176 | ||
| 177 | private def trim(s: Str): Str = | |
| 178 |       if (s.startsWith("\n")) s.stripPrefix("\n") else s.stripPrefix("\r\n")
 | |
| 179 | ||
| 180 | def basic_string: Parser[Token] = | |
| 181 | '"' ~> rep(basic_string_elem) <~ '"' ^^ (cs => Token(Kind.STRING, cs.mkString)) | |
| 182 | ||
| 183 | def multiline_basic_string: Parser[Token] = | |
| 184 | "\"\"\"" ~> | |
| 185 | rep(multiline_basic_string_elem | | |
| 186 |           ("\"\"" | "\"") ~ multiline_basic_string_elem ^^ { case s ~ t => s + t }) ~
 | |
| 187 |         repeated(character(_ == '"'), 3, 5) ^^ { case cs ~ q =>
 | |
| 188 | Token(Kind.MULTILINE_STRING, trim(cs.mkString + q.drop(3))) } | |
| 189 | ||
| 190 | private def multiline_basic_string_elem: Parser[Str] = | |
| 191 |       ('\\' ~ line_sep ~ rep(many1(character(white_space.contains)) | line_sep)) ^^ (_ => "") |
 | |
| 192 | basic_string_elem ^^ (_.toString) | line_sep | |
| 193 | ||
| 194 | def literal_string: Parser[Token] = | |
| 195 | '\'' ~> rep(literal_string_elem) <~ '\'' ^^ (cs => Token(Kind.STRING, cs.mkString)) | |
| 196 | ||
| 197 | def multiline_literal_string: Parser[Token] = | |
| 198 | "'''" ~> | |
| 199 | rep(multiline_literal_string_elem | | |
| 200 |           ("''" | "'") ~ multiline_literal_string_elem ^^ { case s ~ t => s + t }) ~
 | |
| 201 |         repeated(character(_ == '\''), 3, 5) ^^ { case cs ~ q =>
 | |
| 202 | Token(Kind.MULTILINE_STRING, trim(cs.mkString + q.drop(3))) } | |
| 203 | ||
| 204 | private def multiline_literal_string_elem: Parser[Str] = | |
| 205 | line_sep | literal_string_elem ^^ (_.toString) | |
| 206 | ||
| 207 | private def basic_string_elem: Parser[Elem] = | |
| 208 |       elem("", c => !is_control(c) && !"\"\\".contains(c)) | '\\' ~> string_escape
 | |
| 209 | ||
| 210 | private def string_escape: Parser[Elem] = | |
| 211 |       elem("", "\"\\".contains(_)) |
 | |
| 212 |         elem("", "btnfr".contains(_)) ^^
 | |
| 213 |           { case 'b' => '\b' case 't' => '\t' case 'n' => '\n' case 'f' => '\f' case 'r' => '\r' } |
 | |
| 214 |         ('u' ~> repeated(character("0123456789abcdefABCDEF".contains(_)), 4, 4) |
 | |
| 215 |           'U' ~> repeated(character("0123456789abcdefABCDEF".contains(_)), 8, 8)) ^^
 | |
| 216 | (s => java.lang.Integer.parseInt(s, 16).toChar) | |
| 217 | ||
| 218 |     private def literal_string_elem: Parser[Elem] = elem("", c => !is_control(c) && c != '\'')
 | |
| 219 | ||
| 220 |     def string_failure: Parser[Token] = ("\"" | "'") ~> failure("Unterminated string")
 | |
| 221 | ||
| 222 | def token: Parser[Token] = | |
| 223 |       line_sep_token | keyword | value | string | string_failure | failure("Unrecognized token")
 | |
| 224 | } | |
| 225 | ||
| 226 | ||
| 227 | /* parser */ | |
| 228 | ||
| 229 |   trait Parsers extends combinator.Parsers {
 | |
| 230 | type Elem = Token | |
| 78317 | 231 | |
| 232 | ||
| 78316 | 233 | /* parse structure */ | 
| 78317 | 234 | |
| 78316 | 235 | type Keys = List[Key] | 
| 236 | ||
| 237 | sealed trait V | |
| 238 | case class Primitive(t: T) extends V | |
| 239 | case class Array(rep: List[V]) extends V | |
| 240 | case class Inline_Table(elems: List[(Keys, V)]) extends V | |
| 241 | ||
| 242 | sealed trait Def | |
| 243 | case class Table(key: Keys, elems: List[(Keys, V)]) extends Def | |
| 244 | case class Array_Of_Tables(key: Keys, elems: List[(Keys, V)]) extends Def | |
| 78317 | 245 | |
| 78316 | 246 | case class File(elems: List[(Keys, V)], defs: List[Def]) | 
| 78317 | 247 | |
| 78316 | 248 | |
| 249 | /* top-level syntax structure */ | |
| 78317 | 250 | |
| 78316 | 251 |     def key: Parser[Keys] = rep1sep(keys, $$$(".")) ^^ (_.flatten)
 | 
| 78317 | 252 | |
| 78316 | 253 | def string: Parser[String] = | 
| 254 |       elem("string", e => e.is_string || e.is_multiline_string) ^^ (s => String(s.text))
 | |
| 255 | def integer: Parser[Integer] = | |
| 256 | (decimal_int | binary_int | octal_int | hexadecimal_int) ^^ Integer.apply | |
| 257 | def float: Parser[Float] = (symbol_float | number_float) ^^ Float.apply | |
| 258 |     def boolean: Parser[Boolean] = token("boolean", _.is_value, s => Boolean(Value.Boolean.parse(s)))
 | |
| 259 | ||
| 260 | def offset_date_time: Parser[Offset_Date_Time] = | |
| 261 |       token("offset date-time", _.is_value,
 | |
| 262 |         s => Offset_Date_Time(OffsetDateTime.parse(s.replace(" ", "T"))))
 | |
| 263 | def local_date_time: Parser[Local_Date_Time] = | |
| 264 |       token("local date-time", _.is_value,
 | |
| 265 |         s => Local_Date_Time(LocalDateTime.parse(s.replace(" ", "T"))))
 | |
| 266 | def local_date: Parser[Local_Date] = | |
| 267 |       token("local date", _.is_value, s => Local_Date(LocalDate.parse(s)))
 | |
| 268 | def local_time: Parser[Local_Time] = | |
| 269 |       token("local time", _.is_value, s => Local_Time(LocalTime.parse(s)))
 | |
| 270 | ||
| 271 | def array: Parser[Array] = | |
| 272 |       $$$("[") ~>! repsep(opt(line_sep) ~> toml_value, opt(line_sep) ~ $$$(",")) <~!
 | |
| 273 |         opt(line_sep) ~! opt($$$(",")) ~! opt(line_sep) <~! $$$("]") ^^ Array.apply
 | |
| 274 | ||
| 275 | def inline_table: Parser[Inline_Table] = | |
| 276 |       $$$("{") ~>! repsep(pair, $$$(",")) <~! $$$("}") ^^ Inline_Table.apply
 | |
| 78317 | 277 | |
| 78316 | 278 |     def pair: Parser[(Keys, V)] = (key <~! $$$("=")) ~! toml_value ^^ { case ks ~ v => (ks, v) }
 | 
| 279 | ||
| 280 |     def table: Parser[Table] = $$$("[") ~> (key <~! $$$("]") ~! line_sep) ~! content ^^
 | |
| 281 |       { case key ~ content => Table(key, content) }
 | |
| 282 | ||
| 283 | def array_of_tables: Parser[Array_Of_Tables] = | |
| 284 |       $$$("[") ~ $$$("[") ~>! (key <~! $$$("]") ~! $$$("]") ~! line_sep) ~! content ^^
 | |
| 285 |         { case key ~ content => Array_Of_Tables(key, content) }
 | |
| 286 | ||
| 287 | def toml: Parser[File] = | |
| 288 | (opt(line_sep) ~>! content ~! rep(table | array_of_tables)) ^^ | |
| 289 |         { case content ~ defs => File(content, defs) }
 | |
| 290 | ||
| 291 | ||
| 292 | /* auxiliary */ | |
| 293 | ||
| 294 | private def $$$(name: Str): Parser[Token] = elem(name, _.is_keyword(name)) | |
| 295 | private def maybe[A, B](p: Parser[A], f: A => B): Parser[B] = | |
| 296 |       p ^^ (a => Try(f(a))) ^? { case util.Success(v) => v }
 | |
| 297 | private def token[A](name: Str, p: Token => Bool, parser: Str => A): Parser[A] = | |
| 298 | maybe(elem(name, p), s => parser(s.text)) | |
| 299 | private def prefixed[A]( | |
| 300 | prefix: Str, name: Str, p: Str => Bool, parser: Str => A | |
| 301 | ): Parser[A] = | |
| 302 | token(name, e => e.is_value && e.text.startsWith(prefix) && p(e.text.stripPrefix(prefix)), | |
| 303 | s => parser(s.stripPrefix(prefix))) | |
| 304 | ||
| 305 |     private def is_key(e: Elem): Bool = e.is_value && !e.text.exists("+: ".contains(_))
 | |
| 306 | private def keys: Parser[Keys] = | |
| 307 |       token("string key", _.is_string, List(_)) | token("key", is_key, _.split('.').toList)
 | |
| 308 | ||
| 309 | private def sep_surrounded(s: Str): Bool = | |
| 310 |       !s.startsWith("_") && !s.endsWith("_") && s.split('_').forall(_.nonEmpty)
 | |
| 311 |     private def no_leading_zero(s: Str): Bool = {
 | |
| 312 |       val t = s.replaceAll("_", "").takeWhile(_.isDigit)
 | |
| 313 |       t == "0" || !t.startsWith("0")
 | |
| 314 | } | |
| 315 | ||
| 316 | private def is_int(s: Str): Bool = | |
| 317 |       no_leading_zero(s.replaceAll("[-+]", "")) && sep_surrounded(s.replaceAll("[-+]", ""))
 | |
| 318 | private def decimal_int: Parser[Long] = | |
| 319 |       token("integer", e => e.is_value && is_int(e.text), _.replace("_", "").toLong)
 | |
| 320 | private def binary_int: Parser[Long] = | |
| 321 |       prefixed("0b", "integer", sep_surrounded, s => parseLong(s.replace("_", ""), 2))
 | |
| 322 | private def octal_int: Parser[Long] = | |
| 323 |       prefixed("0o", "integer", sep_surrounded, s => parseLong(s.replace("_", ""), 8))
 | |
| 324 | private def hexadecimal_int: Parser[Long] = | |
| 325 |       prefixed("0x", "integer", sep_surrounded, s => parseLong(s.replace("_", ""), 16))
 | |
| 326 | ||
| 327 | private def is_float(s: Str): Bool = | |
| 328 |       s.exists(".eE".contains) && s.count("eE".contains) <= 1 &&
 | |
| 329 |         no_leading_zero(s.replaceAll("[-+]", "")) &&
 | |
| 330 |         sep_surrounded(s.replaceAll("[-+]", "").replaceAll("[.eE][+\\-]?", "_"))
 | |
| 331 | private def number_float: Parser[Double] = | |
| 332 |       token("float", e => e.is_value && is_float(e.text), _.replace("_", "").toDouble)
 | |
| 333 | private def symbol_float: Parser[Double] = | |
| 334 |       token("float", _.is_value, {
 | |
| 335 | case "inf" | "+inf" => Double.PositiveInfinity | |
| 336 | case "-inf" => Double.NegativeInfinity | |
| 337 | case "nan" | "+nan" | "-nan" => Double.NaN | |
| 338 | }) | |
| 339 | ||
| 340 | private def toml_value: Parser[V] = (string | float | integer | boolean | offset_date_time | | |
| 341 | local_date_time | local_date | local_time) ^^ Primitive.apply | array | inline_table | |
| 342 | ||
| 343 |     private def line_sep: Parser[Any] = rep1(elem("line sep", _.is_line_sep))
 | |
| 344 | ||
| 345 | private def content: Parser[List[(Keys, V)]] = | |
| 346 |       rep((key <~! $$$("=")) ~! toml_value <~! line_sep ^^ { case ks ~ v => ks -> v })
 | |
| 347 | ||
| 348 | ||
| 349 | /* parse */ | |
| 350 | ||
| 351 |     def parse(input: Str): File = {
 | |
| 352 | val scanner = new Lexer.Scanner(Scan.char_reader(input + EofCh)) | |
| 353 | val result = phrase(toml)(scanner) | |
| 354 |       result match {
 | |
| 355 | case Success(toml, _) => toml | |
| 356 |         case Failure(msg, next) => error("Malformed TOML input at " + next.pos + ": " + msg)
 | |
| 357 |         case Error(msg, next) => error("Error parsing toml at " + next.pos + ": " + msg)
 | |
| 358 | } | |
| 359 | } | |
| 360 | } | |
| 361 | ||
| 362 | object Parsers extends Parsers | |
| 363 | ||
| 364 | ||
| 365 | /* checking and translating parse structure into toml */ | |
| 366 | ||
| 367 | private def prefixes(ks: Keys): Set[Keys] = | |
| 368 | if (ks.isEmpty) Set.empty else Range.inclusive(1, ks.length).toSet.map(ks.take) | |
| 369 | ||
| 370 |   class Parse_Context private(var seen_tables: Map[Keys, Seen]) {
 | |
| 371 | private def splits(ks: Keys): List[(Keys, Keys)] = | |
| 372 | Range.inclusive(0, ks.length).toList.map(n => (ks.dropRight(n), ks.takeRight(n))) | |
| 373 | ||
| 374 | private def recent_array(ks: Keys): (Keys, Seen, Keys) = | |
| 375 |       splits(ks).collectFirst {
 | |
| 376 | case (ks1, ks2) if seen_tables.contains(ks1) => (ks1, seen_tables(ks1), ks2) | |
| 377 | }.get | |
| 378 | ||
| 379 |     private def check_add(start: Int, ks: Keys, elem: Parsers.Def | Parsers.V): Unit = {
 | |
| 380 | val (to, seen, rest, split) = | |
| 381 |         elem match {
 | |
| 382 | case _: Parsers.Array_Of_Tables => | |
| 383 | val (_, seen, rest) = recent_array(ks.dropRight(1)) | |
| 384 | (ks, seen, rest ::: ks.takeRight(1), 0) | |
| 385 | case _ => | |
| 386 | val (to, seen, rest) = recent_array(ks) | |
| 387 | (to, seen, rest, start - to.length) | |
| 388 | } | |
| 389 | val (rest0, rest1) = rest.splitAt(split) | |
| 390 | val implicitly_seen = prefixes(rest1.dropRight(1)).map(rest0 ::: _) | |
| 391 | ||
| 78327 | 392 | seen.inlines.find(rest.startsWith(_)).foreach(ks => | 
| 393 |         error("Attempting to update an inline value at " + Format.keys(ks)))
 | |
| 78316 | 394 | |
| 395 | val (inlines, tables) = | |
| 396 |         elem match {
 | |
| 397 | case _: Parsers.Primitive => | |
| 398 | (seen.inlines, seen.tables ++ implicitly_seen) | |
| 399 | case _: Parsers.Array => | |
| 78327 | 400 | if (seen_tables.contains(ks)) | 
| 401 |               error("Attempting to append with an inline array at " + Format.keys(ks))
 | |
| 78316 | 402 | (seen.inlines + rest, seen.tables ++ implicitly_seen) | 
| 403 | case _: Parsers.Inline_Table => | |
| 78327 | 404 | seen.tables.find(_.startsWith(rest)).foreach(ks => | 
| 405 |               error("Attempting to add with an inline table at " + Format.keys(ks)))
 | |
| 78316 | 406 | (seen.inlines + rest, seen.tables ++ implicitly_seen) | 
| 407 | case _: Parsers.Table => | |
| 78327 | 408 | if (seen.tables.contains(rest)) | 
| 78332 | 409 |               error("Attempting to define a table twice at " + Format.keys(ks))
 | 
| 78316 | 410 | (seen.inlines, seen.tables + rest) | 
| 411 | case _: Parsers.Array_Of_Tables => (Set.empty, Set.empty) | |
| 412 | } | |
| 413 | ||
| 414 | seen_tables += to -> Seen(inlines, tables) | |
| 415 | } | |
| 416 | def check_add_def(ks: Keys, defn: Parsers.Def): Unit = check_add(0, ks, defn) | |
| 417 | def check_add_value(ks0: Keys, ks1: Keys, v: Parsers.V): Unit = | |
| 418 | check_add(ks0.length - 1, ks0 ::: ks1, v) | |
| 419 | } | |
| 420 | ||
| 421 |   object Parse_Context {
 | |
| 422 | case class Seen(inlines: Set[Keys], tables: Set[Keys]) | |
| 423 | ||
| 424 | def empty: Parse_Context = new Parse_Context(Map(Nil -> Seen(Set.empty, Set.empty))) | |
| 425 | } | |
| 426 | ||
| 427 |   def parse(s: Str, context: Parse_Context = Parse_Context.empty): Table = {
 | |
| 428 | val file = Parsers.parse(s) | |
| 429 | ||
| 430 |     def convert(ks0: Keys, ks1: Keys, v: Parsers.V): T = {
 | |
| 431 | def to_table(ks: Keys, t: T): Table = | |
| 432 | ks.dropRight(1).foldRight(Table(ks.last -> t))((k, v) => Table(k -> v)) | |
| 433 |       def to_toml(v: Parsers.V): (T, Set[Keys]) = v match {
 | |
| 434 | case Parsers.Primitive(t) => (t, Set.empty) | |
| 435 | case Parsers.Array(rep) => (Array(rep.map(to_toml(_)._1)), Set.empty) | |
| 436 | case Parsers.Inline_Table(elems) => | |
| 78327 | 437 | elems.find(e => elems.count(_._1 == e._1) > 1).foreach((ks, _) => | 
| 438 |             error("Duplicate key " + Format.keys(ks) + " in inline table at " +
 | |
| 439 | Format.keys(ks0 ::: ks1))) | |
| 78316 | 440 | |
| 441 | val (tables, pfxs, key_spaces) = | |
| 442 |             elems.map { (ks, v) =>
 | |
| 443 | val (t, keys) = to_toml(v) | |
| 444 | (to_table(ks, t), prefixes(ks), keys.map(ks ::: _) + ks) | |
| 445 | }.unzip3 | |
| 446 | ||
| 447 | pfxs.foreach(pfx => if (key_spaces.count(pfx.intersect(_).nonEmpty) > 1) | |
| 78327 | 448 |             error("Inline table not self-contained at " + Format.keys(ks0 ::: ks1)))
 | 
| 78316 | 449 | |
| 450 | (tables.foldLeft(Table())(_ ++ _), pfxs.toSet.flatten ++ key_spaces.toSet.flatten) | |
| 451 | } | |
| 452 | context.check_add_value(ks0, ks1, v) | |
| 453 | to_toml(v)._1 | |
| 454 | } | |
| 455 | ||
| 78327 | 456 |     def update(map: Table, ks0: Keys, value: T): Table = {
 | 
| 457 |       def update_rec(hd: Keys, map: Table, ks: Keys): Table = {
 | |
| 458 | val updated = | |
| 459 |           if (ks.length == 1) {
 | |
| 460 |             map.any.get(ks.head) match {
 | |
| 461 | case Some(a: Array) => | |
| 462 |                 value match {
 | |
| 463 | case a2: Array => a ++ a2 | |
| 464 | case _ => | |
| 465 |                     error("Table conflicts with previous array of tables at " + Format.keys(ks0))
 | |
| 466 | } | |
| 467 |               case Some(t: Table) => value match {
 | |
| 468 | case t2: Table => | |
| 469 | if (t.domain.intersect(t2.domain).nonEmpty) | |
| 470 |                     error("Attempting to redefine existing value in super-table at " +
 | |
| 471 | Format.keys(ks0)) | |
| 472 | else t ++ t2 | |
| 473 |                 case _ => error("Attempting to redefine a table at " + Format.keys(ks0))
 | |
| 78316 | 474 | } | 
| 78327 | 475 |               case Some(_) => error("Attempting to redefine a value at " + Format.keys(ks0))
 | 
| 476 | case None => value | |
| 78316 | 477 | } | 
| 478 | } | |
| 78327 | 479 |           else {
 | 
| 480 | val hd1 = hd :+ ks.head | |
| 481 |             map.any.get(ks.head) match {
 | |
| 482 | case Some(t: Table) => update_rec(hd1, t, ks.tail) | |
| 483 | case Some(a: Array) => | |
| 484 | Array(a.table.values.dropRight(1) :+ update_rec(hd1, a.table.values.last, ks.tail)) | |
| 485 |               case Some(_) => error("Attempting to redefine a value at " + Format.keys(hd1))
 | |
| 486 | case None => update_rec(hd1, Table(), ks.tail) | |
| 487 | } | |
| 78316 | 488 | } | 
| 78327 | 489 | (map - ks.head) + (ks.head -> updated) | 
| 490 | } | |
| 491 | update_rec(Nil, map, ks0) | |
| 78316 | 492 | } | 
| 493 | ||
| 494 | def fold(elems: List[(Keys, T)]): Table = | |
| 495 |       elems.foldLeft(Table()) { case (t0, (ks1, t1)) => update(t0, ks1, t1) }
 | |
| 496 | ||
| 497 | val t = fold(file.elems.map((ks, v) => (ks, convert(Nil, ks, v)))) | |
| 498 |     file.defs.foldLeft(t) {
 | |
| 499 | case (t0, defn@Parsers.Table(ks0, elems)) => | |
| 500 | context.check_add_def(ks0, defn) | |
| 501 | update(t0, ks0, fold(elems.map((ks, v) => (ks, convert(ks0, ks, v))))) | |
| 502 | case (t0, defn@Parsers.Array_Of_Tables(ks0, elems)) => | |
| 503 | context.check_add_def(ks0, defn) | |
| 504 | update(t0, ks0, Array(fold(elems.map((ks, v) => (ks, convert(ks0, ks, v)))))) | |
| 505 | } | |
| 506 | } | |
| 507 | ||
| 508 | ||
| 509 | /* Format TOML */ | |
| 510 | ||
| 511 |   object Format {
 | |
| 78326 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 512 | private def basic_string(s: Str): Str = | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 513 |       "\"" + s.iterator.map {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 514 | case '\b' => "\\b" case '\t' => "\\t" case '\n' => "\\n" case '\f' => "\\f" | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 515 | case '\r' => "\\r" case '"' => "\\\"" case '\\' => "\\\\" case c => | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 516 | if (c <= '\u001f' || c == '\u007f') "\\u%04x".format(c.toInt) else c | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 517 | }.mkString + "\"" | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 518 | |
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 519 | private def multiline_basic_string(s: Str): Str = | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 520 |       "\"\"\"\n" + s.iterator.map {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 521 | case '\b' => "\\b" case '\t' => "\t" case '\n' => "\n" case '\f' => "\\f" | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 522 | case '\r' => "\r" case '"' => "\\\"" case '\\' => "\\\\" case c => | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 523 | if (c <= '\u001f' || c == '\u007f') "\\u%04x".format(c.toInt) else c | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 524 |       }.mkString.replace("\"\"\"", "\"\"\\\"") + "\"\"\""
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 525 | |
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 526 |     def key(k: Key): Str = {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 527 | val Bare_Key = """[A-Za-z0-9_-]+""".r | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 528 |       k match {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 529 | case Bare_Key() => k | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 530 | case _ => basic_string(k) | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 531 | } | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 532 | } | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 533 | |
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 534 |     def keys(ks: Keys): Str = ks.mkString(".")
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 535 | |
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 536 |     def inline(t: T, indent: Int = 0): Str = {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 537 | val result = new StringBuilder | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 538 | def indentation(i: Int): Unit = for (_ <- Range(0, i)) result ++= " " | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 539 | |
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 540 | indentation(indent) | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 541 |       t match {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 542 | case s: String => | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 543 |           if (s.rep.contains("\n") && s.rep.length > 20) result ++= multiline_basic_string(s.rep)
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 544 | else result ++= basic_string(s.rep) | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 545 | case i: Integer => result ++= i.rep.toString | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 546 | case f: Float => result ++= f.rep.toString | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 547 | case b: Boolean => result ++= b.rep.toString | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 548 | case o: Offset_Date_Time => result ++= o.rep.toString | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 549 | case l: Local_Date_Time => result ++= l.rep.toString | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 550 | case l: Local_Date => result ++= l.rep.toString | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 551 | case l: Local_Time => result ++= l.rep.toString | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 552 | case a: Array => | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 553 | if (a.is_empty) result ++= "[]" | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 554 |           else {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 555 | result ++= "[\n" | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 556 |             a.any.values.foreach { elem =>
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 557 | result ++= inline(elem, indent + 1) | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 558 | result ++= ",\n" | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 559 | } | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 560 | indentation(indent) | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 561 | result += ']' | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 562 | } | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 563 | case table: Table => | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 564 |           if (table.is_empty) result ++= "{}"
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 565 |           else {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 566 |             result ++= "{ "
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 567 |             Library.separate(None, table.any.values.map(Some(_))).foreach {
 | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 568 | case None => result ++= ", " | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 569 | case Some((k, v)) => | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 570 | result ++= key(k) | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 571 | result ++= " = " | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 572 | result ++= inline(v) | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 573 | } | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 574 | result ++= " }" | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 575 | } | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 576 | } | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 577 | result.toString() | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 578 | } | 
| 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 579 | |
| 78316 | 580 |     def apply(toml: Table): Str = {
 | 
| 581 | val result = new StringBuilder | |
| 582 | ||
| 583 | def inline_values(path: List[Key], t: T): Unit = | |
| 584 |         t match {
 | |
| 585 |           case t: Table => t.any.values.foreach { case (k, v1) => inline_values(k :: path, v1) }
 | |
| 586 | case _ => | |
| 78326 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 587 | result ++= keys(path.reverse) | 
| 78316 | 588 | result ++= " = " | 
| 78326 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 589 | result ++= inline(t) | 
| 78316 | 590 | result += '\n' | 
| 591 | } | |
| 592 | ||
| 593 | def is_inline(elem: T): Bool = | |
| 594 |         elem match {
 | |
| 595 | case _: String | _: Integer | _: Float | _: Boolean | _: Offset_Date_Time | | |
| 596 | _: Local_Date_Time | _: Local_Date | _: Local_Time => true | |
| 597 | case a: Array => a.any.values.forall(is_inline) | |
| 598 | case _ => false | |
| 599 | } | |
| 600 |       def is_table(elem: T): Bool = elem match { case _: Table => true case _ => false }
 | |
| 601 | ||
| 602 | def array(path: List[Key], a: Array): Unit = | |
| 603 | if (a.any.values.forall(is_inline) || !a.any.values.forall(is_table)) | |
| 604 | inline_values(path.take(1), a) | |
| 605 |         else a.table.values.foreach { t =>
 | |
| 606 | result ++= "\n[[" | |
| 78326 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 607 | result ++= keys(path.reverse) | 
| 78316 | 608 | result ++= "]]\n" | 
| 609 | table(path, t, is_table_entry = true) | |
| 610 | } | |
| 611 | ||
| 612 |       def table(path: List[Key], t: Table, is_table_entry: Bool = false): Unit = {
 | |
| 613 | val (values, nodes) = t.any.values.partition(kv => is_inline(kv._2)) | |
| 614 | ||
| 615 |         if (!is_table_entry && path.nonEmpty) {
 | |
| 616 | result ++= "\n[" | |
| 78326 
1cdc65476c2e
more TOML formatting functions;
 Fabian Huch <huch@in.tum.de> parents: 
78317diff
changeset | 617 | result ++= keys(path.reverse) | 
| 78316 | 618 | result ++= "]\n" | 
| 619 | } | |
| 620 | ||
| 621 |         values.foreach { case (k, v) => inline_values(List(k), v) }
 | |
| 622 |         nodes.foreach {
 | |
| 623 | case (k, t: Table) => table(k :: path, t) | |
| 624 | case (k, arr: Array) => array(k :: path, arr) | |
| 625 | case _ => | |
| 626 | } | |
| 627 | } | |
| 628 | ||
| 629 | table(Nil, toml) | |
| 630 | result.toString | |
| 631 | } | |
| 632 | } | |
| 633 | } |