| author | wenzelm | 
| Tue, 23 Nov 2021 21:02:13 +0100 | |
| changeset 74836 | a97ec0954c50 | 
| parent 73359 | d8a0e996614b | 
| child 75393 | 87ebf5a50283 | 
| permissions | -rw-r--r-- | 
| 63604 | 1 | /* Title: Pure/Isar/document_structure.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Overall document structure. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 10 | import scala.collection.mutable | |
| 11 | import scala.annotation.tailrec | |
| 12 | ||
| 13 | ||
| 14 | object Document_Structure | |
| 15 | {
 | |
| 63610 | 16 | /** general structure **/ | 
| 63604 | 17 | |
| 18 |   sealed abstract class Document { def length: Int }
 | |
| 19 | case class Block(name: String, text: String, body: List[Document]) extends Document | |
| 73359 | 20 |   { val length: Int = body.foldLeft(0)(_ + _.length) }
 | 
| 63610 | 21 | case class Atom(length: Int) extends Document | 
| 63605 | 22 | |
| 69859 | 23 | def is_theory_command(keywords: Keyword.Keywords, command: Command): Boolean = | 
| 68845 
3b2daa7bf9f4
support Thy_Element in Scala, following ML version;
 wenzelm parents: 
68840diff
changeset | 24 | command.span.is_kind(keywords, | 
| 
3b2daa7bf9f4
support Thy_Element in Scala, following ML version;
 wenzelm parents: 
68840diff
changeset | 25 | kind => Keyword.theory(kind) && !Keyword.theory_end(kind), false) | 
| 63605 | 26 | |
| 70148 | 27 | def is_document_command(keywords: Keyword.Keywords, command: Command): Boolean = | 
| 28 | command.span.is_kind(keywords, Keyword.document, false) | |
| 29 | ||
| 71165 | 30 | def is_diag_command(keywords: Keyword.Keywords, command: Command): Boolean = | 
| 31 | command.span.is_kind(keywords, Keyword.diag, false) | |
| 32 | ||
| 69869 | 33 | def is_heading_command(command: Command): Boolean = | 
| 34 | proper_heading_level(command).isDefined | |
| 35 | ||
| 69860 | 36 | def proper_heading_level(command: Command): Option[Int] = | 
| 69859 | 37 |     command.span.name match {
 | 
| 38 | case Thy_Header.CHAPTER => Some(0) | |
| 39 | case Thy_Header.SECTION => Some(1) | |
| 40 | case Thy_Header.SUBSECTION => Some(2) | |
| 41 | case Thy_Header.SUBSUBSECTION => Some(3) | |
| 42 | case Thy_Header.PARAGRAPH => Some(4) | |
| 43 | case Thy_Header.SUBPARAGRAPH => Some(5) | |
| 44 | case _ => None | |
| 45 | } | |
| 69860 | 46 | |
| 47 | def heading_level(keywords: Keyword.Keywords, command: Command): Option[Int] = | |
| 48 | proper_heading_level(command) orElse | |
| 49 | (if (is_theory_command(keywords, command)) Some(6) else None) | |
| 69859 | 50 | |
| 63604 | 51 | |
| 63606 | 52 | |
| 63610 | 53 | /** context blocks **/ | 
| 63606 | 54 | |
| 55 | def parse_blocks( | |
| 56 | syntax: Outer_Syntax, | |
| 57 | node_name: Document.Node.Name, | |
| 58 | text: CharSequence): List[Document] = | |
| 59 |   {
 | |
| 63607 | 60 | def is_plain_theory(command: Command): Boolean = | 
| 68840 | 61 | is_theory_command(syntax.keywords, command) && | 
| 63607 | 62 | !command.span.is_begin && !command.span.is_end | 
| 63 | ||
| 64 | ||
| 63606 | 65 | /* stack operations */ | 
| 66 | ||
| 67 | def buffer(): mutable.ListBuffer[Document] = new mutable.ListBuffer[Document] | |
| 68 | ||
| 69 | var stack: List[(Command, mutable.ListBuffer[Document])] = | |
| 70 | List((Command.empty, buffer())) | |
| 71 | ||
| 73340 | 72 |     def open(command: Command): Unit = { stack = (command, buffer()) :: stack }
 | 
| 63607 | 73 | |
| 63606 | 74 | def close(): Boolean = | 
| 75 |       stack match {
 | |
| 76 | case (command, body) :: (_, body2) :: _ => | |
| 77 | body2 += Block(command.span.name, command.source, body.toList) | |
| 78 | stack = stack.tail | |
| 79 | true | |
| 80 | case _ => | |
| 81 | false | |
| 82 | } | |
| 83 | ||
| 73340 | 84 | def flush(): Unit = if (is_plain_theory(stack.head._1)) close() | 
| 63607 | 85 | |
| 63606 | 86 | def result(): List[Document] = | 
| 87 |     {
 | |
| 88 |       while (close()) { }
 | |
| 89 | stack.head._2.toList | |
| 90 | } | |
| 91 | ||
| 73340 | 92 | def add(command: Command): Unit = | 
| 63606 | 93 |     {
 | 
| 63607 | 94 |       if (command.span.is_begin || is_plain_theory(command)) { flush(); open(command) }
 | 
| 95 |       else if (command.span.is_end) { flush(); close() }
 | |
| 63606 | 96 | |
| 63610 | 97 | stack.head._2 += Atom(command.source.length) | 
| 63606 | 98 | } | 
| 99 | ||
| 100 | ||
| 101 | /* result structure */ | |
| 102 | ||
| 103 | val spans = syntax.parse_spans(text) | |
| 72814 | 104 | spans.foreach(span => add(Command(Document_ID.none, node_name, Command.Blobs_Info.none, span))) | 
| 63606 | 105 | result() | 
| 106 | } | |
| 107 | ||
| 108 | ||
| 109 | ||
| 63610 | 110 | /** section headings **/ | 
| 111 | ||
| 112 | trait Item | |
| 63604 | 113 |   {
 | 
| 63610 | 114 | def name: String = "" | 
| 115 | def source: String = "" | |
| 116 | def heading_level: Option[Int] = None | |
| 117 | } | |
| 63604 | 118 | |
| 63610 | 119 | object No_Item extends Item | 
| 63604 | 120 | |
| 63610 | 121 | class Sections(keywords: Keyword.Keywords) | 
| 122 |   {
 | |
| 123 | private def buffer(): mutable.ListBuffer[Document] = new mutable.ListBuffer[Document] | |
| 63604 | 124 | |
| 63610 | 125 | private var stack: List[(Int, Item, mutable.ListBuffer[Document])] = | 
| 126 | List((0, No_Item, buffer())) | |
| 127 | ||
| 73340 | 128 | @tailrec private def close(level: Int => Boolean): Unit = | 
| 63604 | 129 |     {
 | 
| 130 |       stack match {
 | |
| 63610 | 131 | case (lev, item, body) :: (_, _, body2) :: _ if level(lev) => | 
| 132 | body2 += Block(item.name, item.source, body.toList) | |
| 63604 | 133 | stack = stack.tail | 
| 134 | close(level) | |
| 135 | case _ => | |
| 136 | } | |
| 137 | } | |
| 138 | ||
| 139 | def result(): List[Document] = | |
| 140 |     {
 | |
| 141 | close(_ => true) | |
| 142 | stack.head._3.toList | |
| 143 | } | |
| 144 | ||
| 73340 | 145 | def add(item: Item): Unit = | 
| 63604 | 146 |     {
 | 
| 63610 | 147 |       item.heading_level match {
 | 
| 63604 | 148 | case Some(i) => | 
| 149 | close(_ > i) | |
| 63610 | 150 | stack = (i + 1, item, buffer()) :: stack | 
| 63604 | 151 | case None => | 
| 152 | } | |
| 63610 | 153 | stack.head._3 += Atom(item.source.length) | 
| 63604 | 154 | } | 
| 63610 | 155 | } | 
| 63604 | 156 | |
| 157 | ||
| 63610 | 158 | /* outer syntax sections */ | 
| 159 | ||
| 160 | private class Command_Item(keywords: Keyword.Keywords, command: Command) extends Item | |
| 161 |   {
 | |
| 162 | override def name: String = command.span.name | |
| 163 | override def source: String = command.source | |
| 69859 | 164 | override def heading_level: Option[Int] = Document_Structure.heading_level(keywords, command) | 
| 63610 | 165 | } | 
| 166 | ||
| 167 | def parse_sections( | |
| 168 | syntax: Outer_Syntax, | |
| 169 | node_name: Document.Node.Name, | |
| 170 | text: CharSequence): List[Document] = | |
| 171 |   {
 | |
| 172 | val sections = new Sections(syntax.keywords) | |
| 173 | ||
| 174 |     for { span <- syntax.parse_spans(text) } {
 | |
| 175 | sections.add( | |
| 176 | new Command_Item(syntax.keywords, | |
| 72814 | 177 | Command(Document_ID.none, node_name, Command.Blobs_Info.none, span))) | 
| 63610 | 178 | } | 
| 179 | sections.result() | |
| 180 | } | |
| 181 | ||
| 63604 | 182 | |
| 63610 | 183 | /* ML sections */ | 
| 184 | ||
| 185 | private class ML_Item(token: ML_Lex.Token, level: Option[Int]) extends Item | |
| 186 |   {
 | |
| 187 | override def source: String = token.source | |
| 188 | override def heading_level: Option[Int] = level | |
| 189 | } | |
| 190 | ||
| 191 | def parse_ml_sections(SML: Boolean, text: CharSequence): List[Document] = | |
| 192 |   {
 | |
| 193 | val sections = new Sections(Keyword.Keywords.empty) | |
| 194 | val nl = new ML_Item(ML_Lex.Token(ML_Lex.Kind.SPACE, "\n"), None) | |
| 195 | ||
| 196 | var context: Scan.Line_Context = Scan.Finished | |
| 197 |     for (line <- Library.separated_chunks(_ == '\n', text)) {
 | |
| 198 | val (toks, next_context) = ML_Lex.tokenize_line(SML, line, context) | |
| 199 | val level = | |
| 200 |         toks.filterNot(_.is_space) match {
 | |
| 201 | case List(tok) if tok.is_comment => | |
| 202 | val s = tok.source | |
| 64610 | 203 | if (Codepoint.iterator(s).exists(c => Character.isLetter(c) || Character.isDigit(c))) | 
| 63666 | 204 |             {
 | 
| 205 |               if (s.startsWith("(**** ") && s.endsWith(" ****)")) Some(0)
 | |
| 206 |               else if (s.startsWith("(*** ") && s.endsWith(" ***)")) Some(1)
 | |
| 207 |               else if (s.startsWith("(** ") && s.endsWith(" **)")) Some(2)
 | |
| 208 |               else if (s.startsWith("(* ") && s.endsWith(" *)")) Some(3)
 | |
| 209 | else None | |
| 210 | } | |
| 63610 | 211 | else None | 
| 212 | case _ => None | |
| 213 | } | |
| 214 | if (level.isDefined && context == Scan.Finished && next_context == Scan.Finished) | |
| 215 | toks.foreach(tok => sections.add(new ML_Item(tok, if (tok.is_comment) level else None))) | |
| 216 | else | |
| 217 | toks.foreach(tok => sections.add(new ML_Item(tok, None))) | |
| 218 | ||
| 219 | sections.add(nl) | |
| 220 | context = next_context | |
| 221 | } | |
| 222 | sections.result() | |
| 63604 | 223 | } | 
| 224 | } |