| author | wenzelm | 
| Tue, 23 May 2023 18:46:15 +0200 | |
| changeset 78095 | bc42c074e58f | 
| parent 76914 | 1bc50ffad6d2 | 
| child 78912 | ff4496b25197 | 
| permissions | -rw-r--r-- | 
| 63604 | 1 | /* Title: Pure/Isar/document_structure.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Overall document structure. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 10 | import scala.collection.mutable | |
| 11 | import scala.annotation.tailrec | |
| 12 | ||
| 13 | ||
| 75393 | 14 | object Document_Structure {
 | 
| 63610 | 15 | /** general structure **/ | 
| 63604 | 16 | |
| 17 |   sealed abstract class Document { def length: Int }
 | |
| 75393 | 18 |   case class Block(name: String, text: String, body: List[Document]) extends Document {
 | 
| 19 | val length: Int = body.foldLeft(0)(_ + _.length) | |
| 20 | } | |
| 63610 | 21 | case class Atom(length: Int) extends Document | 
| 63605 | 22 | |
| 69859 | 23 | def is_theory_command(keywords: Keyword.Keywords, command: Command): Boolean = | 
| 68845 
3b2daa7bf9f4
support Thy_Element in Scala, following ML version;
 wenzelm parents: 
68840diff
changeset | 24 | command.span.is_kind(keywords, | 
| 
3b2daa7bf9f4
support Thy_Element in Scala, following ML version;
 wenzelm parents: 
68840diff
changeset | 25 | kind => Keyword.theory(kind) && !Keyword.theory_end(kind), false) | 
| 63605 | 26 | |
| 70148 | 27 | def is_document_command(keywords: Keyword.Keywords, command: Command): Boolean = | 
| 28 | command.span.is_kind(keywords, Keyword.document, false) | |
| 29 | ||
| 71165 | 30 | def is_diag_command(keywords: Keyword.Keywords, command: Command): Boolean = | 
| 31 | command.span.is_kind(keywords, Keyword.diag, false) | |
| 32 | ||
| 69869 | 33 | def is_heading_command(command: Command): Boolean = | 
| 34 | proper_heading_level(command).isDefined | |
| 35 | ||
| 69860 | 36 | def proper_heading_level(command: Command): Option[Int] = | 
| 69859 | 37 |     command.span.name match {
 | 
| 38 | case Thy_Header.CHAPTER => Some(0) | |
| 39 | case Thy_Header.SECTION => Some(1) | |
| 40 | case Thy_Header.SUBSECTION => Some(2) | |
| 41 | case Thy_Header.SUBSUBSECTION => Some(3) | |
| 42 | case Thy_Header.PARAGRAPH => Some(4) | |
| 43 | case Thy_Header.SUBPARAGRAPH => Some(5) | |
| 44 | case _ => None | |
| 45 | } | |
| 69860 | 46 | |
| 47 | def heading_level(keywords: Keyword.Keywords, command: Command): Option[Int] = | |
| 48 | proper_heading_level(command) orElse | |
| 49 | (if (is_theory_command(keywords, command)) Some(6) else None) | |
| 69859 | 50 | |
| 63604 | 51 | |
| 63606 | 52 | |
| 63610 | 53 | /** context blocks **/ | 
| 63606 | 54 | |
| 55 | def parse_blocks( | |
| 56 | syntax: Outer_Syntax, | |
| 57 | node_name: Document.Node.Name, | |
| 75393 | 58 | text: CharSequence | 
| 59 |   ): List[Document] = {
 | |
| 63607 | 60 | def is_plain_theory(command: Command): Boolean = | 
| 68840 | 61 | is_theory_command(syntax.keywords, command) && | 
| 63607 | 62 | !command.span.is_begin && !command.span.is_end | 
| 63 | ||
| 64 | ||
| 63606 | 65 | /* stack operations */ | 
| 66 | ||
| 67 | def buffer(): mutable.ListBuffer[Document] = new mutable.ListBuffer[Document] | |
| 68 | ||
| 69 | var stack: List[(Command, mutable.ListBuffer[Document])] = | |
| 70 | List((Command.empty, buffer())) | |
| 71 | ||
| 73340 | 72 |     def open(command: Command): Unit = { stack = (command, buffer()) :: stack }
 | 
| 63607 | 73 | |
| 63606 | 74 | def close(): Boolean = | 
| 75 |       stack match {
 | |
| 76 | case (command, body) :: (_, body2) :: _ => | |
| 77 | body2 += Block(command.span.name, command.source, body.toList) | |
| 78 | stack = stack.tail | |
| 79 | true | |
| 80 | case _ => | |
| 81 | false | |
| 82 | } | |
| 83 | ||
| 73340 | 84 | def flush(): Unit = if (is_plain_theory(stack.head._1)) close() | 
| 63607 | 85 | |
| 75393 | 86 |     def result(): List[Document] = {
 | 
| 63606 | 87 |       while (close()) { }
 | 
| 88 | stack.head._2.toList | |
| 89 | } | |
| 90 | ||
| 75393 | 91 |     def add(command: Command): Unit = {
 | 
| 63607 | 92 |       if (command.span.is_begin || is_plain_theory(command)) { flush(); open(command) }
 | 
| 93 |       else if (command.span.is_end) { flush(); close() }
 | |
| 63606 | 94 | |
| 63610 | 95 | stack.head._2 += Atom(command.source.length) | 
| 63606 | 96 | } | 
| 97 | ||
| 98 | ||
| 99 | /* result structure */ | |
| 100 | ||
| 101 | val spans = syntax.parse_spans(text) | |
| 76914 | 102 | spans.foreach(span => add(Command(Document_ID.none, node_name, Command.Blobs_Info.empty, span))) | 
| 63606 | 103 | result() | 
| 104 | } | |
| 105 | ||
| 106 | ||
| 107 | ||
| 63610 | 108 | /** section headings **/ | 
| 109 | ||
| 75393 | 110 |   trait Item {
 | 
| 63610 | 111 | def name: String = "" | 
| 112 | def source: String = "" | |
| 113 | def heading_level: Option[Int] = None | |
| 114 | } | |
| 63604 | 115 | |
| 63610 | 116 | object No_Item extends Item | 
| 63604 | 117 | |
| 75393 | 118 |   class Sections(keywords: Keyword.Keywords) {
 | 
| 63610 | 119 | private def buffer(): mutable.ListBuffer[Document] = new mutable.ListBuffer[Document] | 
| 63604 | 120 | |
| 63610 | 121 | private var stack: List[(Int, Item, mutable.ListBuffer[Document])] = | 
| 122 | List((0, No_Item, buffer())) | |
| 123 | ||
| 75393 | 124 |     @tailrec private def close(level: Int => Boolean): Unit = {
 | 
| 63604 | 125 |       stack match {
 | 
| 63610 | 126 | case (lev, item, body) :: (_, _, body2) :: _ if level(lev) => | 
| 127 | body2 += Block(item.name, item.source, body.toList) | |
| 63604 | 128 | stack = stack.tail | 
| 129 | close(level) | |
| 130 | case _ => | |
| 131 | } | |
| 132 | } | |
| 133 | ||
| 75393 | 134 |     def result(): List[Document] = {
 | 
| 63604 | 135 | close(_ => true) | 
| 136 | stack.head._3.toList | |
| 137 | } | |
| 138 | ||
| 75393 | 139 |     def add(item: Item): Unit = {
 | 
| 63610 | 140 |       item.heading_level match {
 | 
| 63604 | 141 | case Some(i) => | 
| 142 | close(_ > i) | |
| 63610 | 143 | stack = (i + 1, item, buffer()) :: stack | 
| 63604 | 144 | case None => | 
| 145 | } | |
| 63610 | 146 | stack.head._3 += Atom(item.source.length) | 
| 63604 | 147 | } | 
| 63610 | 148 | } | 
| 63604 | 149 | |
| 150 | ||
| 63610 | 151 | /* outer syntax sections */ | 
| 152 | ||
| 75393 | 153 |   private class Command_Item(keywords: Keyword.Keywords, command: Command) extends Item {
 | 
| 63610 | 154 | override def name: String = command.span.name | 
| 155 | override def source: String = command.source | |
| 69859 | 156 | override def heading_level: Option[Int] = Document_Structure.heading_level(keywords, command) | 
| 63610 | 157 | } | 
| 158 | ||
| 159 | def parse_sections( | |
| 160 | syntax: Outer_Syntax, | |
| 161 | node_name: Document.Node.Name, | |
| 75393 | 162 | text: CharSequence | 
| 163 |   ): List[Document] = {
 | |
| 63610 | 164 | val sections = new Sections(syntax.keywords) | 
| 165 | ||
| 166 |     for { span <- syntax.parse_spans(text) } {
 | |
| 167 | sections.add( | |
| 168 | new Command_Item(syntax.keywords, | |
| 76914 | 169 | Command(Document_ID.none, node_name, Command.Blobs_Info.empty, span))) | 
| 63610 | 170 | } | 
| 171 | sections.result() | |
| 172 | } | |
| 173 | ||
| 63604 | 174 | |
| 63610 | 175 | /* ML sections */ | 
| 176 | ||
| 75393 | 177 |   private class ML_Item(token: ML_Lex.Token, level: Option[Int]) extends Item {
 | 
| 63610 | 178 | override def source: String = token.source | 
| 179 | override def heading_level: Option[Int] = level | |
| 180 | } | |
| 181 | ||
| 75393 | 182 |   def parse_ml_sections(SML: Boolean, text: CharSequence): List[Document] = {
 | 
| 63610 | 183 | val sections = new Sections(Keyword.Keywords.empty) | 
| 184 | val nl = new ML_Item(ML_Lex.Token(ML_Lex.Kind.SPACE, "\n"), None) | |
| 185 | ||
| 186 | var context: Scan.Line_Context = Scan.Finished | |
| 187 |     for (line <- Library.separated_chunks(_ == '\n', text)) {
 | |
| 188 | val (toks, next_context) = ML_Lex.tokenize_line(SML, line, context) | |
| 189 | val level = | |
| 190 |         toks.filterNot(_.is_space) match {
 | |
| 191 | case List(tok) if tok.is_comment => | |
| 192 | val s = tok.source | |
| 75393 | 193 |             if (Codepoint.iterator(s).exists(c => Character.isLetter(c) || Character.isDigit(c))) {
 | 
| 63666 | 194 |               if (s.startsWith("(**** ") && s.endsWith(" ****)")) Some(0)
 | 
| 195 |               else if (s.startsWith("(*** ") && s.endsWith(" ***)")) Some(1)
 | |
| 196 |               else if (s.startsWith("(** ") && s.endsWith(" **)")) Some(2)
 | |
| 197 |               else if (s.startsWith("(* ") && s.endsWith(" *)")) Some(3)
 | |
| 198 | else None | |
| 199 | } | |
| 63610 | 200 | else None | 
| 201 | case _ => None | |
| 202 | } | |
| 203 | if (level.isDefined && context == Scan.Finished && next_context == Scan.Finished) | |
| 204 | toks.foreach(tok => sections.add(new ML_Item(tok, if (tok.is_comment) level else None))) | |
| 205 | else | |
| 206 | toks.foreach(tok => sections.add(new ML_Item(tok, None))) | |
| 207 | ||
| 208 | sections.add(nl) | |
| 209 | context = next_context | |
| 210 | } | |
| 211 | sections.result() | |
| 63604 | 212 | } | 
| 213 | } |