| author | Fabian Huch <huch@in.tum.de> | 
| Wed, 03 Jul 2024 21:11:24 +0200 | |
| changeset 80497 | bd00bdf39c86 | 
| parent 78912 | ff4496b25197 | 
| permissions | -rw-r--r-- | 
| 63604 | 1 | /* Title: Pure/Isar/document_structure.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Overall document structure. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 10 | import scala.collection.mutable | |
| 11 | import scala.annotation.tailrec | |
| 12 | ||
| 13 | ||
| 75393 | 14 | object Document_Structure {
 | 
| 63610 | 15 | /** general structure **/ | 
| 63604 | 16 | |
| 17 |   sealed abstract class Document { def length: Int }
 | |
| 75393 | 18 |   case class Block(name: String, text: String, body: List[Document]) extends Document {
 | 
| 19 | val length: Int = body.foldLeft(0)(_ + _.length) | |
| 20 | } | |
| 63610 | 21 | case class Atom(length: Int) extends Document | 
| 63605 | 22 | |
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 23 | def is_theory_command(command: Command): Boolean = | 
| 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 24 | command.span.is_keyword_kind(kind => Keyword.theory(kind) && !Keyword.theory_end(kind)) | 
| 63605 | 25 | |
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 26 | def is_document_command(command: Command): Boolean = | 
| 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 27 | command.span.is_keyword_kind(Keyword.document) | 
| 70148 | 28 | |
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 29 | def is_diag_command(command: Command): Boolean = | 
| 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 30 | command.span.is_keyword_kind(Keyword.diag) | 
| 71165 | 31 | |
| 69869 | 32 | def is_heading_command(command: Command): Boolean = | 
| 33 | proper_heading_level(command).isDefined | |
| 34 | ||
| 69860 | 35 | def proper_heading_level(command: Command): Option[Int] = | 
| 69859 | 36 |     command.span.name match {
 | 
| 37 | case Thy_Header.CHAPTER => Some(0) | |
| 38 | case Thy_Header.SECTION => Some(1) | |
| 39 | case Thy_Header.SUBSECTION => Some(2) | |
| 40 | case Thy_Header.SUBSUBSECTION => Some(3) | |
| 41 | case Thy_Header.PARAGRAPH => Some(4) | |
| 42 | case Thy_Header.SUBPARAGRAPH => Some(5) | |
| 43 | case _ => None | |
| 44 | } | |
| 69860 | 45 | |
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 46 | def heading_level(command: Command): Option[Int] = | 
| 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 47 | proper_heading_level(command) orElse (if (is_theory_command(command)) Some(6) else None) | 
| 69859 | 48 | |
| 63604 | 49 | |
| 63606 | 50 | |
| 63610 | 51 | /** context blocks **/ | 
| 63606 | 52 | |
| 53 | def parse_blocks( | |
| 54 | syntax: Outer_Syntax, | |
| 55 | node_name: Document.Node.Name, | |
| 75393 | 56 | text: CharSequence | 
| 57 |   ): List[Document] = {
 | |
| 63607 | 58 | def is_plain_theory(command: Command): Boolean = | 
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 59 | is_theory_command(command) && !command.span.is_begin && !command.span.is_end | 
| 63607 | 60 | |
| 61 | ||
| 63606 | 62 | /* stack operations */ | 
| 63 | ||
| 64 | def buffer(): mutable.ListBuffer[Document] = new mutable.ListBuffer[Document] | |
| 65 | ||
| 66 | var stack: List[(Command, mutable.ListBuffer[Document])] = | |
| 67 | List((Command.empty, buffer())) | |
| 68 | ||
| 73340 | 69 |     def open(command: Command): Unit = { stack = (command, buffer()) :: stack }
 | 
| 63607 | 70 | |
| 63606 | 71 | def close(): Boolean = | 
| 72 |       stack match {
 | |
| 73 | case (command, body) :: (_, body2) :: _ => | |
| 74 | body2 += Block(command.span.name, command.source, body.toList) | |
| 75 | stack = stack.tail | |
| 76 | true | |
| 77 | case _ => | |
| 78 | false | |
| 79 | } | |
| 80 | ||
| 73340 | 81 | def flush(): Unit = if (is_plain_theory(stack.head._1)) close() | 
| 63607 | 82 | |
| 75393 | 83 |     def result(): List[Document] = {
 | 
| 63606 | 84 |       while (close()) { }
 | 
| 85 | stack.head._2.toList | |
| 86 | } | |
| 87 | ||
| 75393 | 88 |     def add(command: Command): Unit = {
 | 
| 63607 | 89 |       if (command.span.is_begin || is_plain_theory(command)) { flush(); open(command) }
 | 
| 90 |       else if (command.span.is_end) { flush(); close() }
 | |
| 63606 | 91 | |
| 63610 | 92 | stack.head._2 += Atom(command.source.length) | 
| 63606 | 93 | } | 
| 94 | ||
| 95 | ||
| 96 | /* result structure */ | |
| 97 | ||
| 98 | val spans = syntax.parse_spans(text) | |
| 76914 | 99 | spans.foreach(span => add(Command(Document_ID.none, node_name, Command.Blobs_Info.empty, span))) | 
| 63606 | 100 | result() | 
| 101 | } | |
| 102 | ||
| 103 | ||
| 104 | ||
| 63610 | 105 | /** section headings **/ | 
| 106 | ||
| 75393 | 107 |   trait Item {
 | 
| 63610 | 108 | def name: String = "" | 
| 109 | def source: String = "" | |
| 110 | def heading_level: Option[Int] = None | |
| 111 | } | |
| 63604 | 112 | |
| 63610 | 113 | object No_Item extends Item | 
| 63604 | 114 | |
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 115 |   class Sections {
 | 
| 63610 | 116 | private def buffer(): mutable.ListBuffer[Document] = new mutable.ListBuffer[Document] | 
| 63604 | 117 | |
| 63610 | 118 | private var stack: List[(Int, Item, mutable.ListBuffer[Document])] = | 
| 119 | List((0, No_Item, buffer())) | |
| 120 | ||
| 75393 | 121 |     @tailrec private def close(level: Int => Boolean): Unit = {
 | 
| 63604 | 122 |       stack match {
 | 
| 63610 | 123 | case (lev, item, body) :: (_, _, body2) :: _ if level(lev) => | 
| 124 | body2 += Block(item.name, item.source, body.toList) | |
| 63604 | 125 | stack = stack.tail | 
| 126 | close(level) | |
| 127 | case _ => | |
| 128 | } | |
| 129 | } | |
| 130 | ||
| 75393 | 131 |     def result(): List[Document] = {
 | 
| 63604 | 132 | close(_ => true) | 
| 133 | stack.head._3.toList | |
| 134 | } | |
| 135 | ||
| 75393 | 136 |     def add(item: Item): Unit = {
 | 
| 63610 | 137 |       item.heading_level match {
 | 
| 63604 | 138 | case Some(i) => | 
| 139 | close(_ > i) | |
| 63610 | 140 | stack = (i + 1, item, buffer()) :: stack | 
| 63604 | 141 | case None => | 
| 142 | } | |
| 63610 | 143 | stack.head._3 += Atom(item.source.length) | 
| 63604 | 144 | } | 
| 63610 | 145 | } | 
| 63604 | 146 | |
| 147 | ||
| 63610 | 148 | /* outer syntax sections */ | 
| 149 | ||
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 150 |   private class Command_Item(command: Command) extends Item {
 | 
| 63610 | 151 | override def name: String = command.span.name | 
| 152 | override def source: String = command.source | |
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 153 | override def heading_level: Option[Int] = Document_Structure.heading_level(command) | 
| 63610 | 154 | } | 
| 155 | ||
| 156 | def parse_sections( | |
| 157 | syntax: Outer_Syntax, | |
| 158 | node_name: Document.Node.Name, | |
| 75393 | 159 | text: CharSequence | 
| 160 |   ): List[Document] = {
 | |
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 161 | val sections = new Sections | 
| 63610 | 162 | |
| 163 |     for { span <- syntax.parse_spans(text) } {
 | |
| 164 | sections.add( | |
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 165 | new Command_Item(Command(Document_ID.none, node_name, Command.Blobs_Info.empty, span))) | 
| 63610 | 166 | } | 
| 167 | sections.result() | |
| 168 | } | |
| 169 | ||
| 63604 | 170 | |
| 63610 | 171 | /* ML sections */ | 
| 172 | ||
| 75393 | 173 |   private class ML_Item(token: ML_Lex.Token, level: Option[Int]) extends Item {
 | 
| 63610 | 174 | override def source: String = token.source | 
| 175 | override def heading_level: Option[Int] = level | |
| 176 | } | |
| 177 | ||
| 75393 | 178 |   def parse_ml_sections(SML: Boolean, text: CharSequence): List[Document] = {
 | 
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
76914diff
changeset | 179 | val sections = new Sections | 
| 63610 | 180 | val nl = new ML_Item(ML_Lex.Token(ML_Lex.Kind.SPACE, "\n"), None) | 
| 181 | ||
| 182 | var context: Scan.Line_Context = Scan.Finished | |
| 183 |     for (line <- Library.separated_chunks(_ == '\n', text)) {
 | |
| 184 | val (toks, next_context) = ML_Lex.tokenize_line(SML, line, context) | |
| 185 | val level = | |
| 186 |         toks.filterNot(_.is_space) match {
 | |
| 187 | case List(tok) if tok.is_comment => | |
| 188 | val s = tok.source | |
| 75393 | 189 |             if (Codepoint.iterator(s).exists(c => Character.isLetter(c) || Character.isDigit(c))) {
 | 
| 63666 | 190 |               if (s.startsWith("(**** ") && s.endsWith(" ****)")) Some(0)
 | 
| 191 |               else if (s.startsWith("(*** ") && s.endsWith(" ***)")) Some(1)
 | |
| 192 |               else if (s.startsWith("(** ") && s.endsWith(" **)")) Some(2)
 | |
| 193 |               else if (s.startsWith("(* ") && s.endsWith(" *)")) Some(3)
 | |
| 194 | else None | |
| 195 | } | |
| 63610 | 196 | else None | 
| 197 | case _ => None | |
| 198 | } | |
| 199 | if (level.isDefined && context == Scan.Finished && next_context == Scan.Finished) | |
| 200 | toks.foreach(tok => sections.add(new ML_Item(tok, if (tok.is_comment) level else None))) | |
| 201 | else | |
| 202 | toks.foreach(tok => sections.add(new ML_Item(tok, None))) | |
| 203 | ||
| 204 | sections.add(nl) | |
| 205 | context = next_context | |
| 206 | } | |
| 207 | sections.result() | |
| 63604 | 208 | } | 
| 209 | } |