| author | paulson <lp15@cam.ac.uk> | 
| Thu, 10 Apr 2025 17:07:18 +0100 | |
| changeset 82470 | 785615e37846 | 
| parent 78912 | ff4496b25197 | 
| permissions | -rw-r--r-- | 
| 63604 | 1  | 
/* Title: Pure/Isar/document_structure.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Overall document structure.  | 
|
5  | 
*/  | 
|
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
9  | 
||
10  | 
import scala.collection.mutable  | 
|
11  | 
import scala.annotation.tailrec  | 
|
12  | 
||
13  | 
||
| 75393 | 14  | 
object Document_Structure {
 | 
| 63610 | 15  | 
/** general structure **/  | 
| 63604 | 16  | 
|
17  | 
  sealed abstract class Document { def length: Int }
 | 
|
| 75393 | 18  | 
  case class Block(name: String, text: String, body: List[Document]) extends Document {
 | 
19  | 
val length: Int = body.foldLeft(0)(_ + _.length)  | 
|
20  | 
}  | 
|
| 63610 | 21  | 
case class Atom(length: Int) extends Document  | 
| 63605 | 22  | 
|
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
23  | 
def is_theory_command(command: Command): Boolean =  | 
| 
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
24  | 
command.span.is_keyword_kind(kind => Keyword.theory(kind) && !Keyword.theory_end(kind))  | 
| 63605 | 25  | 
|
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
26  | 
def is_document_command(command: Command): Boolean =  | 
| 
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
27  | 
command.span.is_keyword_kind(Keyword.document)  | 
| 70148 | 28  | 
|
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
29  | 
def is_diag_command(command: Command): Boolean =  | 
| 
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
30  | 
command.span.is_keyword_kind(Keyword.diag)  | 
| 71165 | 31  | 
|
| 69869 | 32  | 
def is_heading_command(command: Command): Boolean =  | 
33  | 
proper_heading_level(command).isDefined  | 
|
34  | 
||
| 69860 | 35  | 
def proper_heading_level(command: Command): Option[Int] =  | 
| 69859 | 36  | 
    command.span.name match {
 | 
37  | 
case Thy_Header.CHAPTER => Some(0)  | 
|
38  | 
case Thy_Header.SECTION => Some(1)  | 
|
39  | 
case Thy_Header.SUBSECTION => Some(2)  | 
|
40  | 
case Thy_Header.SUBSUBSECTION => Some(3)  | 
|
41  | 
case Thy_Header.PARAGRAPH => Some(4)  | 
|
42  | 
case Thy_Header.SUBPARAGRAPH => Some(5)  | 
|
43  | 
case _ => None  | 
|
44  | 
}  | 
|
| 69860 | 45  | 
|
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
46  | 
def heading_level(command: Command): Option[Int] =  | 
| 
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
47  | 
proper_heading_level(command) orElse (if (is_theory_command(command)) Some(6) else None)  | 
| 69859 | 48  | 
|
| 63604 | 49  | 
|
| 63606 | 50  | 
|
| 63610 | 51  | 
/** context blocks **/  | 
| 63606 | 52  | 
|
53  | 
def parse_blocks(  | 
|
54  | 
syntax: Outer_Syntax,  | 
|
55  | 
node_name: Document.Node.Name,  | 
|
| 75393 | 56  | 
text: CharSequence  | 
57  | 
  ): List[Document] = {
 | 
|
| 63607 | 58  | 
def is_plain_theory(command: Command): Boolean =  | 
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
59  | 
is_theory_command(command) && !command.span.is_begin && !command.span.is_end  | 
| 63607 | 60  | 
|
61  | 
||
| 63606 | 62  | 
/* stack operations */  | 
63  | 
||
64  | 
def buffer(): mutable.ListBuffer[Document] = new mutable.ListBuffer[Document]  | 
|
65  | 
||
66  | 
var stack: List[(Command, mutable.ListBuffer[Document])] =  | 
|
67  | 
List((Command.empty, buffer()))  | 
|
68  | 
||
| 73340 | 69  | 
    def open(command: Command): Unit = { stack = (command, buffer()) :: stack }
 | 
| 63607 | 70  | 
|
| 63606 | 71  | 
def close(): Boolean =  | 
72  | 
      stack match {
 | 
|
73  | 
case (command, body) :: (_, body2) :: _ =>  | 
|
74  | 
body2 += Block(command.span.name, command.source, body.toList)  | 
|
75  | 
stack = stack.tail  | 
|
76  | 
true  | 
|
77  | 
case _ =>  | 
|
78  | 
false  | 
|
79  | 
}  | 
|
80  | 
||
| 73340 | 81  | 
def flush(): Unit = if (is_plain_theory(stack.head._1)) close()  | 
| 63607 | 82  | 
|
| 75393 | 83  | 
    def result(): List[Document] = {
 | 
| 63606 | 84  | 
      while (close()) { }
 | 
85  | 
stack.head._2.toList  | 
|
86  | 
}  | 
|
87  | 
||
| 75393 | 88  | 
    def add(command: Command): Unit = {
 | 
| 63607 | 89  | 
      if (command.span.is_begin || is_plain_theory(command)) { flush(); open(command) }
 | 
90  | 
      else if (command.span.is_end) { flush(); close() }
 | 
|
| 63606 | 91  | 
|
| 63610 | 92  | 
stack.head._2 += Atom(command.source.length)  | 
| 63606 | 93  | 
}  | 
94  | 
||
95  | 
||
96  | 
/* result structure */  | 
|
97  | 
||
98  | 
val spans = syntax.parse_spans(text)  | 
|
| 76914 | 99  | 
spans.foreach(span => add(Command(Document_ID.none, node_name, Command.Blobs_Info.empty, span)))  | 
| 63606 | 100  | 
result()  | 
101  | 
}  | 
|
102  | 
||
103  | 
||
104  | 
||
| 63610 | 105  | 
/** section headings **/  | 
106  | 
||
| 75393 | 107  | 
  trait Item {
 | 
| 63610 | 108  | 
def name: String = ""  | 
109  | 
def source: String = ""  | 
|
110  | 
def heading_level: Option[Int] = None  | 
|
111  | 
}  | 
|
| 63604 | 112  | 
|
| 63610 | 113  | 
object No_Item extends Item  | 
| 63604 | 114  | 
|
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
115  | 
  class Sections {
 | 
| 63610 | 116  | 
private def buffer(): mutable.ListBuffer[Document] = new mutable.ListBuffer[Document]  | 
| 63604 | 117  | 
|
| 63610 | 118  | 
private var stack: List[(Int, Item, mutable.ListBuffer[Document])] =  | 
119  | 
List((0, No_Item, buffer()))  | 
|
120  | 
||
| 75393 | 121  | 
    @tailrec private def close(level: Int => Boolean): Unit = {
 | 
| 63604 | 122  | 
      stack match {
 | 
| 63610 | 123  | 
case (lev, item, body) :: (_, _, body2) :: _ if level(lev) =>  | 
124  | 
body2 += Block(item.name, item.source, body.toList)  | 
|
| 63604 | 125  | 
stack = stack.tail  | 
126  | 
close(level)  | 
|
127  | 
case _ =>  | 
|
128  | 
}  | 
|
129  | 
}  | 
|
130  | 
||
| 75393 | 131  | 
    def result(): List[Document] = {
 | 
| 63604 | 132  | 
close(_ => true)  | 
133  | 
stack.head._3.toList  | 
|
134  | 
}  | 
|
135  | 
||
| 75393 | 136  | 
    def add(item: Item): Unit = {
 | 
| 63610 | 137  | 
      item.heading_level match {
 | 
| 63604 | 138  | 
case Some(i) =>  | 
139  | 
close(_ > i)  | 
|
| 63610 | 140  | 
stack = (i + 1, item, buffer()) :: stack  | 
| 63604 | 141  | 
case None =>  | 
142  | 
}  | 
|
| 63610 | 143  | 
stack.head._3 += Atom(item.source.length)  | 
| 63604 | 144  | 
}  | 
| 63610 | 145  | 
}  | 
| 63604 | 146  | 
|
147  | 
||
| 63610 | 148  | 
/* outer syntax sections */  | 
149  | 
||
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
150  | 
  private class Command_Item(command: Command) extends Item {
 | 
| 63610 | 151  | 
override def name: String = command.span.name  | 
152  | 
override def source: String = command.source  | 
|
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
153  | 
override def heading_level: Option[Int] = Document_Structure.heading_level(command)  | 
| 63610 | 154  | 
}  | 
155  | 
||
156  | 
def parse_sections(  | 
|
157  | 
syntax: Outer_Syntax,  | 
|
158  | 
node_name: Document.Node.Name,  | 
|
| 75393 | 159  | 
text: CharSequence  | 
160  | 
  ): List[Document] = {
 | 
|
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
161  | 
val sections = new Sections  | 
| 63610 | 162  | 
|
163  | 
    for { span <- syntax.parse_spans(text) } {
 | 
|
164  | 
sections.add(  | 
|
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
165  | 
new Command_Item(Command(Document_ID.none, node_name, Command.Blobs_Info.empty, span)))  | 
| 63610 | 166  | 
}  | 
167  | 
sections.result()  | 
|
168  | 
}  | 
|
169  | 
||
| 63604 | 170  | 
|
| 63610 | 171  | 
/* ML sections */  | 
172  | 
||
| 75393 | 173  | 
  private class ML_Item(token: ML_Lex.Token, level: Option[Int]) extends Item {
 | 
| 63610 | 174  | 
override def source: String = token.source  | 
175  | 
override def heading_level: Option[Int] = level  | 
|
176  | 
}  | 
|
177  | 
||
| 75393 | 178  | 
  def parse_ml_sections(SML: Boolean, text: CharSequence): List[Document] = {
 | 
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
76914 
diff
changeset
 | 
179  | 
val sections = new Sections  | 
| 63610 | 180  | 
val nl = new ML_Item(ML_Lex.Token(ML_Lex.Kind.SPACE, "\n"), None)  | 
181  | 
||
182  | 
var context: Scan.Line_Context = Scan.Finished  | 
|
183  | 
    for (line <- Library.separated_chunks(_ == '\n', text)) {
 | 
|
184  | 
val (toks, next_context) = ML_Lex.tokenize_line(SML, line, context)  | 
|
185  | 
val level =  | 
|
186  | 
        toks.filterNot(_.is_space) match {
 | 
|
187  | 
case List(tok) if tok.is_comment =>  | 
|
188  | 
val s = tok.source  | 
|
| 75393 | 189  | 
            if (Codepoint.iterator(s).exists(c => Character.isLetter(c) || Character.isDigit(c))) {
 | 
| 63666 | 190  | 
              if (s.startsWith("(**** ") && s.endsWith(" ****)")) Some(0)
 | 
191  | 
              else if (s.startsWith("(*** ") && s.endsWith(" ***)")) Some(1)
 | 
|
192  | 
              else if (s.startsWith("(** ") && s.endsWith(" **)")) Some(2)
 | 
|
193  | 
              else if (s.startsWith("(* ") && s.endsWith(" *)")) Some(3)
 | 
|
194  | 
else None  | 
|
195  | 
}  | 
|
| 63610 | 196  | 
else None  | 
197  | 
case _ => None  | 
|
198  | 
}  | 
|
199  | 
if (level.isDefined && context == Scan.Finished && next_context == Scan.Finished)  | 
|
200  | 
toks.foreach(tok => sections.add(new ML_Item(tok, if (tok.is_comment) level else None)))  | 
|
201  | 
else  | 
|
202  | 
toks.foreach(tok => sections.add(new ML_Item(tok, None)))  | 
|
203  | 
||
204  | 
sections.add(nl)  | 
|
205  | 
context = next_context  | 
|
206  | 
}  | 
|
207  | 
sections.result()  | 
|
| 63604 | 208  | 
}  | 
209  | 
}  |