separate tokenization and language context for SML: no symbols, no antiquotes;
authorwenzelm
Tue, 25 Mar 2014 16:11:00 +0100
changeset 56278 2576d3a40ed6
parent 56277 c4f75e733812
child 56279 b4d874f6c6be
separate tokenization and language context for SML: no symbols, no antiquotes;
src/Doc/antiquote_setup.ML
src/HOL/ex/Cartouche_Examples.thy
src/Pure/General/completion.scala
src/Pure/Isar/attrib.ML
src/Pure/Isar/isar_cmd.ML
src/Pure/Isar/method.ML
src/Pure/ML/ml_context.ML
src/Pure/ML/ml_lex.ML
src/Pure/ML/ml_lex.scala
src/Pure/PIDE/markup.ML
src/Pure/PIDE/markup.scala
src/Pure/Thy/thy_output.ML
src/Tools/jEdit/src/isabelle.scala
src/Tools/jEdit/src/rendering.scala
src/Tools/jEdit/src/token_markup.scala
--- a/src/Doc/antiquote_setup.ML	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Doc/antiquote_setup.ML	Tue Mar 25 16:11:00 2014 +0100
@@ -78,7 +78,7 @@
   | _ => error ("Single ML name expected in input: " ^ quote txt));
 
 fun prep_ml source =
-  (#1 (Symbol_Pos.source_content source), ML_Lex.read_source source);
+  (#1 (Symbol_Pos.source_content source), ML_Lex.read_source false source);
 
 fun index_ml name kind ml = Thy_Output.antiquotation name
   (Scan.lift (Args.name_source_position -- Scan.option (Args.colon |-- Args.name_source_position)))
--- a/src/HOL/ex/Cartouche_Examples.thy	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/HOL/ex/Cartouche_Examples.thy	Tue Mar 25 16:11:00 2014 +0100
@@ -118,7 +118,7 @@
       let
         val toks =
           ML_Lex.read Position.none "fn _ => (" @
-          ML_Lex.read_source source @
+          ML_Lex.read_source false source @
           ML_Lex.read Position.none ");";
         val _ = ML_Context.eval_in (SOME context) {SML = false, verbose = false} (#pos source) toks;
       in "" end);
@@ -189,7 +189,7 @@
       val ctxt' = ctxt |> Context.proof_map
         (ML_Context.expression (#pos source)
           "fun tactic (ctxt : Proof.context) : tactic"
-          "Context.map_proof (ML_Tactic.set tactic)" (ML_Lex.read_source source));
+          "Context.map_proof (ML_Tactic.set tactic)" (ML_Lex.read_source false source));
     in Data.get ctxt' ctxt end;
 end;
 *}
--- a/src/Pure/General/completion.scala	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/General/completion.scala	Tue Mar 25 16:11:00 2014 +0100
@@ -193,6 +193,7 @@
     val inner = Language_Context(Markup.Language.UNKNOWN, true, false)
     val ML_outer = Language_Context(Markup.Language.ML, false, true)
     val ML_inner = Language_Context(Markup.Language.ML, true, false)
+    val SML_outer = Language_Context(Markup.Language.SML, false, false)
   }
 
   sealed case class Language_Context(language: String, symbols: Boolean, antiquotes: Boolean)
--- a/src/Pure/Isar/attrib.ML	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/Isar/attrib.ML	Tue Mar 25 16:11:00 2014 +0100
@@ -181,7 +181,7 @@
     "val (name, scan, comment): binding * attribute context_parser * string"
     "Context.map_theory (Attrib.setup name scan comment)"
     (ML_Lex.read Position.none ("(" ^ ML_Syntax.make_binding name ^ ", ") @
-      ML_Lex.read_source source @
+      ML_Lex.read_source false source @
       ML_Lex.read Position.none (", " ^ ML_Syntax.print_string cmt ^ ")")));
 
 
--- a/src/Pure/Isar/isar_cmd.ML	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/Isar/isar_cmd.ML	Tue Mar 25 16:11:00 2014 +0100
@@ -67,12 +67,12 @@
 (* generic setup *)
 
 fun global_setup source =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source) "val setup: theory -> theory" "Context.map_theory setup"
   |> Context.theory_map;
 
 fun local_setup source =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source) "val setup: local_theory -> local_theory" "Context.map_proof setup"
   |> Context.proof_map;
 
@@ -80,35 +80,35 @@
 (* translation functions *)
 
 fun parse_ast_translation source =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source)
     "val parse_ast_translation: (string * (Proof.context -> Ast.ast list -> Ast.ast)) list"
     "Context.map_theory (Sign.parse_ast_translation parse_ast_translation)"
   |> Context.theory_map;
 
 fun parse_translation source =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source)
     "val parse_translation: (string * (Proof.context -> term list -> term)) list"
     "Context.map_theory (Sign.parse_translation parse_translation)"
   |> Context.theory_map;
 
 fun print_translation source =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source)
     "val print_translation: (string * (Proof.context -> term list -> term)) list"
     "Context.map_theory (Sign.print_translation print_translation)"
   |> Context.theory_map;
 
 fun typed_print_translation source =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source)
     "val typed_print_translation: (string * (Proof.context -> typ -> term list -> term)) list"
     "Context.map_theory (Sign.typed_print_translation typed_print_translation)"
   |> Context.theory_map;
 
 fun print_ast_translation source =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source)
     "val print_ast_translation: (string * (Proof.context -> Ast.ast list -> Ast.ast)) list"
     "Context.map_theory (Sign.print_ast_translation print_ast_translation)"
@@ -135,7 +135,7 @@
 
 fun oracle (name, pos) source =
   let
-    val body = ML_Lex.read_source source;
+    val body = ML_Lex.read_source false source;
     val ants =
       ML_Lex.read Position.none
        ("local\n\
@@ -162,7 +162,7 @@
 (* declarations *)
 
 fun declaration {syntax, pervasive} source =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source)
     "val declaration: Morphism.declaration"
     ("Context.map_proof (Local_Theory.declaration {syntax = " ^ Bool.toString syntax ^ ", \
@@ -173,7 +173,7 @@
 (* simprocs *)
 
 fun simproc_setup name lhss source identifier =
-  ML_Lex.read_source source
+  ML_Lex.read_source false source
   |> ML_Context.expression (#pos source)
     "val proc: Morphism.morphism -> Proof.context -> cterm -> thm option"
     ("Context.map_proof (Simplifier.def_simproc_cmd {name = " ^ ML_Syntax.make_binding name ^ ", \
--- a/src/Pure/Isar/method.ML	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/Isar/method.ML	Tue Mar 25 16:11:00 2014 +0100
@@ -269,7 +269,7 @@
     val ctxt' = ctxt |> Context.proof_map
       (ML_Context.expression (#pos source)
         "fun tactic (facts: thm list) : tactic"
-        "Context.map_proof (Method.set_tactic tactic)" (ML_Lex.read_source source));
+        "Context.map_proof (Method.set_tactic tactic)" (ML_Lex.read_source false source));
   in Context.setmp_thread_data (SOME (Context.Proof ctxt)) (ML_Tactic.get ctxt') end;
 
 fun tactic source ctxt = METHOD (ml_tactic source ctxt);
@@ -368,7 +368,7 @@
     "val (name, scan, comment): binding * (Proof.context -> Proof.method) context_parser * string"
     "Context.map_theory (Method.setup name scan comment)"
     (ML_Lex.read Position.none ("(" ^ ML_Syntax.make_binding name ^ ", ") @
-      ML_Lex.read_source source @
+      ML_Lex.read_source false source @
       ML_Lex.read Position.none (", " ^ ML_Syntax.print_string cmt ^ ")")));
 
 
--- a/src/Pure/ML/ml_context.ML	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/ML/ml_context.ML	Tue Mar 25 16:11:00 2014 +0100
@@ -176,7 +176,7 @@
   in eval flags pos (ML_Lex.read pos (File.read path)) end;
 
 fun eval_source flags source =
-  eval flags (#pos source) (ML_Lex.read_source source);
+  eval flags (#pos source) (ML_Lex.read_source (#SML flags) source);
 
 fun eval_in ctxt flags pos ants =
   Context.setmp_thread_data (Option.map Context.Proof ctxt)
--- a/src/Pure/ML/ml_lex.ML	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/ML/ml_lex.ML	Tue Mar 25 16:11:00 2014 +0100
@@ -26,7 +26,7 @@
       Source.source) Source.source
   val tokenize: string -> token list
   val read: Position.T -> Symbol_Pos.text -> token Antiquote.antiquote list
-  val read_source: Symbol_Pos.source -> token Antiquote.antiquote list
+  val read_source: bool -> Symbol_Pos.source -> token Antiquote.antiquote list
 end;
 
 structure ML_Lex: ML_LEX =
@@ -132,7 +132,7 @@
 
 local
 
-val token_kind_markup =
+fun token_kind_markup SML =
  fn Keyword   => (Markup.empty, "")
   | Ident     => (Markup.empty, "")
   | LongIdent => (Markup.empty, "")
@@ -141,18 +141,18 @@
   | Int       => (Markup.ML_numeral, "")
   | Real      => (Markup.ML_numeral, "")
   | Char      => (Markup.ML_char, "")
-  | String    => (Markup.ML_string, "")
+  | String    => (if SML then Markup.SML_string else Markup.ML_string, "")
   | Space     => (Markup.empty, "")
-  | Comment   => (Markup.ML_comment, "")
+  | Comment   => (if SML then Markup.SML_comment else Markup.ML_comment, "")
   | Error msg => (Markup.bad, msg)
   | EOF       => (Markup.empty, "");
 
 in
 
-fun report_of_token (tok as Token ((pos, _), (kind, x))) =
+fun report_of_token SML (tok as Token ((pos, _), (kind, x))) =
   let
     val (markup, txt) =
-      if not (is_keyword tok) then token_kind_markup kind
+      if not (is_keyword tok) then token_kind_markup SML kind
       else if is_delimiter tok then (Markup.ML_delimiter, "")
       else if member (op =) keywords2 x then (Markup.ML_keyword2, "")
       else if member (op =) keywords3 x then (Markup.ML_keyword3, "")
@@ -291,15 +291,7 @@
   Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single)
   >> (single o token (Error msg));
 
-in
-
-fun source src =
-  Symbol_Pos.source (Position.line 1) src
-  |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_ml)) (SOME (false, recover));
-
-val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust;
-
-fun read pos text =
+fun gen_read SML pos text =
   let
     val syms = Symbol_Pos.explode (text, pos);
     val termination =
@@ -309,17 +301,32 @@
           val pos1 = List.last syms |-> Position.advance;
           val pos2 = Position.advance Symbol.space pos1;
         in [Antiquote.Text (Token (Position.range pos1 pos2, (Space, Symbol.space)))] end;
+
+    val scan = if SML then scan_ml >> Antiquote.Text else scan_antiq;
+    fun check (Antiquote.Text tok) = (check_content_of tok; if SML then () else warn tok)
+      | check _ = ();
     val input =
-      (Source.of_list syms
-        |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_antiq))
-          (SOME (false, fn msg => recover msg >> map Antiquote.Text))
-        |> Source.exhaust
-        |> tap (Position.reports_text o Antiquote.antiquote_reports (single o report_of_token))
-        |> tap (List.app (fn Antiquote.Text tok => (check_content_of tok; warn tok) | _ => ())));
+      Source.of_list syms
+      |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan))
+        (SOME (false, fn msg => recover msg >> map Antiquote.Text))
+      |> Source.exhaust
+      |> tap (Position.reports_text o Antiquote.antiquote_reports (single o report_of_token SML))
+      |> tap (List.app check);
   in input @ termination end;
 
-fun read_source {delimited, text, pos} =
-  (Position.report pos (Markup.language_ML delimited); read pos text);
+in
+
+fun source src =
+  Symbol_Pos.source (Position.line 1) src
+  |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_ml)) (SOME (false, recover));
+
+val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust;
+
+val read = gen_read false;
+
+fun read_source SML {delimited, text, pos} =
+  (Position.report pos ((if SML then Markup.language_SML else Markup.language_ML) delimited);
+   gen_read SML pos text);
 
 end;
 
--- a/src/Pure/ML/ml_lex.scala	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/ML/ml_lex.scala	Tue Mar 25 16:11:00 2014 +0100
@@ -239,13 +239,15 @@
 
     def token: Parser[Token] = delimited_token | other_token
 
-    def token_line(ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] =
+    def token_line(SML: Boolean, ctxt: Scan.Line_Context): Parser[(Token, Scan.Line_Context)] =
     {
       val other = (ml_char | other_token) ^^ (x => (x, Scan.Finished))
 
-      ml_string_line(ctxt) |
-        (ml_comment_line(ctxt) |
-          (ml_antiq_start(ctxt) | (ml_antiq_stop(ctxt) | (ml_antiq_body(ctxt) | other))))
+      if (SML) ml_string_line(ctxt) | (ml_comment_line(ctxt) | other)
+      else
+        ml_string_line(ctxt) |
+          (ml_comment_line(ctxt) |
+            (ml_antiq_start(ctxt) | (ml_antiq_stop(ctxt) | (ml_antiq_body(ctxt) | other))))
     }
   }
 
@@ -260,14 +262,14 @@
     }
   }
 
-  def tokenize_line(input: CharSequence, context: Scan.Line_Context)
+  def tokenize_line(SML: Boolean, input: CharSequence, context: Scan.Line_Context)
     : (List[Token], Scan.Line_Context) =
   {
     var in: Reader[Char] = new CharSequenceReader(input)
     val toks = new mutable.ListBuffer[Token]
     var ctxt = context
     while (!in.atEnd) {
-      Parsers.parse(Parsers.token_line(ctxt), in) match {
+      Parsers.parse(Parsers.token_line(SML, ctxt), in) match {
         case Parsers.Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
         case Parsers.NoSuccess(_, rest) =>
           error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
--- a/src/Pure/PIDE/markup.ML	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/PIDE/markup.ML	Tue Mar 25 16:11:00 2014 +0100
@@ -33,6 +33,7 @@
   val language_term: bool -> T
   val language_prop: bool -> T
   val language_ML: bool -> T
+  val language_SML: bool -> T
   val language_document: bool -> T
   val language_antiquotation: T
   val language_text: bool -> T
@@ -93,6 +94,8 @@
   val ML_charN: string val ML_char: T
   val ML_stringN: string val ML_string: T
   val ML_commentN: string val ML_comment: T
+  val SML_stringN: string val SML_string: T
+  val SML_commentN: string val SML_comment: T
   val ML_defN: string
   val ML_openN: string
   val ML_structureN: string
@@ -287,6 +290,7 @@
 val language_term = language' {name = "term", symbols = true, antiquotes = false};
 val language_prop = language' {name = "prop", symbols = true, antiquotes = false};
 val language_ML = language' {name = "ML", symbols = false, antiquotes = true};
+val language_SML = language' {name = "SML", symbols = false, antiquotes = false};
 val language_document = language' {name = "document", symbols = false, antiquotes = true};
 val language_antiquotation =
   language {name = "antiquotation", symbols = true, antiquotes = false, delimited = true};
@@ -398,6 +402,8 @@
 val (ML_charN, ML_char) = markup_elem "ML_char";
 val (ML_stringN, ML_string) = markup_elem "ML_string";
 val (ML_commentN, ML_comment) = markup_elem "ML_comment";
+val (SML_stringN, SML_string) = markup_elem "SML_string";
+val (SML_commentN, SML_comment) = markup_elem "SML_comment";
 
 val ML_defN = "ML_def";
 val ML_openN = "ML_open";
--- a/src/Pure/PIDE/markup.scala	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/PIDE/markup.scala	Tue Mar 25 16:11:00 2014 +0100
@@ -101,6 +101,7 @@
   object Language
   {
     val ML = "ML"
+    val SML = "SML"
     val UNKNOWN = "unknown"
 
     def unapply(markup: Markup): Option[(String, Boolean, Boolean, Boolean)] =
@@ -202,6 +203,8 @@
   val ML_CHAR = "ML_char"
   val ML_STRING = "ML_string"
   val ML_COMMENT = "ML_comment"
+  val SML_STRING = "SML_string"
+  val SML_COMMENT = "SML_comment"
 
   val ML_DEF = "ML_def"
   val ML_OPEN = "ML_open"
--- a/src/Pure/Thy/thy_output.ML	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Pure/Thy/thy_output.ML	Tue Mar 25 16:11:00 2014 +0100
@@ -648,7 +648,7 @@
 
 fun ml_enclose bg en source =
   ML_Lex.read Position.none bg @
-  ML_Lex.read_source source @
+  ML_Lex.read_source false source @
   ML_Lex.read Position.none en;
 
 in
--- a/src/Tools/jEdit/src/isabelle.scala	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Tools/jEdit/src/isabelle.scala	Tue Mar 25 16:11:00 2014 +0100
@@ -36,6 +36,10 @@
     Outer_Syntax.init().no_tokens.
       set_language_context(Completion.Language_Context.ML_outer)
 
+  private lazy val sml_syntax: Outer_Syntax =
+    Outer_Syntax.init().no_tokens.
+      set_language_context(Completion.Language_Context.SML_outer)
+
   private lazy val news_syntax: Outer_Syntax =
     Outer_Syntax.init().no_tokens
 
@@ -49,6 +53,7 @@
       case "isabelle-ml" => Some(ml_syntax)
       case "isabelle-news" => Some(news_syntax)
       case "isabelle-output" => None
+      case "sml" => Some(sml_syntax)
       case _ => None
     }
 
--- a/src/Tools/jEdit/src/rendering.scala	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Tools/jEdit/src/rendering.scala	Tue Mar 25 16:11:00 2014 +0100
@@ -681,7 +681,9 @@
       Markup.ML_NUMERAL -> inner_numeral_color,
       Markup.ML_CHAR -> inner_quoted_color,
       Markup.ML_STRING -> inner_quoted_color,
-      Markup.ML_COMMENT -> inner_comment_color)
+      Markup.ML_COMMENT -> inner_comment_color,
+      Markup.SML_STRING -> inner_quoted_color,
+      Markup.SML_COMMENT -> inner_comment_color)
 
   private lazy val text_color_elements =
     Document.Elements(text_colors.keySet)
--- a/src/Tools/jEdit/src/token_markup.scala	Tue Mar 25 15:15:33 2014 +0100
+++ b/src/Tools/jEdit/src/token_markup.scala	Tue Mar 25 16:11:00 2014 +0100
@@ -204,7 +204,7 @@
       {
         val (styled_tokens, context1) =
           if (mode == "isabelle-ml" || mode == "sml") {
-            val (tokens, ctxt1) = ML_Lex.tokenize_line(line, line_ctxt.get)
+            val (tokens, ctxt1) = ML_Lex.tokenize_line(mode == "sml", line, line_ctxt.get)
             val styled_tokens = tokens.map(tok => (Rendering.ml_token_markup(tok), tok.source))
             (styled_tokens, new Line_Context(Some(ctxt1)))
           }