more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
authorwenzelm
Wed, 10 Jul 2013 16:25:26 +0200
changeset 52616 3ac2878764f9
parent 52610 78a64edf431f
child 52617 42e02ddd1568
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
src/Pure/General/scan.scala
src/Pure/General/symbol.ML
src/Pure/General/symbol.scala
src/Pure/General/symbol_pos.ML
src/Pure/Syntax/lexicon.ML
src/Pure/term.ML
--- a/src/Pure/General/scan.scala	Fri Jul 12 13:12:21 2013 +0200
+++ b/src/Pure/General/scan.scala	Wed Jul 10 16:25:26 2013 +0200
@@ -348,7 +348,18 @@
     private def other_token(is_command: String => Boolean)
       : Parser[Token] =
     {
-      val id = one(Symbol.is_letter) ~ many(Symbol.is_letdig) ^^ { case x ~ y => x + y }
+      val letdigs1 = many1(Symbol.is_letdig)
+      val sub_sup =
+        one(s =>
+          s == Symbol.sub_decoded || s == "\\<^sub>" ||
+          s == Symbol.isub_decoded || s == "\\<^isub>" ||
+          s == Symbol.sup_decoded || s == "\\<^sup>" ||
+          s == Symbol.isup_decoded || s == "\\<^isup>")
+      val id =
+        one(Symbol.is_letter) ~
+          (rep(letdigs1 | (sub_sup ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^
+        { case x ~ y => x + y }
+
       val nat = many1(Symbol.is_digit)
       val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z }
       val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }
--- a/src/Pure/General/symbol.ML	Fri Jul 12 13:12:21 2013 +0200
+++ b/src/Pure/General/symbol.ML	Wed Jul 10 16:25:26 2013 +0200
@@ -46,7 +46,6 @@
   val decode: symbol -> sym
   datatype kind = Letter | Digit | Quasi | Blank | Other
   val kind: symbol -> kind
-  val is_letter_symbol: symbol -> bool
   val is_letter: symbol -> bool
   val is_digit: symbol -> bool
   val is_quasi: symbol -> bool
@@ -377,9 +376,7 @@
       "\\<Upsilon>",
       "\\<Phi>",
       "\\<Psi>",
-      "\\<Omega>",
-      "\\<^isub>",
-      "\\<^isup>"
+      "\\<Omega>"
     ];
 in
 
@@ -520,6 +517,7 @@
 
 fun symbolic_end (_ :: "\\<^sub>" :: _) = true
   | symbolic_end (_ :: "\\<^isub>" :: _) = true
+  | symbolic_end (_ :: "\\<^sup>" :: _) = true
   | symbolic_end (_ :: "\\<^isup>" :: _) = true
   | symbolic_end (s :: _) = is_symbolic s
   | symbolic_end [] = false;
--- a/src/Pure/General/symbol.scala	Fri Jul 12 13:12:21 2013 +0200
+++ b/src/Pure/General/symbol.scala	Wed Jul 10 16:25:26 2013 +0200
@@ -347,9 +347,7 @@
       "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
       "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
       "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
-      "\\<Psi>", "\\<Omega>",
-
-      "\\<^isub>", "\\<^isup>")
+      "\\<Psi>", "\\<Omega>")
 
     val blanks = recode_set(" ", "\t", "\n", "\u000B", "\f", "\r", "\r\n", "\\<^newline>")
 
--- a/src/Pure/General/symbol_pos.ML	Fri Jul 12 13:12:21 2013 +0200
+++ b/src/Pure/General/symbol_pos.ML	Wed Jul 10 16:25:26 2013 +0200
@@ -215,51 +215,19 @@
 
 local
 
-val latin = Symbol.is_ascii_letter;
-val digit = Symbol.is_ascii_digit;
-fun underscore s = s = "_";
-fun prime s = s = "'";
-fun subscript s = s = "\\<^sub>" orelse s = "\\<^isub>";
-fun script s = s = "\\<^sub>" orelse s = "\\<^isub>" orelse s = "\\<^isup>";
-fun special_letter s = Symbol.is_letter_symbol s andalso not (script s);
-
-val scan_plain = Scan.one ((latin orf digit orf prime) o symbol) >> single;
-val scan_digit = Scan.one (digit o symbol) >> single;
-val scan_prime = Scan.one (prime o symbol) >> single;
-val scan_extended =
-  Scan.one ((latin orf digit orf prime orf underscore orf special_letter) o symbol) >> single;
-
-val scan_subscript =
-  Scan.one (subscript o symbol) --
-  Scan.one ((latin orf digit orf prime orf special_letter) o symbol)
-  >> (fn (x, y) => [x, y]);
-
-val scan_ident_part1 =
-  Scan.one (latin o symbol) ::: (Scan.repeat (scan_plain || scan_subscript) >> flat) ||
-  Scan.one (special_letter o symbol) :::
-    (Scan.repeat (scan_digit || scan_prime || scan_subscript) >> flat);
-
-val scan_ident_part2 =
-  Scan.repeat1 (scan_plain || scan_subscript) >> flat ||
-  scan_ident_part1;
+val letter = Scan.one (symbol #> Symbol.is_letter);
+val letdigs1 = Scan.many1 (symbol #> Symbol.is_letdig);
+val sub_sup =
+  Scan.one (symbol #>
+    (fn s => s = "\\<^sub>" orelse s = "\\<^isub>" orelse s = "\\<^sup>" orelse s = "\\<^isup>"));
 
 in
 
-val scan_ident0 =
-  Scan.one (Symbol.is_letter o symbol) ::: Scan.many (Symbol.is_letdig o symbol);
-
-val scan_ident1 =
-  Scan.one ((latin orf special_letter) o symbol) :::
-    (Scan.repeat (scan_extended || Scan.one (subscript o symbol) ::: scan_extended) >> flat);
-
-val scan_ident2 =
-  scan_ident_part1 @@@
-    (Scan.repeat (Scan.many1 (underscore o symbol) @@@ scan_ident_part2) >> flat);
+val scan_ident =
+  letter ::: (Scan.repeat (letdigs1 || sub_sup ::: letdigs1) >> flat);
 
 end;
 
-val scan_ident = scan_ident0;
-
 fun is_identifier s =
   Symbol.is_ascii_identifier s orelse
     (case try (Scan.finite stopper scan_ident) (explode (s, Position.none)) of
--- a/src/Pure/Syntax/lexicon.ML	Fri Jul 12 13:12:21 2013 +0200
+++ b/src/Pure/Syntax/lexicon.ML	Wed Jul 10 16:25:26 2013 +0200
@@ -296,6 +296,7 @@
     fun chop_idx [] ds = idxname [] ds
       | chop_idx (cs as (_ :: "\\<^sub>" :: _)) ds = idxname cs ds
       | chop_idx (cs as (_ :: "\\<^isub>" :: _)) ds = idxname cs ds
+      | chop_idx (cs as (_ :: "\\<^sup>" :: _)) ds = idxname cs ds
       | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds
       | chop_idx (c :: cs) ds =
           if Symbol.is_digit c then chop_idx cs (c :: ds)
--- a/src/Pure/term.ML	Fri Jul 12 13:12:21 2013 +0200
+++ b/src/Pure/term.ML	Wed Jul 10 16:25:26 2013 +0200
@@ -980,6 +980,7 @@
       (case rev (Symbol.explode x) of
         _ :: "\\<^sub>" :: _ => false
       | _ :: "\\<^isub>" :: _ => false
+      | _ :: "\\<^sup>" :: _ => false
       | _ :: "\\<^isup>" :: _ => false
       | c :: _ => Symbol.is_digit c
       | _ => true);