more tight implementation of symbol explode operation (without support for raw symbols);
authorwenzelm
Wed, 21 Sep 2016 20:33:44 +0200
changeset 63932 6040db6b929d
parent 63927 0efb482beb84
child 63933 e149e3e320a3
more tight implementation of symbol explode operation (without support for raw symbols);
src/Pure/General/symbol_explode.ML
src/Pure/ROOT.ML
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/symbol_explode.ML	Wed Sep 21 20:33:44 2016 +0200
@@ -0,0 +1,64 @@
+(*  Title:      Pure/General/symbol.ML
+    Author:     Makarius
+
+String explode operation for Isabelle symbols.
+*)
+
+signature SYMBOL_EXPLODE =
+sig
+  val symbol_explode: string -> string list
+end;
+
+structure Symbol_Explode: SYMBOL_EXPLODE =
+struct
+
+local
+
+fun is_ascii_letter c = #"A" <= c andalso c <= #"Z" orelse #"a" <= c andalso c <= #"z";
+fun is_ascii_letdig c =
+  is_ascii_letter c orelse #"0" <= c andalso c <= #"9" orelse c = #"_" orelse c = #"'";
+
+fun is_utf8 c = c >= #"\128";
+fun is_utf8_trailer c = #"\128" <= c andalso c < #"\192";
+fun is_utf8_control c = #"\128" <= c andalso c < #"\160";
+
+in
+
+fun symbol_explode string =
+  let
+    fun char i = String.sub (string, i);
+    fun string_range i j = String.substring (string, i, j - i);
+
+    val n = size string;
+    fun test pred i = i < n andalso pred (char i);
+    fun many pred i = if test pred i then many pred (i + 1) else i;
+    fun maybe pred i = if test pred i then i + 1 else i;
+    fun maybe_char c = maybe (fn c' => c = c');
+    fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i;
+
+    fun explode i =
+      if i < n then
+        let val ch = char i in
+          (*encoded newline*)
+          if ch = #"\^M" then "\n" :: explode (maybe_char #"\n" (i + 1))
+          (*pseudo utf8: encoded ascii control*)
+          else if ch = #"\192" andalso
+            test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2))
+          then chr (Char.ord (char (i + 1)) - 128) :: explode (i + 2)
+          (*utf8*)
+          else if is_utf8 ch then
+            let val j = many is_utf8_trailer (i + 1)
+            in string_range i j :: explode j end
+          (*named symbol*)
+          else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then
+            let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">"
+            in string_range i j :: explode j end
+          (*single character*)
+          else String.str ch :: explode (i + 1)
+        end
+      else [];
+  in explode 0 end;
+
+end;
+
+end;
--- a/src/Pure/ROOT.ML	Wed Sep 21 14:20:07 2016 +0100
+++ b/src/Pure/ROOT.ML	Wed Sep 21 20:33:44 2016 +0200
@@ -10,6 +10,7 @@
 ML_file "System/distribution.ML";
 
 ML_file "General/basics.ML";
+ML_file "General/symbol_explode.ML";
 
 ML_file "Concurrent/multithreading.ML";
 ML_file "Concurrent/unsynchronized.ML";