src/Pure/General/symbol_explode.ML
changeset 74170 09d4175f473e
parent 63936 b87784e19a77
child 77847 3bb6468d202e
equal deleted inserted replaced
74169:43fe7388458f 74170:09d4175f473e
    16 fun is_utf8_control c = #"\128" <= c andalso c < #"\160";
    16 fun is_utf8_control c = #"\128" <= c andalso c < #"\160";
    17 
    17 
    18 fun explode string =
    18 fun explode string =
    19   let
    19   let
    20     fun char i = String.sub (string, i);
    20     fun char i = String.sub (string, i);
    21     fun string_range i j = String.substring (string, i, j - i);
    21     fun substring i j = String.substring (string, i, j - i);
    22 
    22 
    23     val n = size string;
    23     val n = size string;
    24     fun test pred i = i < n andalso pred (char i);
    24     fun test pred i = i < n andalso pred (char i);
    25     fun many pred i = if test pred i then many pred (i + 1) else i;
    25     fun many pred i = if test pred i then many pred (i + 1) else i;
    26     fun maybe pred i = if test pred i then i + 1 else i;
    26     fun maybe_char c i = if test (fn c' => c = c') i then i + 1 else i;
    27     fun maybe_char c = maybe (fn c' => c = c');
       
    28     fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i;
    27     fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i;
    29 
    28 
    30     fun scan i =
    29     fun scan i =
    31       if i < n then
    30       if i < n then
    32         let val ch = char i in
    31         let val ch = char i in
    37             test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2))
    36             test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2))
    38           then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2)
    37           then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2)
    39           (*utf8*)
    38           (*utf8*)
    40           else if is_utf8 ch then
    39           else if is_utf8 ch then
    41             let val j = many is_utf8_trailer (i + 1)
    40             let val j = many is_utf8_trailer (i + 1)
    42             in string_range i j :: scan j end
    41             in substring i j :: scan j end
    43           (*named symbol*)
    42           (*named symbol*)
    44           else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then
    43           else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then
    45             let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">"
    44             let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">"
    46             in string_range i j :: scan j end
    45             in substring i j :: scan j end
    47           (*single character*)
    46           (*single character*)
    48           else String.str ch :: scan (i + 1)
    47           else String.str ch :: scan (i + 1)
    49         end
    48         end
    50       else [];
    49       else [];
    51   in scan 0 end;
    50   in scan 0 end;