equal
deleted
inserted
replaced
16 fun is_utf8_control c = #"\128" <= c andalso c < #"\160"; |
16 fun is_utf8_control c = #"\128" <= c andalso c < #"\160"; |
17 |
17 |
18 fun explode string = |
18 fun explode string = |
19 let |
19 let |
20 fun char i = String.sub (string, i); |
20 fun char i = String.sub (string, i); |
21 fun string_range i j = String.substring (string, i, j - i); |
21 fun substring i j = String.substring (string, i, j - i); |
22 |
22 |
23 val n = size string; |
23 val n = size string; |
24 fun test pred i = i < n andalso pred (char i); |
24 fun test pred i = i < n andalso pred (char i); |
25 fun many pred i = if test pred i then many pred (i + 1) else i; |
25 fun many pred i = if test pred i then many pred (i + 1) else i; |
26 fun maybe pred i = if test pred i then i + 1 else i; |
26 fun maybe_char c i = if test (fn c' => c = c') i then i + 1 else i; |
27 fun maybe_char c = maybe (fn c' => c = c'); |
|
28 fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i; |
27 fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i; |
29 |
28 |
30 fun scan i = |
29 fun scan i = |
31 if i < n then |
30 if i < n then |
32 let val ch = char i in |
31 let val ch = char i in |
37 test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2)) |
36 test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2)) |
38 then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2) |
37 then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2) |
39 (*utf8*) |
38 (*utf8*) |
40 else if is_utf8 ch then |
39 else if is_utf8 ch then |
41 let val j = many is_utf8_trailer (i + 1) |
40 let val j = many is_utf8_trailer (i + 1) |
42 in string_range i j :: scan j end |
41 in substring i j :: scan j end |
43 (*named symbol*) |
42 (*named symbol*) |
44 else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then |
43 else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then |
45 let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">" |
44 let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">" |
46 in string_range i j :: scan j end |
45 in substring i j :: scan j end |
47 (*single character*) |
46 (*single character*) |
48 else String.str ch :: scan (i + 1) |
47 else String.str ch :: scan (i + 1) |
49 end |
48 end |
50 else []; |
49 else []; |
51 in scan 0 end; |
50 in scan 0 end; |