# HG changeset patch # User wenzelm # Date 1629716203 -7200 # Node ID c576a4e2ffbcfd62ea29a5a62924c534d3e63a3a # Parent a9e79c3645c4a484947bb8739db808e9b836ed7c more Haskell operations; diff -r a9e79c3645c4 -r c576a4e2ffbc src/Tools/Haskell/Haskell.thy --- a/src/Tools/Haskell/Haskell.thy Mon Aug 23 12:54:28 2021 +0200 +++ b/src/Tools/Haskell/Haskell.thy Mon Aug 23 12:56:43 2021 +0200 @@ -394,9 +394,26 @@ LICENSE: BSD 3-clause (Isabelle) Isabelle text symbols. + +See \<^file>\$ISABELLE_HOME/src/Pure/General/symbol.ML\ +and \<^file>\$ISABELLE_HOME/src/Pure/General/symbol_explode.ML\. -} -module Isabelle.Symbol where +{-# LANGUAGE OverloadedStrings #-} + +module Isabelle.Symbol ( + is_ascii_letter, is_ascii_digit, is_ascii_hex, is_ascii_quasi, + is_ascii_blank, is_ascii_line_terminator, is_ascii_letdig, + is_ascii_identifier, + + Symbol, explode +) +where + +import Data.Word (Word8) +import qualified Isabelle.Bytes as Bytes +import Isabelle.Bytes (Bytes) + {- ASCII characters -} @@ -413,7 +430,7 @@ is_ascii_quasi c = c == '_' || c == '\'' is_ascii_blank :: Char -> Bool -is_ascii_blank c = c `elem` " \t\n\11\f\r" +is_ascii_blank c = c `elem` (" \t\n\11\f\r" :: String) is_ascii_line_terminator :: Char -> Bool is_ascii_line_terminator c = c == '\r' || c == '\n' @@ -424,6 +441,64 @@ is_ascii_identifier :: String -> Bool is_ascii_identifier s = not (null s) && is_ascii_letter (head s) && all is_ascii_letdig s + + +{- explode symbols: ASCII, UTF8, named -} + +type Symbol = Bytes + +is_utf8 :: Word8 -> Bool +is_utf8 b = b >= 128 + +is_utf8_trailer :: Word8 -> Bool +is_utf8_trailer b = 128 <= b && b < 192 + +is_utf8_control :: Word8 -> Bool +is_utf8_control b = 128 <= b && b < 160 + +(|>) :: a -> (a -> b) -> b +x |> f = f x + +explode :: Bytes -> [Symbol] +explode string = scan 0 + where + byte = Bytes.index string + substring i j = + if i == j - 1 then Bytes.singleton (byte i) + else Bytes.pack (map byte [i .. j - 1]) + + n = Bytes.length string + test pred i = i < n && pred (byte i) + test_char pred i = i < n && pred (Bytes.char (byte i)) + many pred i = if test pred i then many pred (i + 1) else i + maybe_char c i = if test_char (== c) i then i + 1 else i + maybe_ascii_id i = + if test_char is_ascii_letter i + then many (is_ascii_letdig . Bytes.char) (i + 1) + else i + + scan i = + if i < n then + let + b = byte i + c = Bytes.char b + in + {-encoded newline-} + if c == '\r' then "\n" : scan (maybe_char '\n' (i + 1)) + {-pseudo utf8: encoded ascii control-} + else if b == 192 && test is_utf8_control (i + 1) && not (test is_utf8 (i + 2)) + then Bytes.singleton (byte (i + 1) - 128) : scan (i + 2) + {-utf8-} + else if is_utf8 b then + let j = many is_utf8_trailer (i + 1) + in substring i j : scan j + {-named symbol-} + else if c == '\\' && test_char (== '<') (i + 1) then + let j = (i + 2) |> maybe_char '^' |> maybe_ascii_id |> maybe_char '>' + in substring i j : scan j + {-single character-} + else Bytes.singleton b : scan (i + 1) + else [] \ generate_file "Isabelle/Buffer.hs" = \