src/Tools/Haskell/Haskell.thy
changeset 74172 c576a4e2ffbc
parent 74169 43fe7388458f
child 74173 8d03d548df1c
equal deleted inserted replaced
74171:a9e79c3645c4 74172:c576a4e2ffbc
   392 {-  Title:      Isabelle/Symbols.hs
   392 {-  Title:      Isabelle/Symbols.hs
   393     Author:     Makarius
   393     Author:     Makarius
   394     LICENSE:    BSD 3-clause (Isabelle)
   394     LICENSE:    BSD 3-clause (Isabelle)
   395 
   395 
   396 Isabelle text symbols.
   396 Isabelle text symbols.
       
   397 
       
   398 See \<^file>\<open>$ISABELLE_HOME/src/Pure/General/symbol.ML\<close>
       
   399 and \<^file>\<open>$ISABELLE_HOME/src/Pure/General/symbol_explode.ML\<close>.
   397 -}
   400 -}
   398 
   401 
   399 module Isabelle.Symbol where
   402 {-# LANGUAGE OverloadedStrings #-}
       
   403 
       
   404 module Isabelle.Symbol (
       
   405   is_ascii_letter, is_ascii_digit, is_ascii_hex, is_ascii_quasi,
       
   406   is_ascii_blank, is_ascii_line_terminator, is_ascii_letdig,
       
   407   is_ascii_identifier,
       
   408 
       
   409   Symbol, explode
       
   410 )
       
   411 where
       
   412 
       
   413 import Data.Word (Word8)
       
   414 import qualified Isabelle.Bytes as Bytes
       
   415 import Isabelle.Bytes (Bytes)
       
   416 
   400 
   417 
   401 {- ASCII characters -}
   418 {- ASCII characters -}
   402 
   419 
   403 is_ascii_letter :: Char -> Bool
   420 is_ascii_letter :: Char -> Bool
   404 is_ascii_letter c = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   421 is_ascii_letter c = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   411 
   428 
   412 is_ascii_quasi :: Char -> Bool
   429 is_ascii_quasi :: Char -> Bool
   413 is_ascii_quasi c = c == '_' || c == '\''
   430 is_ascii_quasi c = c == '_' || c == '\''
   414 
   431 
   415 is_ascii_blank :: Char -> Bool
   432 is_ascii_blank :: Char -> Bool
   416 is_ascii_blank c = c `elem` " \t\n\11\f\r"
   433 is_ascii_blank c = c `elem` (" \t\n\11\f\r" :: String)
   417 
   434 
   418 is_ascii_line_terminator :: Char -> Bool
   435 is_ascii_line_terminator :: Char -> Bool
   419 is_ascii_line_terminator c = c == '\r' || c == '\n'
   436 is_ascii_line_terminator c = c == '\r' || c == '\n'
   420 
   437 
   421 is_ascii_letdig :: Char -> Bool
   438 is_ascii_letdig :: Char -> Bool
   422 is_ascii_letdig c = is_ascii_letter c || is_ascii_digit c || is_ascii_quasi c
   439 is_ascii_letdig c = is_ascii_letter c || is_ascii_digit c || is_ascii_quasi c
   423 
   440 
   424 is_ascii_identifier :: String -> Bool
   441 is_ascii_identifier :: String -> Bool
   425 is_ascii_identifier s =
   442 is_ascii_identifier s =
   426   not (null s) && is_ascii_letter (head s) && all is_ascii_letdig s
   443   not (null s) && is_ascii_letter (head s) && all is_ascii_letdig s
       
   444 
       
   445 
       
   446 {- explode symbols: ASCII, UTF8, named -}
       
   447 
       
   448 type Symbol = Bytes
       
   449 
       
   450 is_utf8 :: Word8 -> Bool
       
   451 is_utf8 b = b >= 128
       
   452 
       
   453 is_utf8_trailer :: Word8 -> Bool
       
   454 is_utf8_trailer b = 128 <= b && b < 192
       
   455 
       
   456 is_utf8_control :: Word8 -> Bool
       
   457 is_utf8_control b = 128 <= b && b < 160
       
   458 
       
   459 (|>) :: a -> (a -> b) -> b
       
   460 x |> f = f x
       
   461 
       
   462 explode :: Bytes -> [Symbol]
       
   463 explode string = scan 0
       
   464   where
       
   465     byte = Bytes.index string
       
   466     substring i j =
       
   467       if i == j - 1 then Bytes.singleton (byte i)
       
   468       else Bytes.pack (map byte [i .. j - 1])
       
   469 
       
   470     n = Bytes.length string
       
   471     test pred i = i < n && pred (byte i)
       
   472     test_char pred i = i < n && pred (Bytes.char (byte i))
       
   473     many pred i = if test pred i then many pred (i + 1) else i
       
   474     maybe_char c i = if test_char (== c) i then i + 1 else i
       
   475     maybe_ascii_id i =
       
   476       if test_char is_ascii_letter i
       
   477       then many (is_ascii_letdig . Bytes.char) (i + 1)
       
   478       else i
       
   479 
       
   480     scan i =
       
   481       if i < n then
       
   482         let
       
   483           b = byte i
       
   484           c = Bytes.char b
       
   485         in
       
   486           {-encoded newline-}
       
   487           if c == '\r' then "\n" : scan (maybe_char '\n' (i + 1))
       
   488           {-pseudo utf8: encoded ascii control-}
       
   489           else if b == 192 && test is_utf8_control (i + 1) && not (test is_utf8 (i + 2))
       
   490           then Bytes.singleton (byte (i + 1) - 128) : scan (i + 2)
       
   491           {-utf8-}
       
   492           else if is_utf8 b then
       
   493             let j = many is_utf8_trailer (i + 1)
       
   494             in substring i j : scan j
       
   495           {-named symbol-}
       
   496           else if c == '\\' && test_char (== '<') (i + 1) then
       
   497             let j = (i + 2) |> maybe_char '^' |> maybe_ascii_id |> maybe_char '>'
       
   498             in substring i j : scan j
       
   499           {-single character-}
       
   500           else Bytes.singleton b : scan (i + 1)
       
   501       else []
   427 \<close>
   502 \<close>
   428 
   503 
   429 generate_file "Isabelle/Buffer.hs" = \<open>
   504 generate_file "Isabelle/Buffer.hs" = \<open>
   430 {-  Title:      Isabelle/Buffer.hs
   505 {-  Title:      Isabelle/Buffer.hs
   431     Author:     Makarius
   506     Author:     Makarius