392 {- Title: Isabelle/Symbols.hs |
392 {- Title: Isabelle/Symbols.hs |
393 Author: Makarius |
393 Author: Makarius |
394 LICENSE: BSD 3-clause (Isabelle) |
394 LICENSE: BSD 3-clause (Isabelle) |
395 |
395 |
396 Isabelle text symbols. |
396 Isabelle text symbols. |
|
397 |
|
398 See \<^file>\<open>$ISABELLE_HOME/src/Pure/General/symbol.ML\<close> |
|
399 and \<^file>\<open>$ISABELLE_HOME/src/Pure/General/symbol_explode.ML\<close>. |
397 -} |
400 -} |
398 |
401 |
399 module Isabelle.Symbol where |
402 {-# LANGUAGE OverloadedStrings #-} |
|
403 |
|
404 module Isabelle.Symbol ( |
|
405 is_ascii_letter, is_ascii_digit, is_ascii_hex, is_ascii_quasi, |
|
406 is_ascii_blank, is_ascii_line_terminator, is_ascii_letdig, |
|
407 is_ascii_identifier, |
|
408 |
|
409 Symbol, explode |
|
410 ) |
|
411 where |
|
412 |
|
413 import Data.Word (Word8) |
|
414 import qualified Isabelle.Bytes as Bytes |
|
415 import Isabelle.Bytes (Bytes) |
|
416 |
400 |
417 |
401 {- ASCII characters -} |
418 {- ASCII characters -} |
402 |
419 |
403 is_ascii_letter :: Char -> Bool |
420 is_ascii_letter :: Char -> Bool |
404 is_ascii_letter c = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' |
421 is_ascii_letter c = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' |
411 |
428 |
412 is_ascii_quasi :: Char -> Bool |
429 is_ascii_quasi :: Char -> Bool |
413 is_ascii_quasi c = c == '_' || c == '\'' |
430 is_ascii_quasi c = c == '_' || c == '\'' |
414 |
431 |
415 is_ascii_blank :: Char -> Bool |
432 is_ascii_blank :: Char -> Bool |
416 is_ascii_blank c = c `elem` " \t\n\11\f\r" |
433 is_ascii_blank c = c `elem` (" \t\n\11\f\r" :: String) |
417 |
434 |
418 is_ascii_line_terminator :: Char -> Bool |
435 is_ascii_line_terminator :: Char -> Bool |
419 is_ascii_line_terminator c = c == '\r' || c == '\n' |
436 is_ascii_line_terminator c = c == '\r' || c == '\n' |
420 |
437 |
421 is_ascii_letdig :: Char -> Bool |
438 is_ascii_letdig :: Char -> Bool |
422 is_ascii_letdig c = is_ascii_letter c || is_ascii_digit c || is_ascii_quasi c |
439 is_ascii_letdig c = is_ascii_letter c || is_ascii_digit c || is_ascii_quasi c |
423 |
440 |
424 is_ascii_identifier :: String -> Bool |
441 is_ascii_identifier :: String -> Bool |
425 is_ascii_identifier s = |
442 is_ascii_identifier s = |
426 not (null s) && is_ascii_letter (head s) && all is_ascii_letdig s |
443 not (null s) && is_ascii_letter (head s) && all is_ascii_letdig s |
|
444 |
|
445 |
|
446 {- explode symbols: ASCII, UTF8, named -} |
|
447 |
|
448 type Symbol = Bytes |
|
449 |
|
450 is_utf8 :: Word8 -> Bool |
|
451 is_utf8 b = b >= 128 |
|
452 |
|
453 is_utf8_trailer :: Word8 -> Bool |
|
454 is_utf8_trailer b = 128 <= b && b < 192 |
|
455 |
|
456 is_utf8_control :: Word8 -> Bool |
|
457 is_utf8_control b = 128 <= b && b < 160 |
|
458 |
|
459 (|>) :: a -> (a -> b) -> b |
|
460 x |> f = f x |
|
461 |
|
462 explode :: Bytes -> [Symbol] |
|
463 explode string = scan 0 |
|
464 where |
|
465 byte = Bytes.index string |
|
466 substring i j = |
|
467 if i == j - 1 then Bytes.singleton (byte i) |
|
468 else Bytes.pack (map byte [i .. j - 1]) |
|
469 |
|
470 n = Bytes.length string |
|
471 test pred i = i < n && pred (byte i) |
|
472 test_char pred i = i < n && pred (Bytes.char (byte i)) |
|
473 many pred i = if test pred i then many pred (i + 1) else i |
|
474 maybe_char c i = if test_char (== c) i then i + 1 else i |
|
475 maybe_ascii_id i = |
|
476 if test_char is_ascii_letter i |
|
477 then many (is_ascii_letdig . Bytes.char) (i + 1) |
|
478 else i |
|
479 |
|
480 scan i = |
|
481 if i < n then |
|
482 let |
|
483 b = byte i |
|
484 c = Bytes.char b |
|
485 in |
|
486 {-encoded newline-} |
|
487 if c == '\r' then "\n" : scan (maybe_char '\n' (i + 1)) |
|
488 {-pseudo utf8: encoded ascii control-} |
|
489 else if b == 192 && test is_utf8_control (i + 1) && not (test is_utf8 (i + 2)) |
|
490 then Bytes.singleton (byte (i + 1) - 128) : scan (i + 2) |
|
491 {-utf8-} |
|
492 else if is_utf8 b then |
|
493 let j = many is_utf8_trailer (i + 1) |
|
494 in substring i j : scan j |
|
495 {-named symbol-} |
|
496 else if c == '\\' && test_char (== '<') (i + 1) then |
|
497 let j = (i + 2) |> maybe_char '^' |> maybe_ascii_id |> maybe_char '>' |
|
498 in substring i j : scan j |
|
499 {-single character-} |
|
500 else Bytes.singleton b : scan (i + 1) |
|
501 else [] |
427 \<close> |
502 \<close> |
428 |
503 |
429 generate_file "Isabelle/Buffer.hs" = \<open> |
504 generate_file "Isabelle/Buffer.hs" = \<open> |
430 {- Title: Isabelle/Buffer.hs |
505 {- Title: Isabelle/Buffer.hs |
431 Author: Makarius |
506 Author: Makarius |