# HG changeset patch # User wenzelm # Date 910621978 -3600 # Node ID 24e4b1780d33428e1e7df5311b6e13598a22f33a # Parent 91113aa09371354e32a09e054451b0825187843f Outer lexical syntax for Isabelle/Isar. diff -r 91113aa09371 -r 24e4b1780d33 src/Pure/Isar/outer_lex.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/Isar/outer_lex.ML Mon Nov 09 15:32:58 1998 +0100 @@ -0,0 +1,217 @@ +(* Title: Pure/Isar/outer_lex.ML + ID: $Id$ + Author: Markus Wenzel, TU Muenchen + +Outer lexical syntax for Isabelle/Isar. +*) + +signature OUTER_LEX = +sig + datatype token_kind = + Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat | + String | Verbatim | Ignore | EOF + type token + val str_of_kind: token_kind -> string + val stopper: token * (token -> bool) + val not_eof: token -> bool + val position_of: token -> Position.T + val pos_of: token -> string + val is_kind: token_kind -> token -> bool + val keyword_pred: (string -> bool) -> token -> bool + val name_of: token -> string + val is_proper: token -> bool + val val_of: token -> string + val is_symid: string -> bool + val scan: Scan.lexicon -> + Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list) + val source: bool -> (unit -> Scan.lexicon) -> Position.T -> (Symbol.symbol, 'a) Source.source -> + (token, (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source +end; + +structure OuterLex: OUTER_LEX = +struct + + +(** tokens **) + +(* datatype token *) + +datatype token_kind = + Keyword | Ident | LongIdent | SymIdent | Var | TextVar | TypeIdent | TypeVar | Nat | + String | Verbatim | Ignore | EOF; + +datatype token = Token of Position.T * (token_kind * string); + +val str_of_kind = + fn Keyword => "keyword" + | Ident => "identifier" + | LongIdent => "long identifier" + | SymIdent => "symbolic identifier" + | Var => "schematic variable" + | TextVar => "text variable" + | TypeIdent => "type variable" + | TypeVar => "schematic type variable" + | Nat => "number" + | String => "string" + | Verbatim => "verbatim text" + | Ignore => "ignored text" + | EOF => "end-of-file"; + + +(* eof token *) + +val eof = Token (Position.none, (EOF, "")); + +fun is_eof (Token (_, (EOF, _))) = true + | is_eof _ = false; + +val stopper = (eof, is_eof); +val not_eof = not o is_eof; + + +(* get position *) + +fun position_of (Token (pos, _)) = pos; +val pos_of = Position.str_of o position_of; + + +(* kind of token *) + +fun is_kind k (Token (_, (k', _))) = k = k'; + +fun keyword_pred pred (Token (_, (Keyword, x))) = pred x + | keyword_pred _ _ = false; + +fun name_of (Token (_, (k, _))) = str_of_kind k; + +fun is_proper (Token (_, (Ignore, _))) = false + | is_proper _ = true; + + +(* value of token *) + +fun val_of (Token (_, (_, x))) = x; + +fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x'; + + + +(** scanners **) + +fun change_prompt scan = Scan.prompt "# " scan; + + +(* diagnostics *) + +fun lex_err msg ((pos, cs), _) = "Outer lexical error" ^ Position.str_of pos ^ ": " ^ msg cs; + + +(* line numbering *) + +fun incr_line scan = Scan.depend (fn pos => scan >> pair (Position.inc pos)); +val keep_line = Scan.lift; + +val scan_blank = + incr_line ($$ "\n") || + keep_line (Scan.one Symbol.is_blank); + + +(* scan symbolic idents *) + +val sym_chars = explode "!#$%&*+-/:<=>?@^_`|~"; +fun is_sym_char s = s mem sym_chars; + +fun is_symid s = s <> "" andalso forall is_sym_char (Symbol.explode s); + +val scan_symid = Scan.any1 is_sym_char >> implode; + + +(* scan strings *) + +val scan_str = + scan_blank >> K Symbol.space || + keep_line ($$ "\\" |-- Scan.one Symbol.not_eof) || + keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf Symbol.not_eof)); + +val scan_string = + keep_line ($$ "\"") |-- + !! (lex_err (K "missing quote at end of string")) + (change_prompt ((Scan.repeat scan_str >> implode) --| keep_line ($$ "\""))); + + +(* scan verbatim text *) + +val scan_verb = + scan_blank || + keep_line ($$ "|" --| Scan.ahead (Scan.one (not_equal "}"))) || + keep_line (Scan.one (not_equal "|" andf Symbol.not_eof)); + +val scan_verbatim = + keep_line ($$ "{" -- $$ "|") |-- + !! (lex_err (K "missing end of verbatim text")) + (change_prompt ((Scan.repeat scan_verb >> implode) --| keep_line ($$ "|" -- $$ "}"))); + + +(* scan space *) + +val is_space = Symbol.is_blank andf not_equal "\n"; + +val scan_space = + keep_line (Scan.any1 is_space) |-- Scan.optional (incr_line ($$ "\n")) "" || + keep_line (Scan.any is_space) |-- incr_line ($$ "\n"); + + +(* scan nested comments *) + +val scan_cmt = + Scan.lift scan_blank || + Scan.depend (fn d => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) || + Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) || + Scan.lift (keep_line ($$ "*" --| Scan.ahead (Scan.one (not_equal ")")))) || + Scan.lift (keep_line (Scan.one (not_equal "*" andf Symbol.not_eof))); + +val scan_comment = + keep_line ($$ "(" -- $$ "*") |-- + !! (lex_err (K "missing end of comment")) + (change_prompt + (Scan.pass 0 (Scan.repeat scan_cmt) |-- keep_line ($$ "*" -- $$ ")") >> K "")); + + +(* scan token *) + +fun scan lex (pos, cs) = + let + fun token k x = Token (pos, (k, x)); + fun ignore _ = token Ignore ""; + + val scanner = + scan_string >> token String || + scan_verbatim >> token Verbatim || + scan_space >> ignore || + scan_comment >> ignore || + keep_line (Scan.max token_leq + (Scan.literal lex >> (token Keyword o implode)) + (Syntax.scan_longid >> token LongIdent || + Syntax.scan_id >> token Ident || + Syntax.scan_var >> token Var || + $$ "?" ^^ $$ "?" ^^ Syntax.scan_id >> token TextVar || + Syntax.scan_tid >> token TypeIdent || + Syntax.scan_tvar >> token TypeVar || + Syntax.scan_nat >> token Nat || + scan_symid >> token SymIdent)); + in + !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning cs))) scanner (pos, cs) + end; + + +(* source of (proper) tokens *) + +fun recover xs = keep_line (Scan.any1 ((not o Symbol.is_blank) andf Symbol.not_eof)) xs; + +fun source do_recover get_lex pos src = + Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) + (if do_recover then Some recover else None) src + |> Source.filter is_proper; + + +end;