src/Pure/Thy/scan.ML
author wenzelm
Sun Jul 23 12:10:11 2000 +0200 (2000-07-23)
changeset 9416 9144976964e7
parent 213 42f2b8a3581f
permissions -rw-r--r--
removed all_sessions.graph;
improved graph 'directories';
tuned;
     1 (*  Title: 	Pure/Thy/scan
     2     ID:         $Id$
     3     Author: 	Sonia Mahjoub / Tobias Nipkow
     4     Copyright   1992  TU Muenchen
     5 
     6     modified    December 1993 by Max Breitling (Type-variables added)
     7 
     8 The scanner. Adapted from Larry Paulson's ML for the Working Programmer.
     9 *)
    10 
    11 signature LEXICAL =
    12 sig
    13 
    14 
    15 datatype token = Id  of string 
    16                | Key of string
    17                | Nat of string
    18                | Stg of string
    19                | Txt of string
    20                | TypVar of string
    21 
    22 val scan : string list -> (token * int) list
    23 end;
    24 
    25 signature KEYWORD = 
    26 sig
    27 val alphas  : string list
    28 val symbols : string list
    29 end;
    30 
    31 
    32 functor LexicalFUN (Keyword: KEYWORD): LEXICAL = 
    33 
    34 struct
    35 
    36 
    37 
    38 datatype token = Id  of string 
    39                | Key of string
    40                | Nat of string
    41                | Stg of string
    42                | Txt of string
    43                | TypVar of string;
    44 
    45 
    46 fun lexerr(n,s) =
    47     error("Lexical error on line " ^ (string_of_int n) ^ ": " ^ s);
    48 
    49 val specials = explode"!{}@#$%^&*()+-=[]:\";,./?`_~<>|\\";
    50 
    51 fun is_symbol c = "_" = c orelse "'" = c;
    52 
    53 fun alphanum (id, c::cs) =
    54        if is_letter c orelse is_digit c orelse is_symbol c
    55        then alphanum (id ^ c , cs)
    56        else (id , c :: cs)
    57   | alphanum (id ,[]) = (id ,[]);
    58 
    59 fun numeric (nat, c::cs) =
    60       if is_digit c 
    61       then numeric (nat^c, cs)
    62       else (nat, c::cs)
    63   | numeric (nat, []) = (nat,[]);
    64  
    65 fun tokenof (a, n) =
    66       if a mem Keyword.alphas
    67       then (Key a, n) 
    68       else (Id a, n);
    69 
    70 fun symbolic (sy, c::cs) =
    71        if (sy mem Keyword.symbols) andalso 
    72           not((sy^c) mem Keyword.symbols) 
    73           orelse not (c mem specials)
    74        then (sy, c::cs)
    75        else symbolic(sy^c, cs)
    76   | symbolic (sy, []) = (sy, []);
    77 
    78 fun stringerr(n) = lexerr(n, "No matching quote found on this line");
    79 
    80 fun is_control_chr ([],_,n) = stringerr(n)
    81   | is_control_chr (c::cs,s,n) = 
    82           let val m = ord c
    83           in if (m >= 64 andalso m <= 95)
    84              then (cs, s^c, n)
    85              else stringerr(n)
    86           end;
    87 
    88 fun is_3_dgt (c1::c2::cs, c,n) = 
    89           let val s = c^c1^c2
    90           in  if (s >= "000" andalso s <= "255")
    91               then (cs, s)
    92               else stringerr(n)
    93           end 
    94   | is_3_dgt (_,_,n) = stringerr(n); 
    95 
    96 fun is_imprt_seq ([],_,n) = stringerr(n)
    97   | is_imprt_seq ((c::cs),s,n) = 
    98           if c = "\\" then (cs,s^"\\",n)
    99           else if c = "\n"
   100                then is_imprt_seq (cs,s^"\n",n+1)
   101           else if (c = "\t") orelse (c = " ")
   102                then is_imprt_seq (cs,s^c,n)
   103           else stringerr(n);
   104 
   105 fun is_escape_seq ([],_,n) = stringerr(n)
   106   | is_escape_seq ((c::cs),s,n) =  
   107           if c = "\\" then (cs,s^"\\",n)
   108           else if c = "\n" 
   109                then is_imprt_seq (cs,s^"\n",n+1) 
   110           else if (c = "\t") orelse (c = " ")
   111                then is_imprt_seq (cs,s^c,n)
   112           else if c = "^" 
   113                then is_control_chr (cs,s^"^",n)
   114           else if ("0" <= c andalso c <= "2") 
   115                then let val (cs',s') = 
   116                             is_3_dgt(cs,c,n)
   117                     in (cs',s^s',n)
   118                     end
   119           else stringerr(n);
   120 
   121 
   122 fun string (_,[],_,n) = stringerr(n)
   123   | string (toks, c::cs, s, n) =
   124        if c  = "\"" then ((Stg s, n)::toks , cs, n)
   125        else if c = "\\" 
   126             then  let val (cs',s',n') = 
   127                           is_escape_seq (cs, s^"\\",n) 
   128                   in string (toks,cs',s',n') end 
   129        else string (toks,cs,s^c,n);
   130 
   131 
   132 fun comment ((c1::c2::cs), n) =
   133       if c1 = "*" andalso c2 = ")" then (cs,n) else
   134       if c1 = "\n" then comment((c2::cs), n+1)
   135       else comment((c2::cs), n)
   136   | comment (_, n) = lexerr(n, "Missing end of comment");
   137 
   138 fun scanning (toks , [], n) = rev toks
   139   | scanning (toks , c :: cs, n) = 
   140        if is_letter c 
   141        then let val (id , cs2) = alphanum (c , cs)
   142             in if id = "ML"
   143                then let val text = implode cs2
   144                     in  scanning ((Txt text,n) :: toks , [], n)
   145                     end
   146                else scanning (tokenof(id,n) :: toks , cs2, n) 
   147             end
   148        else if is_digit c 
   149             then let val (nat , cs2) = numeric(c , cs)
   150                  in scanning ((Nat nat,n) :: toks , cs2, n) end
   151 
   152        else if c = "'" andalso is_letter(hd cs)
   153             then let val (var, cs2) = alphanum (hd cs, tl cs)
   154                  in scanning((TypVar (c^var),n) :: toks, cs2, n) end
   155 
   156        else if c mem specials
   157             then if c = "\""
   158                  then let val (toks', cs', n') = string (toks, cs, "", n)
   159                       in scanning (toks', cs', n') end
   160                  else let val (sy , cs2) = symbolic (c , cs)
   161                       in if sy = "(*"
   162                          then let val (cs3,n2) = comment(cs2,n)
   163                               in scanning (toks , cs3, n2) end
   164                          else scanning ((Key sy,n) :: toks, cs2, n)
   165                       end
   166        else if c = "\n" then scanning (toks, cs, n+1)
   167        else if c = " " orelse c = "\t" then scanning (toks , cs, n)
   168        else lexerr(n,"Illegal character " ^ c);
   169 
   170 fun scan a = scanning ([] , a, 1);
   171 
   172 end;