src/Pure/Thy/scan.ML
changeset 0 a5a9c433f639
child 213 42f2b8a3581f
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Pure/Thy/scan.ML	Thu Sep 16 12:20:38 1993 +0200
     1.3 @@ -0,0 +1,164 @@
     1.4 +(*  Title: 	Pure/Thy/scan
     1.5 +    ID:         $Id$
     1.6 +    Author: 	Sonia Mahjoub / Tobias Nipkow
     1.7 +    Copyright   1992  TU Muenchen
     1.8 +
     1.9 +The scanner. Adapted from Larry Paulson's ML for the Working Programmer.
    1.10 +*)
    1.11 +
    1.12 +signature LEXICAL =
    1.13 +sig
    1.14 +
    1.15 +
    1.16 +datatype token = Id  of string 
    1.17 +               | Key of string
    1.18 +               | Nat of string
    1.19 +               | Stg of string
    1.20 +               | Txt of string
    1.21 +
    1.22 +val scan : string list -> (token * int) list
    1.23 +end;
    1.24 +
    1.25 +signature KEYWORD = 
    1.26 +sig
    1.27 +val alphas  : string list
    1.28 +val symbols : string list
    1.29 +end;
    1.30 +
    1.31 +
    1.32 +functor LexicalFUN (Keyword: KEYWORD): LEXICAL = 
    1.33 +
    1.34 +struct
    1.35 +
    1.36 +
    1.37 +
    1.38 +datatype token = Id  of string 
    1.39 +               | Key of string
    1.40 +               | Nat of string
    1.41 +               | Stg of string
    1.42 +               | Txt of string;
    1.43 +
    1.44 +
    1.45 +fun lexerr(n,s) =
    1.46 +    error("Lexical error on line " ^ (string_of_int n) ^ ": " ^ s);
    1.47 +
    1.48 +val specials = explode"!{}@#$%^&*()+-=[]:\";',./?`_~<>|\\";
    1.49 +
    1.50 +fun is_symbol c = "_" = c orelse "'" = c;
    1.51 +
    1.52 +fun alphanum (id, c::cs) =
    1.53 +       if is_letter c orelse is_digit c orelse is_symbol c
    1.54 +       then alphanum (id ^ c , cs)
    1.55 +       else (id , c :: cs)
    1.56 +  | alphanum (id ,[]) = (id ,[]);
    1.57 +
    1.58 +fun numeric (nat, c::cs) =
    1.59 +      if is_digit c 
    1.60 +      then numeric (nat^c, cs)
    1.61 +      else (nat, c::cs)
    1.62 +  | numeric (nat, []) = (nat,[]);
    1.63 + 
    1.64 +fun tokenof (a, n) =
    1.65 +      if a mem Keyword.alphas
    1.66 +      then (Key a, n) 
    1.67 +      else (Id a, n);
    1.68 +
    1.69 +fun symbolic (sy, c::cs) =
    1.70 +       if (sy mem Keyword.symbols) andalso 
    1.71 +          not((sy^c) mem Keyword.symbols) 
    1.72 +          orelse not (c mem specials)
    1.73 +       then (sy, c::cs)
    1.74 +       else symbolic(sy^c, cs)
    1.75 +  | symbolic (sy, []) = (sy, []);
    1.76 +
    1.77 +
    1.78 +fun stringerr(n) = lexerr(n, "No matching quote found on this line");
    1.79 +
    1.80 +fun is_control_chr ([],_,n) = stringerr(n)
    1.81 +  | is_control_chr (c::cs,s,n) = 
    1.82 +          let val m = ord c
    1.83 +          in if (m >= 64 andalso m <= 95)
    1.84 +             then (cs, s^c, n)
    1.85 +             else stringerr(n)
    1.86 +          end;
    1.87 +
    1.88 +fun is_3_dgt (c1::c2::cs, c,n) = 
    1.89 +          let val s = c^c1^c2
    1.90 +          in  if (s >= "000" andalso s <= "255")
    1.91 +              then (cs, s)
    1.92 +              else stringerr(n)
    1.93 +          end 
    1.94 +  | is_3_dgt (_,_,n) = stringerr(n); 
    1.95 +
    1.96 +fun is_imprt_seq ([],_,n) = stringerr(n)
    1.97 +  | is_imprt_seq ((c::cs),s,n) = 
    1.98 +          if c = "\\" then (cs,s^"\\",n)
    1.99 +          else if c = "\n"
   1.100 +               then is_imprt_seq (cs,s^"\n",n+1)
   1.101 +          else if (c = "\t") orelse (c = " ")
   1.102 +               then is_imprt_seq (cs,s^c,n)
   1.103 +          else stringerr(n);
   1.104 +
   1.105 +fun is_escape_seq ([],_,n) = stringerr(n)
   1.106 +  | is_escape_seq ((c::cs),s,n) =  
   1.107 +          if c = "\\" then (cs,s^"\\",n)
   1.108 +          else if c = "\n" 
   1.109 +               then is_imprt_seq (cs,s^"\n",n+1) 
   1.110 +          else if (c = "\t") orelse (c = " ")
   1.111 +               then is_imprt_seq (cs,s^c,n)
   1.112 +          else if c = "^" 
   1.113 +               then is_control_chr (cs,s^"^",n)
   1.114 +          else if ("0" <= c andalso c <= "2") 
   1.115 +               then let val (cs',s') = 
   1.116 +                            is_3_dgt(cs,c,n)
   1.117 +                    in (cs',s^s',n)
   1.118 +                    end
   1.119 +          else stringerr(n);
   1.120 +
   1.121 +
   1.122 +fun string (_,[],_,n) = stringerr(n)
   1.123 +  | string (toks, c::cs, s, n) =
   1.124 +       if c  = "\"" then ((Stg s, n)::toks , cs, n)
   1.125 +       else if c = "\\" 
   1.126 +            then  let val (cs',s',n') = 
   1.127 +                          is_escape_seq (cs, s^"\\",n) 
   1.128 +                  in string (toks,cs',s',n') end 
   1.129 +       else string (toks,cs,s^c,n);
   1.130 +
   1.131 +
   1.132 +fun comment ((c1::c2::cs), n) =
   1.133 +      if c1 = "*" andalso c2 = ")" then (cs,n) else
   1.134 +      if c1 = "\n" then comment((c2::cs), n+1)
   1.135 +      else comment((c2::cs), n)
   1.136 +  | comment (_, n) = lexerr(n, "Missing end of comment");
   1.137 +
   1.138 +fun scanning (toks , [], n) = rev toks
   1.139 +  | scanning (toks , c :: cs, n) = 
   1.140 +       if is_letter c 
   1.141 +       then let val (id , cs2) = alphanum (c , cs)
   1.142 +            in if id = "ML"
   1.143 +               then let val text = implode cs2
   1.144 +                    in  scanning ((Txt text,n) :: toks , [], n)
   1.145 +                    end
   1.146 +               else scanning (tokenof(id,n) :: toks , cs2, n) 
   1.147 +            end
   1.148 +       else if is_digit c 
   1.149 +            then let val (nat , cs2) = numeric(c , cs)
   1.150 +                 in scanning ((Nat nat,n) :: toks , cs2, n) end
   1.151 +       else if c mem specials
   1.152 +            then if c = "\""
   1.153 +                 then let val (toks', cs', n') = string (toks, cs, "", n)
   1.154 +                      in scanning (toks', cs', n') end
   1.155 +                 else let val (sy , cs2) = symbolic (c , cs)
   1.156 +                      in if sy = "(*"
   1.157 +                         then let val (cs3,n2) = comment(cs2,n)
   1.158 +                              in scanning (toks , cs3, n2) end
   1.159 +                         else scanning ((Key sy,n) :: toks, cs2, n)
   1.160 +                      end
   1.161 +       else if c = "\n" then scanning (toks, cs, n+1)
   1.162 +       else if c = " " orelse c = "\t" then scanning (toks , cs, n)
   1.163 +       else lexerr(n,"Illegal character " ^ c);
   1.164 +
   1.165 +fun scan a = scanning ([] , a, 1);
   1.166 +
   1.167 +end;