src/Pure/Thy/scan.ML
author clasohm
Thu, 16 Sep 1993 12:20:38 +0200
changeset 0 a5a9c433f639
child 213 42f2b8a3581f
permissions -rw-r--r--
Initial revision
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     1
(*  Title: 	Pure/Thy/scan
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     2
    ID:         $Id$
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     3
    Author: 	Sonia Mahjoub / Tobias Nipkow
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     4
    Copyright   1992  TU Muenchen
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     5
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     6
The scanner. Adapted from Larry Paulson's ML for the Working Programmer.
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     7
*)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     8
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
     9
signature LEXICAL =
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    10
sig
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    11
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    12
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    13
datatype token = Id  of string 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    14
               | Key of string
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    15
               | Nat of string
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    16
               | Stg of string
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    17
               | Txt of string
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    18
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    19
val scan : string list -> (token * int) list
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    20
end;
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    21
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    22
signature KEYWORD = 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    23
sig
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    24
val alphas  : string list
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    25
val symbols : string list
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    26
end;
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    27
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    28
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    29
functor LexicalFUN (Keyword: KEYWORD): LEXICAL = 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    30
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    31
struct
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    32
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    33
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    34
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    35
datatype token = Id  of string 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    36
               | Key of string
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    37
               | Nat of string
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    38
               | Stg of string
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    39
               | Txt of string;
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    40
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    41
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    42
fun lexerr(n,s) =
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    43
    error("Lexical error on line " ^ (string_of_int n) ^ ": " ^ s);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    44
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    45
val specials = explode"!{}@#$%^&*()+-=[]:\";',./?`_~<>|\\";
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    46
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    47
fun is_symbol c = "_" = c orelse "'" = c;
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    48
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    49
fun alphanum (id, c::cs) =
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    50
       if is_letter c orelse is_digit c orelse is_symbol c
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    51
       then alphanum (id ^ c , cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    52
       else (id , c :: cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    53
  | alphanum (id ,[]) = (id ,[]);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    54
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    55
fun numeric (nat, c::cs) =
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    56
      if is_digit c 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    57
      then numeric (nat^c, cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    58
      else (nat, c::cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    59
  | numeric (nat, []) = (nat,[]);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    60
 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    61
fun tokenof (a, n) =
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    62
      if a mem Keyword.alphas
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    63
      then (Key a, n) 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    64
      else (Id a, n);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    65
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    66
fun symbolic (sy, c::cs) =
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    67
       if (sy mem Keyword.symbols) andalso 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    68
          not((sy^c) mem Keyword.symbols) 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    69
          orelse not (c mem specials)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    70
       then (sy, c::cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    71
       else symbolic(sy^c, cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    72
  | symbolic (sy, []) = (sy, []);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    73
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    74
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    75
fun stringerr(n) = lexerr(n, "No matching quote found on this line");
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    76
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    77
fun is_control_chr ([],_,n) = stringerr(n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    78
  | is_control_chr (c::cs,s,n) = 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    79
          let val m = ord c
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    80
          in if (m >= 64 andalso m <= 95)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    81
             then (cs, s^c, n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    82
             else stringerr(n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    83
          end;
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    84
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    85
fun is_3_dgt (c1::c2::cs, c,n) = 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    86
          let val s = c^c1^c2
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    87
          in  if (s >= "000" andalso s <= "255")
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    88
              then (cs, s)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    89
              else stringerr(n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    90
          end 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    91
  | is_3_dgt (_,_,n) = stringerr(n); 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    92
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    93
fun is_imprt_seq ([],_,n) = stringerr(n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    94
  | is_imprt_seq ((c::cs),s,n) = 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    95
          if c = "\\" then (cs,s^"\\",n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    96
          else if c = "\n"
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    97
               then is_imprt_seq (cs,s^"\n",n+1)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    98
          else if (c = "\t") orelse (c = " ")
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
    99
               then is_imprt_seq (cs,s^c,n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   100
          else stringerr(n);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   101
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   102
fun is_escape_seq ([],_,n) = stringerr(n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   103
  | is_escape_seq ((c::cs),s,n) =  
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   104
          if c = "\\" then (cs,s^"\\",n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   105
          else if c = "\n" 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   106
               then is_imprt_seq (cs,s^"\n",n+1) 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   107
          else if (c = "\t") orelse (c = " ")
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   108
               then is_imprt_seq (cs,s^c,n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   109
          else if c = "^" 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   110
               then is_control_chr (cs,s^"^",n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   111
          else if ("0" <= c andalso c <= "2") 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   112
               then let val (cs',s') = 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   113
                            is_3_dgt(cs,c,n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   114
                    in (cs',s^s',n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   115
                    end
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   116
          else stringerr(n);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   117
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   118
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   119
fun string (_,[],_,n) = stringerr(n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   120
  | string (toks, c::cs, s, n) =
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   121
       if c  = "\"" then ((Stg s, n)::toks , cs, n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   122
       else if c = "\\" 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   123
            then  let val (cs',s',n') = 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   124
                          is_escape_seq (cs, s^"\\",n) 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   125
                  in string (toks,cs',s',n') end 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   126
       else string (toks,cs,s^c,n);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   127
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   128
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   129
fun comment ((c1::c2::cs), n) =
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   130
      if c1 = "*" andalso c2 = ")" then (cs,n) else
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   131
      if c1 = "\n" then comment((c2::cs), n+1)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   132
      else comment((c2::cs), n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   133
  | comment (_, n) = lexerr(n, "Missing end of comment");
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   134
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   135
fun scanning (toks , [], n) = rev toks
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   136
  | scanning (toks , c :: cs, n) = 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   137
       if is_letter c 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   138
       then let val (id , cs2) = alphanum (c , cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   139
            in if id = "ML"
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   140
               then let val text = implode cs2
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   141
                    in  scanning ((Txt text,n) :: toks , [], n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   142
                    end
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   143
               else scanning (tokenof(id,n) :: toks , cs2, n) 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   144
            end
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   145
       else if is_digit c 
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   146
            then let val (nat , cs2) = numeric(c , cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   147
                 in scanning ((Nat nat,n) :: toks , cs2, n) end
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   148
       else if c mem specials
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   149
            then if c = "\""
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   150
                 then let val (toks', cs', n') = string (toks, cs, "", n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   151
                      in scanning (toks', cs', n') end
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   152
                 else let val (sy , cs2) = symbolic (c , cs)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   153
                      in if sy = "(*"
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   154
                         then let val (cs3,n2) = comment(cs2,n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   155
                              in scanning (toks , cs3, n2) end
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   156
                         else scanning ((Key sy,n) :: toks, cs2, n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   157
                      end
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   158
       else if c = "\n" then scanning (toks, cs, n+1)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   159
       else if c = " " orelse c = "\t" then scanning (toks , cs, n)
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   160
       else lexerr(n,"Illegal character " ^ c);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   161
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   162
fun scan a = scanning ([] , a, 1);
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   163
a5a9c433f639 Initial revision
clasohm
parents:
diff changeset
   164
end;