src/Pure/Syntax/parser.ML
author clasohm
Fri Apr 22 12:43:53 1994 +0200 (1994-04-22 ago)
changeset 330 2fda15dd1e0f
parent 258 e540b7d4ecb1
child 345 7007562172b1
permissions -rw-r--r--
changed the way a grammar is generated to allow the new parser to work;
also made a lot of changes in parser.ML and minor ones elsewhere
wenzelm@18
     1
(*  Title:      Pure/Syntax/parser.ML
wenzelm@46
     2
    ID:         $Id$
wenzelm@18
     3
    Author:     Sonia Mahjoub and Markus Wenzel, TU Muenchen
wenzelm@18
     4
wenzelm@18
     5
Isabelle's main parser (used for terms and typs).
wenzelm@18
     6
wenzelm@18
     7
TODO:
wenzelm@18
     8
  improve syntax error
wenzelm@237
     9
  extend_gram: remove 'roots' arg
wenzelm@18
    10
*)
wenzelm@18
    11
wenzelm@18
    12
signature PARSER =
wenzelm@18
    13
sig
wenzelm@237
    14
  structure Lexicon: LEXICON
wenzelm@237
    15
  structure SynExt: SYN_EXT
wenzelm@237
    16
  local open Lexicon SynExt SynExt.Ast in
wenzelm@18
    17
    type gram
wenzelm@18
    18
    val empty_gram: gram
wenzelm@237
    19
    val extend_gram: gram -> string list -> xprod list -> gram
wenzelm@237
    20
    val merge_grams: gram -> gram -> gram
wenzelm@237
    21
    val pretty_gram: gram -> Pretty.T list
wenzelm@237
    22
    datatype parsetree =
wenzelm@237
    23
      Node of string * parsetree list |
wenzelm@237
    24
      Tip of token
clasohm@330
    25
    val parse: gram -> string -> token list -> parsetree list
wenzelm@18
    26
  end
wenzelm@18
    27
end;
wenzelm@18
    28
wenzelm@237
    29
functor ParserFun(structure Symtab: SYMTAB and Lexicon: LEXICON
clasohm@330
    30
  and SynExt: SYN_EXT) =
wenzelm@18
    31
struct
wenzelm@18
    32
wenzelm@237
    33
structure Pretty = SynExt.Ast.Pretty;
wenzelm@237
    34
structure Lexicon = Lexicon;
wenzelm@237
    35
structure SynExt = SynExt;
wenzelm@237
    36
open Lexicon SynExt;
wenzelm@18
    37
wenzelm@18
    38
wenzelm@18
    39
(** datatype gram **)
wenzelm@18
    40
wenzelm@237
    41
datatype symb =
wenzelm@237
    42
  Terminal of token |
wenzelm@237
    43
  Nonterminal of string * int;
wenzelm@18
    44
clasohm@330
    45
datatype refsymb = Term of token | Nonterm of rhss_ref * int
clasohm@330
    46
                                (*reference to production list instead of name*)
clasohm@330
    47
and gram = Gram of (string * (symb list * string * int)) list *
clasohm@330
    48
                   (string * rhss_ref) list
clasohm@330
    49
withtype rhss_ref = (token option * (refsymb list * string * int) list) list ref
clasohm@330
    50
                                      (*lookahead table: token and productions*)
clasohm@330
    51
clasohm@330
    52
(* convert productions to reference grammar with lookaheads and eliminate
clasohm@330
    53
   chain productions *)
clasohm@330
    54
fun mk_gram prods = 
clasohm@330
    55
  let (*get reference on list of all possible rhss for nonterminal lhs
clasohm@330
    56
        (if it doesn't exist a new one is created and added to the nonterminal
clasohm@330
    57
         list)*)
clasohm@330
    58
      fun get_rhss ref_prods lhs =
clasohm@330
    59
        case assoc (ref_prods, lhs) of
clasohm@330
    60
            None =>
clasohm@330
    61
              let val l = ref [(None, [])]
clasohm@330
    62
              in (l, (lhs, l) :: ref_prods) end
clasohm@330
    63
          | Some l => (l, ref_prods);
clasohm@330
    64
clasohm@330
    65
      (*convert symb list to refsymb list*)
clasohm@330
    66
      fun mk_refsymbs ref_prods [] rs = (rs, ref_prods)
clasohm@330
    67
        | mk_refsymbs ref_prods (Terminal tk :: symbs) rs =
clasohm@330
    68
            mk_refsymbs ref_prods symbs (rs @ [Term tk])
clasohm@330
    69
        | mk_refsymbs ref_prods (Nonterminal (name, prec) :: symbs) rs =
clasohm@330
    70
            let val (rhss, ref_prods') = get_rhss ref_prods name
clasohm@330
    71
            in mk_refsymbs ref_prods' symbs (rs @ [Nonterm (rhss, prec)])
clasohm@330
    72
            end;
clasohm@330
    73
clasohm@330
    74
      (*convert prod list to (string * rhss_ref) list
clasohm@330
    75
        without computing lookaheads*)
clasohm@330
    76
      fun mk_ref_gram [] ref_prods = ref_prods
clasohm@330
    77
        | mk_ref_gram ((lhs, (rhs, name, prec)) :: ps) ref_prods =
clasohm@330
    78
            let val (rhs', ref_prods') = get_rhss ref_prods lhs;
clasohm@330
    79
                val (dummy, rhss) = hd (!rhs');
clasohm@330
    80
                val (ref_symbs, ref_prods'') = mk_refsymbs ref_prods' rhs [];
clasohm@330
    81
            in rhs' := [(dummy, (ref_symbs, name, prec) :: rhss)];
clasohm@330
    82
               mk_ref_gram ps ref_prods''
clasohm@330
    83
            end;
clasohm@330
    84
clasohm@330
    85
      (*eliminate chain productions*)
clasohm@330
    86
      fun elim_chain ref_gram =
clasohm@330
    87
        let (*make a list of pairs representing chain productions and delete
clasohm@330
    88
              these productions*)
clasohm@330
    89
            fun list_chain [] = []
clasohm@330
    90
              | list_chain ((_, rhss_ref) :: ps) =
clasohm@330
    91
                  let fun lists [] new_rhss chains = (new_rhss, chains)
clasohm@330
    92
                        | lists (([Nonterm (id2, ~1)], _, ~1) :: rs) 
clasohm@330
    93
                                new_rhss chains =
clasohm@330
    94
                            lists rs new_rhss ((rhss_ref, id2) :: chains)
clasohm@330
    95
                        | lists (rhs :: rs) new_rhss chains = 
clasohm@330
    96
                            lists rs (rhs :: new_rhss) chains;
clasohm@330
    97
clasohm@330
    98
                      val (dummy, rhss) = hd (!rhss_ref);
clasohm@330
    99
clasohm@330
   100
                      val (new_rhss, chains) = lists rhss [] [];
clasohm@330
   101
                  in rhss_ref := [(dummy, new_rhss)];
clasohm@330
   102
                     chains @ (list_chain ps)
clasohm@330
   103
                  end;
clasohm@330
   104
clasohm@330
   105
            (*convert a list of pairs to an association list*)
clasohm@330
   106
            fun compress chains =
clasohm@330
   107
              let fun doit [] result = result
clasohm@330
   108
                    | doit ((id1, id2) :: ps) result =
clasohm@330
   109
                        doit ps 
clasohm@330
   110
                        (overwrite (result, (id1, id2 :: (assocs result id1))));
clasohm@330
   111
              in doit chains [] end;
clasohm@330
   112
clasohm@330
   113
            (*replace reference by list of rhss in chain pairs*)
clasohm@330
   114
            fun unref (id1, ids) =
clasohm@330
   115
              let fun unref1 [] = []
clasohm@330
   116
                    | unref1 (id :: ids) =
clasohm@330
   117
                        let val (_, rhss) = hd (!id);
clasohm@330
   118
                        in rhss @ (unref1 ids) end;
clasohm@330
   119
              in (id1, unref1 ids) end;
clasohm@330
   120
clasohm@330
   121
            val chain_pairs = 
clasohm@330
   122
               map unref (transitive_closure (compress (list_chain ref_gram)));
clasohm@330
   123
clasohm@330
   124
            (*add new rhss to productions*)
clasohm@330
   125
            fun mk_chain (rhss_ref, rhss) =
clasohm@330
   126
              let val (dummy, old_rhss) = hd (!rhss_ref);
clasohm@330
   127
              in rhss_ref := [(dummy, old_rhss @ rhss)] end;
clasohm@330
   128
        in map mk_chain chain_pairs;
clasohm@330
   129
           ref_gram
clasohm@330
   130
        end;
clasohm@330
   131
clasohm@330
   132
      val ref_gram = elim_chain (mk_ref_gram prods []);
wenzelm@18
   133
clasohm@330
   134
      (*make a list of all lambda NTs 
clasohm@330
   135
        (i.e. nonterminals that can produce lambda*)
clasohm@330
   136
      val lambdas =
clasohm@330
   137
        let fun lambda [] result = result
clasohm@330
   138
              | lambda ((_, rhss_ref) :: nts) result =
clasohm@330
   139
                  if rhss_ref mem result then
clasohm@330
   140
                    lambda nts result
clasohm@330
   141
                  else
clasohm@330
   142
                    let (*test if a list of rhss contains a production
clasohm@330
   143
                          consisting only of lambda NTs*)
clasohm@330
   144
                        fun only_lambdas _ [] = []
clasohm@330
   145
                          | only_lambdas result ((_, rhss_ref) :: ps) =
clasohm@330
   146
                              let fun only (symbs, _, _) =
clasohm@330
   147
                                   forall (fn (Nonterm (id, _)) => id mem result
clasohm@330
   148
                                            | (Term _)          => false) symbs;
clasohm@330
   149
                          
clasohm@330
   150
                                  val (_, rhss) = hd (!rhss_ref);
clasohm@330
   151
                              in if not (rhss_ref mem result) andalso
clasohm@330
   152
                                    exists only rhss then
clasohm@330
   153
                                   rhss_ref :: (only_lambdas result ps)
clasohm@330
   154
                                 else
clasohm@330
   155
                                   only_lambdas result ps
clasohm@330
   156
                              end;
clasohm@330
   157
clasohm@330
   158
                        (*search for NTs that can be produced by a list of
clasohm@330
   159
                          lambda NTs*)
clasohm@330
   160
                        fun produced_by [] result = result
clasohm@330
   161
                          | produced_by (rhss_ref :: rhss_refs) result =
clasohm@330
   162
                              let val new_lambdas = 
clasohm@330
   163
                                    only_lambdas result ref_gram;
clasohm@330
   164
                              in produced_by (rhss_refs union new_lambdas)
clasohm@330
   165
                                             (result @ new_lambdas)
clasohm@330
   166
                              end;
clasohm@330
   167
clasohm@330
   168
                        val (_, rhss) = hd (!rhss_ref);
clasohm@330
   169
                    in if exists (fn (symbs, _, _) => null symbs) rhss
clasohm@330
   170
                       then lambda nts
clasohm@330
   171
                              (produced_by [rhss_ref] (rhss_ref :: result))
clasohm@330
   172
                       else lambda nts result
clasohm@330
   173
                    end;
clasohm@330
   174
         in lambda ref_gram [] end;
clasohm@330
   175
clasohm@330
   176
      (*list all nonterminals on which the lookahead depends (due to lambda 
clasohm@330
   177
        NTs this can be more than one)
clasohm@330
   178
        and report if there is a terminal at the 'start'*)
clasohm@330
   179
      fun rhss_start [] skipped = (None, skipped)
clasohm@330
   180
        | rhss_start (Term tk :: _) skipped = (Some tk, skipped)
clasohm@330
   181
        | rhss_start (Nonterm (rhss_ref, _) :: rest) skipped =
clasohm@330
   182
            if rhss_ref mem lambdas then 
clasohm@330
   183
              rhss_start rest (rhss_ref ins skipped)
clasohm@330
   184
            else
clasohm@330
   185
              (None, rhss_ref ins skipped);
clasohm@330
   186
clasohm@330
   187
      (*list all terminals that can start the given rhss*)
clasohm@330
   188
      fun look_rhss starts rhss_ref =
clasohm@330
   189
        let fun look [] _ = []
clasohm@330
   190
              | look ((symbs, _, _) :: todos) done =
clasohm@330
   191
                  let val (start_token, skipped) = rhss_start symbs [];
clasohm@330
   192
clasohm@330
   193
                      (*process all nonterminals on which the lookahead
clasohm@330
   194
                        depends and build the new todo and done lists for
clasohm@330
   195
                        the look function*)
clasohm@330
   196
                      fun look2 [] todos = look todos (done union skipped)
clasohm@330
   197
                        | look2 (rhss_ref :: ls) todos =
clasohm@330
   198
                            if rhss_ref mem done then look2 ls todos
clasohm@330
   199
                            else case assoc (starts, rhss_ref) of
clasohm@330
   200
                                Some tks => tks union (look2 ls todos)
clasohm@330
   201
                              | None => let val (_, rhss) = hd (!rhss_ref);
clasohm@330
   202
                                        in look2 ls (rhss union todos) end;
clasohm@330
   203
                  in case start_token of
clasohm@330
   204
                         Some tk => start_token ins (look2 skipped todos)
clasohm@330
   205
                       | None => look2 skipped todos
clasohm@330
   206
                  end;
clasohm@330
   207
 
clasohm@330
   208
            val (_, rhss) = hd (!rhss_ref);
clasohm@330
   209
        in look rhss [rhss_ref] end;                       
clasohm@330
   210
clasohm@330
   211
      (*make a table that contains all possible starting terminals
clasohm@330
   212
        for each nonterminal*)
clasohm@330
   213
      fun mk_starts [] starts = starts
clasohm@330
   214
        | mk_starts ((_, rhss_ref) :: ps) starts =
clasohm@330
   215
            mk_starts ps ((rhss_ref, look_rhss starts rhss_ref) :: starts);
clasohm@330
   216
clasohm@330
   217
      val starts = mk_starts ref_gram [];
clasohm@330
   218
clasohm@330
   219
      (*add list of allowed starting tokens to productions*)
clasohm@330
   220
      fun mk_lookahead (_, rhss_ref) =
clasohm@330
   221
        let (*add item to lookahead list (a list containing pairs of token and 
clasohm@330
   222
              rhss that can be started with this token*)
clasohm@330
   223
            fun add_start _ [] table = table
clasohm@330
   224
              | add_start new_rhs (token :: tks) table =
clasohm@330
   225
                  let fun add [] =
clasohm@330
   226
                            [(token, [new_rhs])]
clasohm@330
   227
                        | add ((tk, rhss) :: ss) =
clasohm@330
   228
                            if token = tk then 
clasohm@330
   229
                              (tk, new_rhs :: rhss) :: ss
clasohm@330
   230
                            else
clasohm@330
   231
                              (tk, rhss) :: (add ss);
clasohm@330
   232
                  in add_start new_rhs tks (add table) end;
clasohm@330
   233
clasohm@330
   234
            (*combine all lookaheads of a list of nonterminals*)
clasohm@330
   235
            fun combine_starts [] = []
clasohm@330
   236
              | combine_starts (rhss_ref :: ps) =
clasohm@330
   237
                  let val Some tks = assoc (starts, rhss_ref)
clasohm@330
   238
                  in tks union combine_starts ps end;
clasohm@330
   239
clasohm@330
   240
            (*get lookahead for a rhs and update lhs' lookahead list*)
clasohm@330
   241
            fun look_rhss [] table = table
clasohm@330
   242
              | look_rhss ((rhs as (symbs, id, prec)) :: rs) table =
clasohm@330
   243
                  let val (start_token, skipped) = rhss_start symbs [];
clasohm@330
   244
                      val starts = case start_token of
clasohm@330
   245
                                     Some tk => Some tk 
clasohm@330
   246
                                                ins (combine_starts skipped)
clasohm@330
   247
                                   | None => if skipped = [] 
clasohm@330
   248
                            orelse forall (fn id => id mem lambdas) skipped then
clasohm@330
   249
                                                          (*lambda production?*)
clasohm@330
   250
                                               [None]
clasohm@330
   251
                                             else
clasohm@330
   252
                                               combine_starts skipped;
clasohm@330
   253
                  in look_rhss rs (add_start rhs starts table) end;
clasohm@330
   254
clasohm@330
   255
             val (_, rhss) = hd (!rhss_ref);
clasohm@330
   256
        in rhss_ref := look_rhss rhss [] end;
clasohm@330
   257
clasohm@330
   258
  in map mk_lookahead ref_gram;
clasohm@330
   259
     Gram (prods, ref_gram)
clasohm@330
   260
  end;
wenzelm@18
   261
wenzelm@18
   262
wenzelm@237
   263
(* empty, extend, merge grams *)
wenzelm@18
   264
wenzelm@237
   265
val empty_gram = mk_gram [];
wenzelm@18
   266
wenzelm@237
   267
fun extend_gram (gram1 as Gram (prods1, _)) _ xprods2 =
wenzelm@18
   268
  let
wenzelm@237
   269
    fun symb_of (Delim s) = Some (Terminal (Token s))
wenzelm@237
   270
      | symb_of (Argument (s, p)) =
wenzelm@18
   271
          (case predef_term s of
wenzelm@237
   272
            None => Some (Nonterminal (s, p))
wenzelm@237
   273
          | Some tk => Some (Terminal tk))
wenzelm@18
   274
      | symb_of _ = None;
wenzelm@18
   275
wenzelm@237
   276
    fun prod_of (XProd (lhs, xsymbs, const, pri)) =
wenzelm@237
   277
      (lhs, (mapfilter symb_of xsymbs, const, pri));
wenzelm@18
   278
wenzelm@237
   279
    val prods2 = distinct (map prod_of xprods2);
wenzelm@18
   280
  in
wenzelm@237
   281
    if prods2 subset prods1 then gram1
wenzelm@237
   282
    else mk_gram (extend_list prods1 prods2)
wenzelm@18
   283
  end;
wenzelm@18
   284
wenzelm@237
   285
fun merge_grams (gram1 as Gram (prods1, _)) (gram2 as Gram (prods2, _)) =
wenzelm@237
   286
  if prods2 subset prods1 then gram1
wenzelm@237
   287
  else if prods1 subset prods2 then gram2
wenzelm@237
   288
  else mk_gram (merge_lists prods1 prods2);
wenzelm@237
   289
wenzelm@18
   290
wenzelm@237
   291
(* pretty_gram *)
wenzelm@18
   292
wenzelm@237
   293
fun pretty_gram (Gram (prods, _)) =
wenzelm@237
   294
  let
wenzelm@237
   295
    fun pretty_name name = [Pretty.str (name ^ " =")];
wenzelm@18
   296
wenzelm@237
   297
    fun pretty_symb (Terminal (Token s)) = Pretty.str (quote s)
wenzelm@237
   298
      | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok)
wenzelm@237
   299
      | pretty_symb (Nonterminal (s, p)) =
wenzelm@237
   300
          Pretty.str (s ^ "[" ^ string_of_int p ^ "]");
wenzelm@18
   301
wenzelm@237
   302
    fun pretty_const "" = []
wenzelm@237
   303
      | pretty_const c = [Pretty.str ("=> " ^ quote c)];
wenzelm@237
   304
wenzelm@237
   305
    fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")];
wenzelm@237
   306
wenzelm@237
   307
    fun pretty_prod (name, (symbs, const, pri)) =
wenzelm@237
   308
      Pretty.block (Pretty.breaks (pretty_name name @
wenzelm@237
   309
        map pretty_symb symbs @ pretty_const const @ pretty_pri pri));
wenzelm@18
   310
  in
wenzelm@237
   311
    map pretty_prod prods
wenzelm@18
   312
  end;
wenzelm@18
   313
wenzelm@18
   314
wenzelm@18
   315
wenzelm@18
   316
(** parse **)
wenzelm@18
   317
wenzelm@237
   318
datatype parsetree =
wenzelm@237
   319
  Node of string * parsetree list |
wenzelm@237
   320
  Tip of token;
wenzelm@237
   321
wenzelm@18
   322
type state =
clasohm@330
   323
  rhss_ref * int      (*lhs: identification and production precedence*)
clasohm@330
   324
  * parsetree list    (*already parsed nonterminals on rhs*)
clasohm@330
   325
  * refsymb list      (*rest of rhs*)
clasohm@330
   326
  * string            (*name of production*)
clasohm@330
   327
  * int;              (*index for previous state list*)
wenzelm@18
   328
wenzelm@18
   329
type earleystate = state list Array.array;
wenzelm@18
   330
wenzelm@18
   331
clasohm@330
   332
(*Get all rhss with precedence >= minPrec*)
clasohm@330
   333
fun getRHS minPrec = filter (fn (_, _, prec:int) => prec >= minPrec);
wenzelm@237
   334
clasohm@330
   335
(*Get all rhss with precedence >= minPrec and < maxPrec*)
clasohm@330
   336
fun getRHS' minPrec maxPrec =
clasohm@330
   337
  filter (fn (_, _, prec:int) => prec >= minPrec andalso prec < maxPrec);
wenzelm@18
   338
clasohm@330
   339
(*Make states using a list of rhss*)
clasohm@330
   340
fun mkStates i minPrec lhsID rhss =
clasohm@330
   341
  let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i);
clasohm@330
   342
  in map mkState rhss end;
clasohm@330
   343
	
clasohm@330
   344
(*Add parse tree to list and eliminate duplicates 
clasohm@330
   345
  saving the maximum precedence*)
clasohm@330
   346
fun conc (t, prec:int) [] = (None, [(t, prec)])
clasohm@330
   347
  | conc (t, prec) ((t', prec') :: ts) =
clasohm@330
   348
      if t = t' then
clasohm@330
   349
        (Some prec', if prec' >= prec then (t', prec') :: ts 
clasohm@330
   350
                     else (t, prec) :: ts)
clasohm@330
   351
      else
clasohm@330
   352
        let val (n, ts') = conc (t, prec) ts
clasohm@330
   353
        in (n, (t', prec') :: ts') end;
wenzelm@18
   354
clasohm@330
   355
(*Update entry in used*)
wenzelm@237
   356
fun update_tree ((B, (i, ts)) :: used) (A, t) =
wenzelm@237
   357
  if A = B then
wenzelm@237
   358
    let val (n, ts') = conc t ts
wenzelm@237
   359
    in ((A, (i, ts')) :: used, n) end
wenzelm@237
   360
  else
wenzelm@237
   361
    let val (used', n) = update_tree used (A, t)
wenzelm@237
   362
    in ((B, (i, ts)) :: used', n) end;
wenzelm@18
   363
clasohm@330
   364
(*Replace entry in used*)
clasohm@330
   365
fun update_index (A, prec) used =
clasohm@330
   366
  let fun update((hd as (B, (_, ts))) :: used, used') =
clasohm@330
   367
        if A = B
clasohm@330
   368
        then used' @ ((A, (prec, ts)) :: used)
clasohm@330
   369
        else update (used, hd :: used')
clasohm@330
   370
  in update (used, []) end;
wenzelm@18
   371
clasohm@330
   372
fun getS A maxPrec Si =
wenzelm@237
   373
  filter
clasohm@330
   374
    (fn (_, _, _, Nonterm (B, prec) :: _, _, _)
clasohm@330
   375
          => A = B andalso prec <= maxPrec
wenzelm@237
   376
      | _ => false) Si;
wenzelm@18
   377
clasohm@330
   378
fun getS' A maxPrec minPrec Si =
wenzelm@237
   379
  filter
clasohm@330
   380
    (fn (_, _, _, Nonterm (B, prec) :: _, _, _)
clasohm@330
   381
          => A = B andalso prec > minPrec andalso prec <= maxPrec
wenzelm@237
   382
      | _ => false) Si;
wenzelm@18
   383
clasohm@330
   384
fun getStates Estate i ii A maxPrec =
wenzelm@237
   385
  filter
clasohm@330
   386
    (fn (_, _, _, Nonterm (B, prec) :: _, _, _)
clasohm@330
   387
          => A = B andalso prec <= maxPrec
wenzelm@237
   388
      | _ => false)
wenzelm@237
   389
    (Array.sub (Estate, ii));
wenzelm@18
   390
wenzelm@18
   391
clasohm@330
   392
fun movedot_term (A, j, ts, Term a :: sa, id, i) c =
wenzelm@237
   393
  if valued_token c then
wenzelm@237
   394
    (A, j, (ts @ [Tip c]), sa, id, i)
wenzelm@237
   395
  else (A, j, ts, sa, id, i);
wenzelm@18
   396
clasohm@330
   397
fun movedot_nonterm ts (A, j, tss, Nonterm _ :: sa, id, i) =
wenzelm@237
   398
  (A, j, tss @ ts, sa, id, i);
wenzelm@18
   399
wenzelm@237
   400
fun movedot_lambda _ [] = []
clasohm@330
   401
  | movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ((t, ki) :: ts) =
wenzelm@237
   402
      if k <= ki then
wenzelm@237
   403
        (B, j, tss @ t, sa, id, i) ::
clasohm@330
   404
          movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts
clasohm@330
   405
      else movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts;
wenzelm@18
   406
wenzelm@18
   407
wenzelm@18
   408
clasohm@330
   409
fun PROCESSS Estate i c states =
wenzelm@18
   410
let
clasohm@330
   411
fun get_lookahead rhss_ref = token_assoc (!rhss_ref, c);
clasohm@330
   412
wenzelm@237
   413
fun processS used [] (Si, Sii) = (Si, Sii)
wenzelm@237
   414
  | processS used (S :: States) (Si, Sii) =
wenzelm@237
   415
      (case S of
clasohm@330
   416
        (_, _, _, Nonterm (rhss_ref, minPrec) :: _, _, _) =>
clasohm@330
   417
          let                                       (*predictor operation*)
clasohm@330
   418
            val (used_new, States_new) =
clasohm@330
   419
              (case assoc (used, rhss_ref) of
clasohm@330
   420
                Some (usedPrec, l) =>       (*nonterminal has been processed*)
clasohm@330
   421
                  if usedPrec <= minPrec then
clasohm@330
   422
                                      (*wanted precedence has been processed*)
clasohm@330
   423
                    (used, movedot_lambda S l)
clasohm@330
   424
                  else            (*wanted precedence hasn't been parsed yet*)
clasohm@330
   425
                    let val rhss = get_lookahead rhss_ref;
clasohm@330
   426
                      val States' = mkStates i minPrec rhss_ref
clasohm@330
   427
                                      (getRHS' minPrec usedPrec rhss);
clasohm@330
   428
                    in (update_index (rhss_ref, minPrec) used, 
clasohm@330
   429
                        movedot_lambda S l @ States')
wenzelm@237
   430
                    end
wenzelm@18
   431
clasohm@330
   432
              | None =>           (*nonterminal is parsed for the first time*)
clasohm@330
   433
                  let val rhss = get_lookahead rhss_ref;
clasohm@330
   434
                      val States' = mkStates i minPrec rhss_ref
clasohm@330
   435
                                      (getRHS minPrec rhss);
clasohm@330
   436
                  in ((rhss_ref, (minPrec, [])) :: used, States') end)
wenzelm@18
   437
          in
clasohm@330
   438
            processS used_new (States_new @ States) (S :: Si, Sii)
wenzelm@18
   439
          end
clasohm@330
   440
      | (_, _, _, Term a :: _, _, _) =>               (*scanner operation*)
wenzelm@237
   441
          processS used States
wenzelm@237
   442
            (S :: Si,
wenzelm@237
   443
              if matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
wenzelm@18
   444
clasohm@330
   445
      | (A, prec, ts, [], id, j) =>                   (*completer operation*)
wenzelm@237
   446
          let
wenzelm@237
   447
            val tt = if id = "" then ts else [Node (id, ts)]
wenzelm@18
   448
          in
clasohm@330
   449
            if j = i then                             (*lambda production?*)
wenzelm@237
   450
              let
clasohm@330
   451
                val (used', O) = update_tree used (A, (tt, prec));
wenzelm@237
   452
              in
wenzelm@237
   453
                (case O of
wenzelm@237
   454
                  None =>
wenzelm@237
   455
                    let
clasohm@330
   456
                      val Slist = getS A prec Si;
wenzelm@237
   457
                      val States' = map (movedot_nonterm tt) Slist;
wenzelm@237
   458
                    in
clasohm@330
   459
                      processS used' (States' @ States) (S :: Si, Sii)
wenzelm@237
   460
                    end
wenzelm@237
   461
                | Some n =>
clasohm@330
   462
                    if n >= prec then
wenzelm@237
   463
                      processS used' States (S :: Si, Sii)
wenzelm@237
   464
                    else
wenzelm@237
   465
                      let
clasohm@330
   466
                        val Slist = getS' A prec n Si;
wenzelm@237
   467
                        val States' = map (movedot_nonterm tt) Slist;
wenzelm@237
   468
                      in
clasohm@330
   469
                        processS used' (States' @ States) (S :: Si, Sii)
wenzelm@237
   470
                      end)
clasohm@330
   471
              end 
wenzelm@237
   472
            else
wenzelm@237
   473
              processS used
clasohm@330
   474
                (map (movedot_nonterm tt) (getStates Estate i j A prec) @ States)
wenzelm@237
   475
                (S :: Si, Sii)
wenzelm@237
   476
          end)
wenzelm@18
   477
in
wenzelm@237
   478
  processS [] states ([], [])
wenzelm@18
   479
end;
wenzelm@18
   480
wenzelm@18
   481
wenzelm@18
   482
wenzelm@18
   483
fun syntax_error toks =
wenzelm@258
   484
  error ("Syntax error at: " ^ quote (space_implode " " (map str_of_token toks)));
wenzelm@18
   485
clasohm@330
   486
fun produce stateset i indata =
wenzelm@237
   487
  (case Array.sub (stateset, i) of
wenzelm@237
   488
    [] => syntax_error indata (* MMW *)(* FIXME *)
wenzelm@237
   489
  | s =>
wenzelm@237
   490
    (case indata of
wenzelm@237
   491
      [] => Array.sub (stateset, i)
wenzelm@237
   492
    | c :: cs =>
wenzelm@237
   493
      let
clasohm@330
   494
        val (si, sii) = PROCESSS stateset i c s;
wenzelm@237
   495
      in
wenzelm@237
   496
        Array.update (stateset, i, si);
wenzelm@237
   497
        Array.update (stateset, i + 1, sii);
clasohm@330
   498
        produce stateset (i + 1) cs
wenzelm@237
   499
      end));
wenzelm@18
   500
wenzelm@18
   501
wenzelm@237
   502
val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None);
wenzelm@237
   503
wenzelm@18
   504
wenzelm@18
   505
fun earley grammar startsymbol indata =
wenzelm@237
   506
  let
clasohm@330
   507
    val rhss_ref = case assoc (grammar, startsymbol) of
clasohm@330
   508
                       Some r => r
clasohm@330
   509
                     | None => error ("parse: Unknown startsymbol " ^ 
clasohm@330
   510
                                      quote startsymbol);
clasohm@330
   511
    val S0 = [(ref [], 0, [], [Nonterm (rhss_ref, 0), Term EndToken], "", 0)];
clasohm@330
   512
    val s = length indata + 1;
wenzelm@237
   513
    val Estate = Array.array (s, []);
wenzelm@237
   514
  in
wenzelm@237
   515
    Array.update (Estate, 0, S0);
wenzelm@237
   516
    let
clasohm@330
   517
      val l = produce Estate 0 indata;
wenzelm@237
   518
      val p_trees = get_trees l;
wenzelm@237
   519
    in p_trees end
wenzelm@237
   520
  end;
wenzelm@18
   521
wenzelm@18
   522
wenzelm@237
   523
fun parse (Gram (_, prod_tab)) start toks =
wenzelm@237
   524
  (case earley prod_tab start toks of
wenzelm@237
   525
    [] => sys_error "parse: no parse trees"
clasohm@330
   526
  | pts => pts);
wenzelm@18
   527
wenzelm@18
   528
end;
wenzelm@18
   529