src/Pure/Syntax/parser.ML
author lcp
Thu, 12 Jan 1995 03:03:07 +0100
changeset 855 4c8d0ece1f95
parent 844 5e59370243fa
child 890 2b7275b13bef
permissions -rw-r--r--
Added singleton_iff, Sigma_empty1, Sigma_empty2, Collect_simps and UnInt_simps to ZF_ss. Added consI1 to ZF_typechecks.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/Syntax/parser.ML
46
f0f4978af183 *** empty log message ***
wenzelm
parents: 18
diff changeset
     2
    ID:         $Id$
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
     3
    Author:     Sonia Mahjoub, Markus Wenzel and Carsten Clasohm, TU Muenchen
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
     4
552
fc92fac8b0de removed roots arg of extend_gram;
wenzelm
parents: 395
diff changeset
     5
Isabelle's main parser (used for terms and types).
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
     6
*)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
     7
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
     8
signature PARSER =
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
     9
sig
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    10
  structure Lexicon: LEXICON
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    11
  structure SynExt: SYN_EXT
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    12
  local open Lexicon SynExt SynExt.Ast in
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    13
    type gram
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    14
    val empty_gram: gram
624
33b9b5da3e6f made major changes to grammar;
clasohm
parents: 552
diff changeset
    15
    val extend_gram: gram -> string list -> xprod list -> gram
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    16
    val merge_grams: gram -> gram -> gram
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    17
    val pretty_gram: gram -> Pretty.T list
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    18
    datatype parsetree =
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    19
      Node of string * parsetree list |
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    20
      Tip of token
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    21
    val parse: gram -> string -> token list -> parsetree list
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    22
  end
682
c36f49c76d22 added warning message
clasohm
parents: 624
diff changeset
    23
  val branching_level: int ref;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    24
end;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    25
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    26
functor ParserFun(structure Symtab: SYMTAB and Lexicon: LEXICON
552
fc92fac8b0de removed roots arg of extend_gram;
wenzelm
parents: 395
diff changeset
    27
  and SynExt: SYN_EXT): PARSER =
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    28
struct
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    29
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    30
structure Pretty = SynExt.Ast.Pretty;
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    31
structure Lexicon = Lexicon;
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    32
structure SynExt = SynExt;
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    33
open Lexicon SynExt;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    34
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    35
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    36
(** datatype gram **)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    37
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    38
datatype symb =
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    39
  Terminal of token |
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
    40
  Nonterminal of string * int;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
    41
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    42
datatype refsymb = Term of token | Nonterm of rhss_ref * int
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    43
                                (*reference to production list instead of name*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    44
and gram = Gram of (string * (symb list * string * int)) list *
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    45
                   (string * rhss_ref) list
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    46
withtype rhss_ref = (token option * (refsymb list * string * int) list) list ref
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    47
                                      (*lookahead table: token and productions*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    48
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    49
(* convert productions to reference grammar with lookaheads and eliminate
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    50
   chain productions *)
373
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
    51
fun mk_gram prods =
395
712dceb1ecc7 "Building new grammar" message is no longer displayed by empty_gram
clasohm
parents: 377
diff changeset
    52
  let (*get reference on list of all possible rhss for nonterminal lhs
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    53
        (if it doesn't exist a new one is created and added to the nonterminal
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    54
         list)*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    55
      fun get_rhss ref_prods lhs =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    56
        case assoc (ref_prods, lhs) of
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    57
            None =>
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    58
              let val l = ref [(None, [])]
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    59
              in (l, (lhs, l) :: ref_prods) end
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    60
          | Some l => (l, ref_prods);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    61
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    62
      (*convert symb list to refsymb list*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    63
      fun mk_refsymbs ref_prods [] rs = (rs, ref_prods)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    64
        | mk_refsymbs ref_prods (Terminal tk :: symbs) rs =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    65
            mk_refsymbs ref_prods symbs (rs @ [Term tk])
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    66
        | mk_refsymbs ref_prods (Nonterminal (name, prec) :: symbs) rs =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    67
            let val (rhss, ref_prods') = get_rhss ref_prods name
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    68
            in mk_refsymbs ref_prods' symbs (rs @ [Nonterm (rhss, prec)])
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    69
            end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    70
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    71
      (*convert prod list to (string * rhss_ref) list
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    72
        without computing lookaheads*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    73
      fun mk_ref_gram [] ref_prods = ref_prods
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    74
        | mk_ref_gram ((lhs, (rhs, name, prec)) :: ps) ref_prods =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    75
            let val (rhs', ref_prods') = get_rhss ref_prods lhs;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    76
                val (dummy, rhss) = hd (!rhs');
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    77
                val (ref_symbs, ref_prods'') = mk_refsymbs ref_prods' rhs [];
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    78
            in rhs' := [(dummy, (ref_symbs, name, prec) :: rhss)];
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    79
               mk_ref_gram ps ref_prods''
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    80
            end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    81
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    82
      (*eliminate chain productions*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    83
      fun elim_chain ref_gram =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    84
        let (*make a list of pairs representing chain productions and delete
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    85
              these productions*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    86
            fun list_chain [] = []
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    87
              | list_chain ((_, rhss_ref) :: ps) =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    88
                  let fun lists [] new_rhss chains = (new_rhss, chains)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    89
                        | lists (([Nonterm (id2, ~1)], _, ~1) :: rs) 
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    90
                                new_rhss chains =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    91
                            lists rs new_rhss ((rhss_ref, id2) :: chains)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    92
                        | lists (rhs :: rs) new_rhss chains = 
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    93
                            lists rs (rhs :: new_rhss) chains;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    94
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    95
                      val (dummy, rhss) = hd (!rhss_ref);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    96
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    97
                      val (new_rhss, chains) = lists rhss [] [];
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    98
                  in rhss_ref := [(dummy, new_rhss)];
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
    99
                     chains @ (list_chain ps)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   100
                  end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   101
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   102
            (*convert a list of pairs to an association list
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   103
              by using the first element as the key*)
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   104
            fun mk_assoc pairs =
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   105
              let fun doit [] result = result
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   106
                    | doit ((id1, id2) :: ps) result =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   107
                        doit ps 
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   108
                        (overwrite (result, (id1, id2 :: (assocs result id1))));
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   109
              in doit pairs [] end;
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   110
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   111
            (*replace reference by list of rhss in chain pairs*)
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   112
            fun deref (id1, ids) =
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   113
              let fun deref1 [] = []
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   114
                    | deref1 (id :: ids) =
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   115
                        let val (_, rhss) = hd (!id);
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   116
                        in rhss @ (deref1 ids) end;
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   117
              in (id1, deref1 ids) end;
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   118
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   119
            val chain_pairs = 
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   120
               map deref (transitive_closure (mk_assoc (list_chain ref_gram)));
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   121
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   122
            (*add new rhss to productions*)
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   123
            fun elim (rhss_ref, rhss) =
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   124
              let val (dummy, old_rhss) = hd (!rhss_ref);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   125
              in rhss_ref := [(dummy, old_rhss @ rhss)] end;
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   126
        in map elim chain_pairs;
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   127
           ref_gram
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   128
        end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   129
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   130
      val ref_gram = elim_chain (mk_ref_gram prods []);
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   131
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   132
      (*make a list of all lambda NTs 
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   133
        (i.e. nonterminals that can produce lambda)*)
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   134
      val lambdas =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   135
        let fun lambda [] result = result
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   136
              | lambda ((_, rhss_ref) :: nts) result =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   137
                  if rhss_ref mem result then
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   138
                    lambda nts result
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   139
                  else
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   140
                    let (*list all NTs that can be produced by a rhs
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   141
                          containing only lambda NTs*)
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   142
                        fun only_lambdas [] result = result
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   143
                          | only_lambdas ((_, rhss_ref) :: ps) result =
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   144
                              let fun only (symbs, _, _) =
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   145
                                  forall (fn (Nonterm (id, _)) => id mem result
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   146
                                           | (Term _)          => false) symbs;
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   147
                          
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   148
                                  val (_, rhss) = hd (!rhss_ref);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   149
                              in if not (rhss_ref mem result) andalso
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   150
                                    exists only rhss then
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   151
                                   only_lambdas ref_gram (rhss_ref :: result)
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   152
                                 else
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   153
                                   only_lambdas ps result
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   154
                              end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   155
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   156
                        val (_, rhss) = hd (!rhss_ref);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   157
                    in if exists (fn (symbs, _, _) => null symbs) rhss
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   158
                       then lambda nts
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   159
                              (only_lambdas ref_gram (rhss_ref :: result))
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   160
                       else lambda nts result
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   161
                    end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   162
         in lambda ref_gram [] end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   163
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   164
      (*list all nonterminals on which the lookahead depends (due to lambda 
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   165
        NTs this can be more than one)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   166
        and report if there is a terminal at the 'start'*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   167
      fun rhss_start [] skipped = (None, skipped)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   168
        | rhss_start (Term tk :: _) skipped = (Some tk, skipped)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   169
        | rhss_start (Nonterm (rhss_ref, _) :: rest) skipped =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   170
            if rhss_ref mem lambdas then 
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   171
              rhss_start rest (rhss_ref ins skipped)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   172
            else
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   173
              (None, rhss_ref ins skipped);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   174
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   175
      (*list all terminals that can start the given rhss*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   176
      fun look_rhss starts rhss_ref =
377
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   177
        let fun look [] _ result = result
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   178
              | look ((symbs, _, _) :: todos) done result =
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   179
                  let val (start_token, skipped) = rhss_start symbs [];
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   180
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   181
                      (*process all nonterminals on which the lookahead
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   182
                        depends and build the new todo and done lists for
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   183
                        the look function*)
377
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   184
                      fun look2 [] todos result = 
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   185
                            look todos (done union skipped) result
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   186
                        | look2 (rhss_ref :: ls) todos result =
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   187
                            if rhss_ref mem done then look2 ls todos result
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   188
                            else case assoc (starts, rhss_ref) of
377
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   189
                                Some tks => look2 ls todos (tks union result)
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   190
                              | None => 
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   191
                                  let val (_, rhss) = hd (!rhss_ref);
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   192
                                  in look2 ls (rhss @ todos) result end;
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   193
                  in case start_token of
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   194
                        Some tk => look2 skipped todos (start_token ins result)
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   195
                      | None => look2 skipped todos result
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   196
                  end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   197
 
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   198
            val (_, rhss) = hd (!rhss_ref);
377
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   199
        in look rhss [rhss_ref] [] end;                       
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   200
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   201
      (*make a table that contains all possible starting terminals
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   202
        for each nonterminal*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   203
      fun mk_starts [] starts = starts
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   204
        | mk_starts ((_, rhss_ref) :: ps) starts =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   205
            mk_starts ps ((rhss_ref, look_rhss starts rhss_ref) :: starts);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   206
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   207
      val starts = mk_starts ref_gram [];
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   208
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   209
      (*add list of allowed starting tokens to productions*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   210
      fun mk_lookahead (_, rhss_ref) =
377
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   211
        let (*compares two values of type token option 
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   212
              (used for speed reasons)*)
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   213
            fun matching_opt_tks (Some tk1, Some tk2) =
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   214
                  matching_tokens (tk1, tk2)
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   215
              | matching_opt_tks _ = false;
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   216
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   217
            (*add item to lookahead list (a list containing pairs of token and 
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   218
              rhss that can be started with it)*)
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   219
            fun add_start new_rhs tokens table =
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   220
                  let fun add [] [] = []
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   221
                        | add (tk :: tks) [] =
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   222
                            (tk, [new_rhs]) :: (add tks [])
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   223
                        | add tokens ((tk, rhss) :: ss) =
377
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   224
                            if gen_mem matching_opt_tks (tk, tokens) then 
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   225
                              (tk, new_rhs :: rhss) :: (add (tokens \ tk) ss)
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   226
                            else
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   227
                              (tk, rhss) :: (add tokens ss);
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   228
                  in add tokens table end;
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   229
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   230
            (*combine all lookaheads of a list of nonterminals*)
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   231
            fun combine_starts rhss_refs =
377
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   232
              foldr (gen_union matching_opt_tks)
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   233
              ((map (fn rhss_ref => let val Some tks = assoc (starts, rhss_ref)
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   234
                                    in tks end) rhss_refs), []);
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   235
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   236
            (*get lookahead for a rhs and update lhs' lookahead list*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   237
            fun look_rhss [] table = table
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   238
              | look_rhss ((rhs as (symbs, id, prec)) :: rs) table =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   239
                  let val (start_token, skipped) = rhss_start symbs [];
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   240
                      val starts = case start_token of
377
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   241
                                     Some tk => gen_ins matching_opt_tks 
ab8917806779 lookaheads are now computed faster (during the grammar is built)
clasohm
parents: 373
diff changeset
   242
                                             (Some tk, combine_starts skipped)
345
7007562172b1 made a few cosmetic changes
clasohm
parents: 330
diff changeset
   243
                                   | None => if skipped subset lambdas then
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   244
                                               [None]
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   245
                                             else
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   246
                                               combine_starts skipped;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   247
                  in look_rhss rs (add_start rhs starts table) end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   248
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   249
             val (_, rhss) = hd (!rhss_ref);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   250
        in rhss_ref := look_rhss rhss [] end;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   251
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   252
  in map mk_lookahead ref_gram;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   253
     Gram (prods, ref_gram)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   254
  end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   255
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   256
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   257
(* empty, extend, merge grams *)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   258
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   259
val empty_gram = mk_gram [];
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   260
624
33b9b5da3e6f made major changes to grammar;
clasohm
parents: 552
diff changeset
   261
fun extend_gram (gram1 as Gram (prods1, _)) roots xprods2 =
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   262
  let
764
b60e77395d1a changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents: 697
diff changeset
   263
    fun symb_of (Delim s) = Some (Terminal (Token s))
b60e77395d1a changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents: 697
diff changeset
   264
      | symb_of (Argument (s, p)) =
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   265
          (case predef_term s of
764
b60e77395d1a changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents: 697
diff changeset
   266
            None => Some (Nonterminal (s, p))
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   267
          | Some tk => Some (Terminal tk))
764
b60e77395d1a changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents: 697
diff changeset
   268
      | symb_of _ = None;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   269
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   270
    fun prod_of (XProd (lhs, xsymbs, const, pri)) =
764
b60e77395d1a changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents: 697
diff changeset
   271
      (lhs, (mapfilter symb_of xsymbs, const, pri));
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   272
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   273
    val prods2 = distinct (map prod_of xprods2);
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   274
  in
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   275
    if prods2 subset prods1 then gram1
395
712dceb1ecc7 "Building new grammar" message is no longer displayed by empty_gram
clasohm
parents: 377
diff changeset
   276
    else (writeln "Building new grammar...";
712dceb1ecc7 "Building new grammar" message is no longer displayed by empty_gram
clasohm
parents: 377
diff changeset
   277
          mk_gram (extend_list prods1 prods2))
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   278
  end;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   279
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   280
fun merge_grams (gram1 as Gram (prods1, _)) (gram2 as Gram (prods2, _)) =
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   281
  if prods2 subset prods1 then gram1
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   282
  else if prods1 subset prods2 then gram2
395
712dceb1ecc7 "Building new grammar" message is no longer displayed by empty_gram
clasohm
parents: 377
diff changeset
   283
  else (writeln "Building new grammar...";
712dceb1ecc7 "Building new grammar" message is no longer displayed by empty_gram
clasohm
parents: 377
diff changeset
   284
        mk_gram (merge_lists prods1 prods2));
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   285
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   286
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   287
(* pretty_gram *)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   288
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   289
fun pretty_gram (Gram (prods, _)) =
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   290
  let
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   291
    fun pretty_name name = [Pretty.str (name ^ " =")];
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   292
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   293
    fun pretty_symb (Terminal (Token s)) = Pretty.str (quote s)
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   294
      | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok)
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   295
      | pretty_symb (Nonterminal (s, p)) =
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   296
          Pretty.str (s ^ "[" ^ string_of_int p ^ "]");
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   297
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   298
    fun pretty_const "" = []
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   299
      | pretty_const c = [Pretty.str ("=> " ^ quote c)];
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   300
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   301
    fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")];
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   302
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   303
    fun pretty_prod (name, (symbs, const, pri)) =
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   304
      Pretty.block (Pretty.breaks (pretty_name name @
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   305
        map pretty_symb symbs @ pretty_const const @ pretty_pri pri));
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   306
  in
844
5e59370243fa pretty_gram: now sorts productions;
wenzelm
parents: 764
diff changeset
   307
    map pretty_prod (sort (op <= o pairself #1) prods)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   308
  end;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   309
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   310
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   311
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   312
(** parse **)
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   313
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   314
datatype parsetree =
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   315
  Node of string * parsetree list |
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   316
  Tip of token;
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   317
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   318
type state =
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   319
  rhss_ref * int      (*lhs: identification and production precedence*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   320
  * parsetree list    (*already parsed nonterminals on rhs*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   321
  * refsymb list      (*rest of rhs*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   322
  * string            (*name of production*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   323
  * int;              (*index for previous state list*)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   324
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   325
type earleystate = state list Array.array;
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   326
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   327
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   328
(*Get all rhss with precedence >= minPrec*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   329
fun getRHS minPrec = filter (fn (_, _, prec:int) => prec >= minPrec);
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   330
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   331
(*Get all rhss with precedence >= minPrec and < maxPrec*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   332
fun getRHS' minPrec maxPrec =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   333
  filter (fn (_, _, prec:int) => prec >= minPrec andalso prec < maxPrec);
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   334
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   335
(*Make states using a list of rhss*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   336
fun mkStates i minPrec lhsID rhss =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   337
  let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   338
  in map mkState rhss end;
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   339
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   340
(*Add parse tree to list and eliminate duplicates 
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   341
  saving the maximum precedence*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   342
fun conc (t, prec:int) [] = (None, [(t, prec)])
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   343
  | conc (t, prec) ((t', prec') :: ts) =
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   344
      if t = t' then
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   345
        (Some prec', if prec' >= prec then (t', prec') :: ts 
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   346
                     else (t, prec) :: ts)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   347
      else
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   348
        let val (n, ts') = conc (t, prec) ts
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   349
        in (n, (t', prec') :: ts') end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   350
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   351
(*Update entry in used*)
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   352
fun update_trees ((B, (i, ts)) :: used) (A, t) =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   353
  if A = B then
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   354
    let val (n, ts') = conc t ts
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   355
    in ((A, (i, ts')) :: used, n) end
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   356
  else
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   357
    let val (used', n) = update_trees used (A, t)
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   358
    in ((B, (i, ts)) :: used', n) end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   359
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   360
(*Replace entry in used*)
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   361
fun update_prec (A, prec) used =
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   362
  let fun update ((hd as (B, (_, ts))) :: used, used') =
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   363
        if A = B
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   364
        then used' @ ((A, (prec, ts)) :: used)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   365
        else update (used, hd :: used')
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   366
  in update (used, []) end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   367
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   368
fun getS A maxPrec Si =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   369
  filter
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   370
    (fn (_, _, _, Nonterm (B, prec) :: _, _, _)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   371
          => A = B andalso prec <= maxPrec
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   372
      | _ => false) Si;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   373
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   374
fun getS' A maxPrec minPrec Si =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   375
  filter
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   376
    (fn (_, _, _, Nonterm (B, prec) :: _, _, _)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   377
          => A = B andalso prec > minPrec andalso prec <= maxPrec
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   378
      | _ => false) Si;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   379
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   380
fun getStates Estate i ii A maxPrec =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   381
  filter
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   382
    (fn (_, _, _, Nonterm (B, prec) :: _, _, _)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   383
          => A = B andalso prec <= maxPrec
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   384
      | _ => false)
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   385
    (Array.sub (Estate, ii));
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   386
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   387
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   388
fun movedot_term (A, j, ts, Term a :: sa, id, i) c =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   389
  if valued_token c then
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   390
    (A, j, ts @ [Tip c], sa, id, i)
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   391
  else (A, j, ts, sa, id, i);
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   392
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   393
fun movedot_nonterm ts (A, j, tss, Nonterm _ :: sa, id, i) =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   394
  (A, j, tss @ ts, sa, id, i);
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   395
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   396
fun movedot_lambda _ [] = []
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   397
  | movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ((t, ki) :: ts) =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   398
      if k <= ki then
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   399
        (B, j, tss @ t, sa, id, i) ::
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   400
          movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   401
      else movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   402
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   403
682
c36f49c76d22 added warning message
clasohm
parents: 624
diff changeset
   404
val warned = ref false;  (*flag for warning message*)
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   405
val branching_level = ref 200;  (*trigger value for warnings*)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   406
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   407
fun PROCESSS Estate i c states =
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   408
let
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   409
fun get_lookahead rhss_ref = token_assoc (!rhss_ref, c);
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   410
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   411
fun processS used [] (Si, Sii) = (Si, Sii)
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   412
  | processS used (S :: States) (Si, Sii) =
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   413
      (case S of
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   414
        (_, _, _, Nonterm (rhss_ref, minPrec) :: _, _, _) =>
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   415
          let                                       (*predictor operation*)
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   416
            val (used', new_states) =
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   417
              (case assoc (used, rhss_ref) of
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   418
                Some (usedPrec, l) =>       (*nonterminal has been processed*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   419
                  if usedPrec <= minPrec then
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   420
                                      (*wanted precedence has been processed*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   421
                    (used, movedot_lambda S l)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   422
                  else            (*wanted precedence hasn't been parsed yet*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   423
                    let val rhss = get_lookahead rhss_ref;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   424
                      val States' = mkStates i minPrec rhss_ref
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   425
                                      (getRHS' minPrec usedPrec rhss);
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   426
                    in (update_prec (rhss_ref, minPrec) used, 
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   427
                        movedot_lambda S l @ States')
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   428
                    end
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   429
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   430
              | None =>           (*nonterminal is parsed for the first time*)
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   431
                  let val rhss = get_lookahead rhss_ref;
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   432
                      val States' = mkStates i minPrec rhss_ref
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   433
                                      (getRHS minPrec rhss);
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   434
                  in ((rhss_ref, (minPrec, [])) :: used, States') end);
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   435
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   436
            val _ = if not (!warned) andalso
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   437
                       length (new_states @ States) > (!branching_level) then
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   438
                     (writeln "Warning: Currently parsed expression could be \
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   439
                              \extremely ambiguous.";
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   440
                      warned := true)
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   441
                    else ()
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   442
          in
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   443
            processS used' (new_states @ States) (S :: Si, Sii)
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   444
          end
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   445
      | (_, _, _, Term a :: _, _, _) =>               (*scanner operation*)
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   446
          processS used States
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   447
            (S :: Si,
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   448
              if matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   449
      | (A, prec, ts, [], id, j) =>                   (*completer operation*)
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   450
          let val tt = if id = "" then ts else [Node (id, ts)] in
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   451
            if j = i then                             (*lambda production?*)
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   452
              let
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   453
                val (used', O) = update_trees used (A, (tt, prec));
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   454
              in
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   455
                (case O of
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   456
                  None =>
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   457
                    let
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   458
                      val Slist = getS A prec Si;
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   459
                      val States' = map (movedot_nonterm tt) Slist;
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   460
                    in
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   461
                      processS used' (States' @ States) (S :: Si, Sii)
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   462
                    end
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   463
                | Some n =>
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   464
                    if n >= prec then
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   465
                      processS used' States (S :: Si, Sii)
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   466
                    else
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   467
                      let
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   468
                        val Slist = getS' A prec n Si;
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   469
                        val States' = map (movedot_nonterm tt) Slist;
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   470
                      in
682
c36f49c76d22 added warning message
clasohm
parents: 624
diff changeset
   471
                       processS used' (States' @ States) (S :: Si, Sii)
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   472
                      end)
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   473
              end 
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   474
            else
697
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   475
              let val Slist = getStates Estate i j A prec in
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   476
                processS used (map (movedot_nonterm tt) Slist @ States)
40f72ab196f8 changed warning for extremely ambiguous expressions
clasohm
parents: 682
diff changeset
   477
                         (S :: Si, Sii)
682
c36f49c76d22 added warning message
clasohm
parents: 624
diff changeset
   478
              end
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   479
          end)
682
c36f49c76d22 added warning message
clasohm
parents: 624
diff changeset
   480
in processS [] states ([], []) end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   481
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   482
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   483
fun syntax_error toks allowed =
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   484
  error 
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   485
  ((if toks = [] then
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   486
      "error: unexpected end of input\n"
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   487
    else
367
b734dc03067e syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents: 362
diff changeset
   488
      "Syntax error at: " ^ quote (space_implode " " (map str_of_token 
b734dc03067e syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents: 362
diff changeset
   489
        ((rev o tl o rev) toks)))
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   490
      ^ "\n")
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   491
   ^ "Expected tokens: " 
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   492
   ^ space_implode ", " (map (quote o str_of_token) allowed));
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   493
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   494
fun produce stateset i indata prev_token =
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   495
                     (*the argument prev_token is only used for error messages*)
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   496
  (case Array.sub (stateset, i) of
373
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   497
    [] => let (*similar to token_assoc but does not automatically 
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   498
                include 'None' key*)
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   499
              fun token_assoc2 (list, key) =
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   500
                let fun assoc [] = []
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   501
                      | assoc ((keyi, xi) :: pairs) =
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   502
                          if is_some keyi andalso
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   503
                             matching_tokens (the keyi, key) then 
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   504
                            (assoc pairs) @ xi
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   505
                          else assoc pairs;
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   506
                          in assoc list end;
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   507
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   508
              (*test if tk is a lookahead for a given minimum precedence*)
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   509
              fun reduction minPrec tk _ (Term _ :: _, _, prec:int) =
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   510
                    if prec >= minPrec then true
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   511
                    else false
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   512
                | reduction minPrec tk checked 
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   513
                            (Nonterm (rhss_ref, NTprec)::_,_, prec) =
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   514
                    if prec >= minPrec andalso not (rhss_ref mem checked) then
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   515
                      exists (reduction NTprec tk (rhss_ref :: checked)) 
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   516
                             (token_assoc2 (!rhss_ref, tk))
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   517
                    else false;
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   518
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   519
              (*compute a list of allowed starting tokens 
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   520
                for a list of nonterminals considering precedence*)
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   521
              fun get_starts [] = []
372
40d565e51dea syntax_error now checks precedences when computing expected tokens
clasohm
parents: 367
diff changeset
   522
                | get_starts ((rhss_ref, minPrec:int) :: refs) =
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   523
                    let fun get [] = []
373
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   524
                          | get ((Some tk, prods) :: rhss) =
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   525
                              if exists (reduction minPrec tk [rhss_ref]) prods
68400ea32f7b fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents: 372
diff changeset
   526
                              then tk :: (get rhss)
372
40d565e51dea syntax_error now checks precedences when computing expected tokens
clasohm
parents: 367
diff changeset
   527
                              else get rhss
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   528
                          | get ((None, _) :: rhss) =
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   529
                              get rhss;
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   530
                    in (get (!rhss_ref)) union (get_starts refs) end;
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   531
372
40d565e51dea syntax_error now checks precedences when computing expected tokens
clasohm
parents: 367
diff changeset
   532
              val NTs = map (fn (_, _, _, Nonterm (a, prec) :: _, _, _) => 
40d565e51dea syntax_error now checks precedences when computing expected tokens
clasohm
parents: 367
diff changeset
   533
                              (a, prec))
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   534
                            (filter (fn (_, _, _, Nonterm _ :: _, _, _) => true
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   535
                                     | _ => false) (Array.sub (stateset, i-1)));
367
b734dc03067e syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents: 362
diff changeset
   536
              val allowed = distinct (get_starts NTs @
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   537
                            (map (fn (_, _, _, Term a :: _, _, _) => a)
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   538
                            (filter (fn (_, _, _, Term _ :: _, _, _) => true
367
b734dc03067e syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents: 362
diff changeset
   539
                                   | _ => false) (Array.sub (stateset, i-1)))));
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   540
          in syntax_error (if prev_token = EndToken then indata
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   541
                           else prev_token :: indata) allowed
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   542
          end
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   543
  | s =>
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   544
    (case indata of
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   545
      [] => Array.sub (stateset, i)
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   546
    | c :: cs =>
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   547
      let
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   548
        val (si, sii) = PROCESSS stateset i c s;
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   549
      in
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   550
        Array.update (stateset, i, si);
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   551
        Array.update (stateset, i + 1, sii);
362
6bea8fdc0e70 improved syntax error:
clasohm
parents: 345
diff changeset
   552
        produce stateset (i + 1) cs c
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   553
      end));
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   554
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   555
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   556
val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None);
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   557
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   558
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   559
fun earley grammar startsymbol indata =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   560
  let
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   561
    val rhss_ref = case assoc (grammar, startsymbol) of
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   562
                       Some r => r
624
33b9b5da3e6f made major changes to grammar;
clasohm
parents: 552
diff changeset
   563
                     | None   => error ("parse: Unknown startsymbol " ^ 
33b9b5da3e6f made major changes to grammar;
clasohm
parents: 552
diff changeset
   564
                                        quote startsymbol);
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   565
    val S0 = [(ref [], 0, [], [Nonterm (rhss_ref, 0), Term EndToken], "", 0)];
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   566
    val s = length indata + 1;
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   567
    val Estate = Array.array (s, []);
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   568
  in
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   569
    Array.update (Estate, 0, S0);
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   570
    let
682
c36f49c76d22 added warning message
clasohm
parents: 624
diff changeset
   571
      val l = (warned := false; produce Estate 0 indata EndToken(*dummy*));
624
33b9b5da3e6f made major changes to grammar;
clasohm
parents: 552
diff changeset
   572
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   573
      val p_trees = get_trees l;
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   574
    in p_trees end
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   575
  end;
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   576
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   577
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   578
fun parse (Gram (_, prod_tab)) start toks =
624
33b9b5da3e6f made major changes to grammar;
clasohm
parents: 552
diff changeset
   579
let val r =
237
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   580
  (case earley prod_tab start toks of
a7d3e712767a MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents: 46
diff changeset
   581
    [] => sys_error "parse: no parse trees"
330
2fda15dd1e0f changed the way a grammar is generated to allow the new parser to work;
clasohm
parents: 258
diff changeset
   582
  | pts => pts);
624
33b9b5da3e6f made major changes to grammar;
clasohm
parents: 552
diff changeset
   583
in r end
18
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   584
c9ec452ff08f lots of internal cleaning and tuning;
wenzelm
parents:
diff changeset
   585
end;