src/Pure/Syntax/parser.ML
changeset 15752 8cdc6249044b
parent 15570 8d8c70b41bab
child 15979 c81578ac2d31
equal deleted inserted replaced
15751:65e4790c7914 15752:8cdc6249044b
     4 
     4 
     5 Isabelle's main parser (used for terms and types).
     5 Isabelle's main parser (used for terms and types).
     6 *)
     6 *)
     7 
     7 
     8 signature PARSER =
     8 signature PARSER =
     9   sig
     9 sig
    10   type gram
    10   type gram
    11   val empty_gram: gram
    11   val empty_gram: gram
    12   val extend_gram: gram -> SynExt.xprod list -> gram
    12   val extend_gram: gram -> SynExt.xprod list -> gram
       
    13   val make_gram: SynExt.xprod list -> gram
    13   val merge_grams: gram -> gram -> gram
    14   val merge_grams: gram -> gram -> gram
    14   val pretty_gram: gram -> Pretty.T list
    15   val pretty_gram: gram -> Pretty.T list
    15   datatype parsetree =
    16   datatype parsetree =
    16     Node of string * parsetree list |
    17     Node of string * parsetree list |
    17     Tip of Lexicon.token
    18     Tip of Lexicon.token
    18   val parse: gram -> string -> Lexicon.token list -> parsetree list
    19   val parse: gram -> string -> Lexicon.token list -> parsetree list
    19   val branching_level: int ref
    20   val branching_level: int ref
    20   end;
    21 end;
    21 
    22 
    22 
    23 structure Parser: PARSER =
    23 structure Parser : PARSER =
       
    24 struct
    24 struct
       
    25 
    25 open Lexicon SynExt;
    26 open Lexicon SynExt;
       
    27 
    26 
    28 
    27 (** datatype gram **)
    29 (** datatype gram **)
    28 
    30 
    29 type nt_tag = int;              (*production for the NTs are stored in an array
    31 type nt_tag = int;              (*production for the NTs are stored in an array
    30                                   so we can identify NTs by their index*)
    32                                   so we can identify NTs by their index*)
    52                          and also as an entry in "lambdas"*)
    54                          and also as an entry in "lambdas"*)
    53 
    55 
    54 val UnknownStart = EndToken;       (*productions for which no starting token is
    56 val UnknownStart = EndToken;       (*productions for which no starting token is
    55                                      known yet are associated with this token*)
    57                                      known yet are associated with this token*)
    56 
    58 
    57 (* get all NTs that are connected with a list of NTs 
    59 (* get all NTs that are connected with a list of NTs
    58    (used for expanding chain list)*)
    60    (used for expanding chain list)*)
    59 fun connected_with _ [] relatives = relatives
    61 fun connected_with _ [] relatives = relatives
    60   | connected_with chains (root :: roots) relatives =
    62   | connected_with chains (root :: roots) relatives =
    61     let val branches = (assocs chains root) \\ relatives;
    63     let val branches = (assocs chains root) \\ relatives;
    62     in connected_with chains (branches @ roots) (branches @ relatives) end;
    64     in connected_with chains (branches @ roots) (branches @ relatives) end;
    66    prod_count is of type "int option" and is only updated if it is <> NONE*)
    68    prod_count is of type "int option" and is only updated if it is <> NONE*)
    67 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count)
    69 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count)
    68   | add_prods prods chains lambdas prod_count
    70   | add_prods prods chains lambdas prod_count
    69               ((lhs, new_prod as (rhs, name, pri)) :: ps) =
    71               ((lhs, new_prod as (rhs, name, pri)) :: ps) =
    70     let
    72     let
    71       (*test if new_prod is a chain production*)
    73       val chain_from = case (pri, rhs) of (~1, [Nonterminal (id, ~1)]) => SOME id | _ => NONE;
    72       val (new_chain, chains') =
    74 
    73         let (*store chain if it does not already exist*)
    75       (*store chain if it does not already exist*)
    74             fun store_chain from =
    76       val (new_chain, chains') = case chain_from of NONE => (NONE, chains) | SOME from =>
    75               let val old_tos = assocs chains from;
    77         let val old_tos = assocs chains from in
    76               in if lhs mem old_tos then (NONE, chains)
    78           if lhs mem old_tos then (NONE, chains)
    77                  else (SOME from, overwrite (chains, (from, lhs ins old_tos)))
    79           else (SOME from, overwrite (chains, (from, lhs ins old_tos)))
    78               end;
       
    79         in if pri = ~1 then
       
    80              case rhs of [Nonterminal (id, ~1)] => store_chain id
       
    81                        | _ => (NONE, chains)
       
    82            else (NONE, chains)
       
    83         end;
    80         end;
    84 
    81 
    85       (*propagate new chain in lookahead and lambda lists;
    82       (*propagate new chain in lookahead and lambda lists;
    86         added_starts is used later to associate existing
    83         added_starts is used later to associate existing
    87         productions with new starting tokens*)
    84         productions with new starting tokens*)
   138             | propagate_lambda (l :: ls) added_starts lambdas =
   135             | propagate_lambda (l :: ls) added_starts lambdas =
   139               let
   136               let
   140                 (*get lookahead for lambda NT*)
   137                 (*get lookahead for lambda NT*)
   141                 val ((dependent, l_starts), _) = Array.sub (prods, l);
   138                 val ((dependent, l_starts), _) = Array.sub (prods, l);
   142 
   139 
   143                 (*check productions whose lookahead may depend on lamdba NT*)
   140                 (*check productions whose lookahead may depend on lambda NT*)
   144                 fun examine_prods [] add_lambda nt_dependencies added_tks
   141                 fun examine_prods [] add_lambda nt_dependencies added_tks
   145                                   nt_prods =
   142                                   nt_prods =
   146                       (add_lambda, nt_dependencies, added_tks, nt_prods)
   143                       (add_lambda, nt_dependencies, added_tks, nt_prods)
   147                   | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda
   144                   | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda
   148                       nt_dependencies added_tks nt_prods =
   145                       nt_dependencies added_tks nt_prods =
   153                         val new_lambda = is_none tk andalso nts subset lambdas;
   150                         val new_lambda = is_none tk andalso nts subset lambdas;
   154 
   151 
   155                         val new_tks = (if isSome tk then [valOf tk] else []) @
   152                         val new_tks = (if isSome tk then [valOf tk] else []) @
   156                           Library.foldl token_union ([], map starts_for_nt nts) \\
   153                           Library.foldl token_union ([], map starts_for_nt nts) \\
   157                           l_starts;
   154                           l_starts;
   158   
   155 
   159                         val added_tks' = token_union (new_tks, added_tks);
   156                         val added_tks' = token_union (new_tks, added_tks);
   160 
   157 
   161                         val nt_dependencies' = nts union nt_dependencies;
   158                         val nt_dependencies' = nts union nt_dependencies;
   162 
   159 
   163                         (*associate production with new starting tokens*)
   160                         (*associate production with new starting tokens*)
   166                             let val old_prods = assocs nt_prods tk;
   163                             let val old_prods = assocs nt_prods tk;
   167 
   164 
   168                                 val prods' = p :: old_prods;
   165                                 val prods' = p :: old_prods;
   169                             in copy tks (overwrite (nt_prods, (tk, prods')))
   166                             in copy tks (overwrite (nt_prods, (tk, prods')))
   170                             end;
   167                             end;
   171   
   168 
   172                         val nt_prods' =
   169                         val nt_prods' =
   173                           let val new_opt_tks = map SOME new_tks;
   170                           let val new_opt_tks = map SOME new_tks;
   174                           in copy ((if new_lambda then [NONE] else []) @
   171                           in copy ((if new_lambda then [NONE] else []) @
   175                                    new_opt_tks) nt_prods
   172                                    new_opt_tks) nt_prods
   176                           end;
   173                           end;
   229               end;
   226               end;
   230         in propagate_lambda (lambdas' \\ lambdas) added_starts lambdas' end;
   227         in propagate_lambda (lambdas' \\ lambdas) added_starts lambdas' end;
   231 
   228 
   232       (*insert production into grammar*)
   229       (*insert production into grammar*)
   233       val (added_starts', prod_count') =
   230       val (added_starts', prod_count') =
   234         if isSome new_chain then (added_starts', prod_count)
   231         if isSome chain_from then (added_starts', prod_count)  (*don't store chain production*)
   235                                                (*don't store chain production*)
       
   236         else let
   232         else let
   237           (*lookahead tokens of new production and on which
   233           (*lookahead tokens of new production and on which
   238             NTs lookahead depends*)
   234             NTs lookahead depends*)
   239           val (start_tk, start_nts) = lookahead_dependency lambdas' rhs [];
   235           val (start_tk, start_nts) = lookahead_dependency lambdas' rhs [];
   240 
   236 
   417 
   413 
   418         val nt_prods =
   414         val nt_prods =
   419           Library.foldl (op union) ([], map snd (snd (Array.sub (prods, tag)))) @
   415           Library.foldl (op union) ([], map snd (snd (Array.sub (prods, tag)))) @
   420           map prod_of_chain (assocs chains tag);
   416           map prod_of_chain (assocs chains tag);
   421       in map (pretty_prod name) nt_prods end;
   417       in map (pretty_prod name) nt_prods end;
   422         
   418 
   423   in List.concat (map pretty_nt taglist) end;
   419   in List.concat (map pretty_nt taglist) end;
   424 
   420 
   425 
   421 
   426 (** Operations on gramars **)
   422 (** Operations on gramars **)
   427 
   423 
   504     val chains' = inverse_chains fromto_chains' [];
   500     val chains' = inverse_chains fromto_chains' [];
   505   in Gram {nt_count = nt_count', prod_count = prod_count', tags = tags',
   501   in Gram {nt_count = nt_count', prod_count = prod_count', tags = tags',
   506            chains = chains', lambdas = lambdas', prods = prods'}
   502            chains = chains', lambdas = lambdas', prods = prods'}
   507   end;
   503   end;
   508 
   504 
       
   505 val make_gram = extend_gram empty_gram;
       
   506 
   509 
   507 
   510 (*Merge two grammars*)
   508 (*Merge two grammars*)
   511 fun merge_grams gram_a gram_b =
   509 fun merge_grams gram_a gram_b =
   512   let
   510   let
   513     (*find out which grammar is bigger*)
   511     (*find out which grammar is bigger*)
   540                      (fst (valOf (find_first (fn (n, t) => t = tag) tag_list)));
   538                      (fst (valOf (find_first (fn (n, t) => t = tag) tag_list)));
   541               in Array.update (table, tag, tag');
   539               in Array.update (table, tag, tag');
   542                  store_tag nt_count' tags' (tag-1)
   540                  store_tag nt_count' tags' (tag-1)
   543               end;
   541               end;
   544       in (store_tag nt_count1 tags1 (nt_count2-1), table) end;
   542       in (store_tag nt_count1 tags1 (nt_count2-1), table) end;
   545     
   543 
   546     (*convert grammar2 tag to grammar1 tag*)
   544     (*convert grammar2 tag to grammar1 tag*)
   547     fun convert_tag tag = Array.sub (tag_table, tag);
   545     fun convert_tag tag = Array.sub (tag_table, tag);
   548 
   546 
   549     (*convert chain list to raw productions*)
   547     (*convert chain list to raw productions*)
   550     fun mk_chain_prods [] result = result
   548     fun mk_chain_prods [] result = result
   554 
   552 
   555           fun make [] result = result
   553           fun make [] result = result
   556             | make (from :: froms) result = make froms ((to_tag,
   554             | make (from :: froms) result = make froms ((to_tag,
   557                 ([Nonterminal (convert_tag from, ~1)], "", ~1)) :: result);
   555                 ([Nonterminal (convert_tag from, ~1)], "", ~1)) :: result);
   558         in mk_chain_prods cs (make froms [] @ result) end;
   556         in mk_chain_prods cs (make froms [] @ result) end;
   559         
   557 
   560     val chain_prods = mk_chain_prods chains2 [];
   558     val chain_prods = mk_chain_prods chains2 [];
   561 
   559 
   562     (*convert prods2 array to productions*)
   560     (*convert prods2 array to productions*)
   563     fun process_nt ~1 result = result
   561     fun process_nt ~1 result = result
   564       | process_nt nt result =
   562       | process_nt nt result =
   625 (*Make states using a list of rhss*)
   623 (*Make states using a list of rhss*)
   626 fun mkStates i minPrec lhsID rhss =
   624 fun mkStates i minPrec lhsID rhss =
   627   let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i);
   625   let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i);
   628   in map mkState rhss end;
   626   in map mkState rhss end;
   629 
   627 
   630 (*Add parse tree to list and eliminate duplicates 
   628 (*Add parse tree to list and eliminate duplicates
   631   saving the maximum precedence*)
   629   saving the maximum precedence*)
   632 fun conc (t, prec:int) [] = (NONE, [(t, prec)])
   630 fun conc (t, prec:int) [] = (NONE, [(t, prec)])
   633   | conc (t, prec) ((t', prec') :: ts) =
   631   | conc (t, prec) ((t', prec') :: ts) =
   634       if t = t' then
   632       if t = t' then
   635         (SOME prec', if prec' >= prec then (t', prec') :: ts 
   633         (SOME prec', if prec' >= prec then (t', prec') :: ts
   636                      else (t, prec) :: ts)
   634                      else (t, prec) :: ts)
   637       else
   635       else
   638         let val (n, ts') = conc (t, prec) ts
   636         let val (n, ts') = conc (t, prec) ts
   639         in (n, (t', prec') :: ts') end;
   637         in (n, (t', prec') :: ts') end;
   640 
   638 
   700 let (*similar to token_assoc but does not automatically include 'NONE' key*)
   698 let (*similar to token_assoc but does not automatically include 'NONE' key*)
   701     fun token_assoc2 (list, key) =
   699     fun token_assoc2 (list, key) =
   702       let fun assoc [] result = result
   700       let fun assoc [] result = result
   703             | assoc ((keyi, pi) :: pairs) result =
   701             | assoc ((keyi, pi) :: pairs) result =
   704                 if isSome keyi andalso matching_tokens (valOf keyi, key)
   702                 if isSome keyi andalso matching_tokens (valOf keyi, key)
   705                    orelse include_none andalso is_none keyi then 
   703                    orelse include_none andalso is_none keyi then
   706                   assoc pairs (pi @ result)
   704                   assoc pairs (pi @ result)
   707                 else assoc pairs result;
   705                 else assoc pairs result;
   708       in assoc list [] end;
   706       in assoc list [] end;
   709 
   707 
   710     fun get_prods [] result = result
   708     fun get_prods [] result = result
   730                                       (*wanted precedence has been processed*)
   728                                       (*wanted precedence has been processed*)
   731                     (used, movedot_lambda S l)
   729                     (used, movedot_lambda S l)
   732                   else            (*wanted precedence hasn't been parsed yet*)
   730                   else            (*wanted precedence hasn't been parsed yet*)
   733                     let
   731                     let
   734                       val tk_prods = all_prods_for nt;
   732                       val tk_prods = all_prods_for nt;
   735                       
   733 
   736                       val States' = mkStates i minPrec nt
   734                       val States' = mkStates i minPrec nt
   737                                       (getRHS' minPrec usedPrec tk_prods);
   735                                       (getRHS' minPrec usedPrec tk_prods);
   738                     in (update_prec (nt, minPrec) used, 
   736                     in (update_prec (nt, minPrec) used,
   739                         movedot_lambda S l @ States')
   737                         movedot_lambda S l @ States')
   740                     end
   738                     end
   741 
   739 
   742               | NONE =>           (*nonterminal is parsed for the first time*)
   740               | NONE =>           (*nonterminal is parsed for the first time*)
   743                   let val tk_prods = all_prods_for nt;
   741                   let val tk_prods = all_prods_for nt;
   774                     if n >= prec then processS used' States (S :: Si, Sii)
   772                     if n >= prec then processS used' States (S :: Si, Sii)
   775                     else
   773                     else
   776                       let val Slist = getS' A prec n Si;
   774                       let val Slist = getS' A prec n Si;
   777                           val States' = map (movedot_nonterm tt) Slist;
   775                           val States' = map (movedot_nonterm tt) Slist;
   778                       in processS used' (States' @ States) (S :: Si, Sii) end
   776                       in processS used' (States' @ States) (S :: Si, Sii) end
   779               end 
   777               end
   780             else
   778             else
   781               let val Slist = getStates Estate i j A prec
   779               let val Slist = getStates Estate i j A prec
   782               in processS used (map (movedot_nonterm tt) Slist @ States)
   780               in processS used (map (movedot_nonterm tt) Slist @ States)
   783                           (S :: Si, Sii)
   781                           (S :: Si, Sii)
   784               end
   782               end
   791     val msg =
   789     val msg =
   792       if null toks then Pretty.str "Inner syntax error: unexpected end of input"
   790       if null toks then Pretty.str "Inner syntax error: unexpected end of input"
   793       else
   791       else
   794         Pretty.block (Pretty.str "Inner syntax error at: \"" ::
   792         Pretty.block (Pretty.str "Inner syntax error at: \"" ::
   795           Pretty.breaks (map (Pretty.str o str_of_token)
   793           Pretty.breaks (map (Pretty.str o str_of_token)
   796 		 (rev (tl (rev toks)))) @
   794                  (rev (tl (rev toks)))) @
   797           [Pretty.str "\""]);
   795           [Pretty.str "\""]);
   798     val expected =
   796     val expected =
   799       Pretty.strs ("Expected tokens: " :: map (quote o str_of_token) allowed);
   797       Pretty.strs ("Expected tokens: " :: map (quote o str_of_token) allowed);
   800   in
   798   in
   801     error (Pretty.string_of (Pretty.blk (0, [msg, Pretty.fbrk, expected])))
   799     error (Pretty.string_of (Pretty.blk (0, [msg, Pretty.fbrk, expected])))
   808 
   806 
   809               (*test if tk is a lookahead for a given minimum precedence*)
   807               (*test if tk is a lookahead for a given minimum precedence*)
   810               fun reduction _ minPrec _ (Terminal _ :: _, _, prec:int) =
   808               fun reduction _ minPrec _ (Terminal _ :: _, _, prec:int) =
   811                     if prec >= minPrec then true
   809                     if prec >= minPrec then true
   812                     else false
   810                     else false
   813                 | reduction tk minPrec checked 
   811                 | reduction tk minPrec checked
   814                             (Nonterminal (nt, nt_prec) :: _, _, prec) =
   812                             (Nonterminal (nt, nt_prec) :: _, _, prec) =
   815                   if prec >= minPrec andalso not (nt mem checked) then
   813                   if prec >= minPrec andalso not (nt mem checked) then
   816                     let val chained = connected_with chains [nt] [nt];
   814                     let val chained = connected_with chains [nt] [nt];
   817                     in exists
   815                     in exists
   818                          (reduction tk nt_prec (chained @ checked))
   816                          (reduction tk nt_prec (chained @ checked))
   819                          (some_prods_for tk nt)
   817                          (some_prods_for tk nt)
   820                     end
   818                     end
   821                   else false;
   819                   else false;
   822 
   820 
   823               (*compute a list of allowed starting tokens 
   821               (*compute a list of allowed starting tokens
   824                 for a list of nonterminals considering precedence*)
   822                 for a list of nonterminals considering precedence*)
   825               fun get_starts [] result = result
   823               fun get_starts [] result = result
   826                 | get_starts ((nt, minPrec:int) :: nts) result =
   824                 | get_starts ((nt, minPrec:int) :: nts) result =
   827                   let fun get [] result = result
   825                   let fun get [] result = result
   828                         | get ((SOME tk, prods) :: ps) result =
   826                         | get ((SOME tk, prods) :: ps) result =
   839                   in get_starts (chained @ nts)
   837                   in get_starts (chained @ nts)
   840                                 ((get nt_prods []) union result)
   838                                 ((get nt_prods []) union result)
   841                   end;
   839                   end;
   842 
   840 
   843               val nts =
   841               val nts =
   844                 List.mapPartial (fn (_, _, _, Nonterminal (a, prec) :: _, _, _) => 
   842                 List.mapPartial (fn (_, _, _, Nonterminal (a, prec) :: _, _, _) =>
   845                            SOME (a, prec) | _ => NONE)
   843                            SOME (a, prec) | _ => NONE)
   846                           (Array.sub (stateset, i-1));
   844                           (Array.sub (stateset, i-1));
   847               val allowed =
   845               val allowed =
   848                 distinct (get_starts nts [] @
   846                 distinct (get_starts nts [] @
   849                   (List.mapPartial (fn (_, _, _, Terminal a :: _, _, _) => SOME a
   847                   (List.mapPartial (fn (_, _, _, Terminal a :: _, _, _) => SOME a
   861           Array.update (stateset, i + 1, sii);
   859           Array.update (stateset, i + 1, sii);
   862           produce prods chains stateset (i + 1) cs c
   860           produce prods chains stateset (i + 1) cs c
   863        end));
   861        end));
   864 
   862 
   865 
   863 
   866 fun get_trees l = List.mapPartial (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) 
   864 fun get_trees l = List.mapPartial (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE)
   867                             l;
   865                             l;
   868 
   866 
   869 fun earley prods tags chains startsymbol indata =
   867 fun earley prods tags chains startsymbol indata =
   870   let
   868   let
   871     val start_tag = case Symtab.lookup (tags, startsymbol) of
   869     val start_tag = case Symtab.lookup (tags, startsymbol) of
   872                        SOME tag => tag
   870                        SOME tag => tag
   873                      | NONE   => error ("parse: Unknown startsymbol " ^ 
   871                      | NONE   => error ("parse: Unknown startsymbol " ^
   874                                         quote startsymbol);
   872                                         quote startsymbol);
   875     val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal EndToken],
   873     val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal EndToken],
   876                "", 0)];
   874                "", 0)];
   877     val s = length indata + 1;
   875     val s = length indata + 1;
   878     val Estate = Array.array (s, []);
   876     val Estate = Array.array (s, []);