src/Pure/General/scan.ML
changeset 23699 5a4527f3ac79
parent 23683 1fcfb8682209
child 24025 77e3e5781a99
equal deleted inserted replaced
23698:af84f2f13d4d 23699:5a4527f3ac79
     5 Generic scanners (for potentially infinite input).
     5 Generic scanners (for potentially infinite input).
     6 *)
     6 *)
     7 
     7 
     8 infix 5 -- :-- |-- --| ^^;
     8 infix 5 -- :-- |-- --| ^^;
     9 infix 3 >>;
     9 infix 3 >>;
    10 infix 0 ||;
    10 infixr 0 ||;
    11 
    11 
    12 signature BASIC_SCAN =
    12 signature BASIC_SCAN =
    13 sig
    13 sig
    14   (*error msg handler*)
    14   (*error msg handler*)
    15   val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
    15   val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
    33 end;
    33 end;
    34 
    34 
    35 signature SCAN =
    35 signature SCAN =
    36 sig
    36 sig
    37   include BASIC_SCAN
    37   include BASIC_SCAN
       
    38   val prompt: string -> ('a -> 'b) -> 'a -> 'b
       
    39   val error: ('a -> 'b) -> 'a -> 'b
       
    40   val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)
    38   val fail: 'a -> 'b
    41   val fail: 'a -> 'b
    39   val fail_with: ('a -> string) -> 'a -> 'b
    42   val fail_with: ('a -> string) -> 'a -> 'b
    40   val succeed: 'a -> 'b -> 'a * 'b
    43   val succeed: 'a -> 'b -> 'a * 'b
    41   val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
    44   val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
    42   val one: ('a -> bool) -> 'a list -> 'a * 'a list
    45   val one: ('a -> bool) -> 'a list -> 'a * 'a list
    46   val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
    49   val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
    47   val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
    50   val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
    48   val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
    51   val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
    49   val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    52   val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    50   val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    53   val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    54   val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    55   val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    51   val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
    56   val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
    52   val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
    57   val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
    53   val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
    58   val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
    54   val first: ('a -> 'b) list -> 'a -> 'b
    59   val first: ('a -> 'b) list -> 'a -> 'b
    55   val state: 'a * 'b -> 'a * ('a * 'b)
    60   val state: 'a * 'b -> 'a * ('a * 'b)
    56   val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
    61   val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
    57   val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
    62   val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
    58   val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
    63   val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
    59   val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
    64   val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
    60   val trace': ('a * 'b list -> 'c * ('d * 'e list)) -> 'a * 'b list ->
    65   val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd
    61     ('c * 'b list) * ('d * 'e list)
       
    62   val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
    66   val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
    63   val prompt: string -> ('a -> 'b) -> 'a -> 'b
       
    64   val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    67   val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    65     -> 'b * 'a list -> 'c * ('d * 'a list)
    68     -> 'b * 'a list -> 'c * ('d * 'a list)
    66   val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
    69   val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
    67   val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
    70   val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
    68   val error: ('a -> 'b) -> 'a -> 'b
    71   val drain: string -> (string -> 'a -> 'b list * 'a) -> 'b * ('b -> bool) ->
    69   val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    72     ('c * 'b list -> 'd * ('e * 'b list)) -> ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a
    70     'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
       
    71     (bool * (string -> 'd * 'b list -> 'e list * ('d * 'b list))) option ->
       
    72     'd * 'a -> 'e list * ('d * 'c)
       
    73   val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
       
    74     'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
       
    75     (bool * (string -> 'b list -> 'd list * 'b list)) option -> 'a -> 'd list * 'c
       
    76   val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    77   val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    78   type lexicon
    73   type lexicon
    79   val dest_lexicon: lexicon -> string list
    74   val dest_lexicon: lexicon -> string list
    80   val make_lexicon: string list list -> lexicon
    75   val make_lexicon: string list list -> lexicon
    81   val empty_lexicon: lexicon
    76   val empty_lexicon: lexicon
    82   val extend_lexicon: string list list -> lexicon -> lexicon
    77   val extend_lexicon: string list list -> lexicon -> lexicon
    89 struct
    84 struct
    90 
    85 
    91 
    86 
    92 (** scanners **)
    87 (** scanners **)
    93 
    88 
       
    89 (* exceptions *)
       
    90 
    94 exception MORE of string option;        (*need more input (prompt)*)
    91 exception MORE of string option;        (*need more input (prompt)*)
    95 exception FAIL of string option;        (*try alternatives (reason of failure)*)
    92 exception FAIL of string option;        (*try alternatives (reason of failure)*)
    96 exception ABORT of string;              (*dead end*)
    93 exception ABORT of string;              (*dead end*)
       
    94 
       
    95 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
       
    96 fun permissive scan xs = scan xs handle MORE _ => raise FAIL NONE | ABORT _ => raise FAIL NONE;
       
    97 fun strict scan xs = scan xs handle MORE _ => raise FAIL NONE;
       
    98 fun prompt str scan xs = scan xs handle MORE NONE => raise MORE (SOME str);
       
    99 fun error scan xs = scan xs handle ABORT msg => Library.error msg;
       
   100 
       
   101 fun catch scan xs = scan xs
       
   102   handle ABORT msg => raise Fail msg
       
   103     | FAIL msg => raise Fail (the_default "Syntax error." msg);
    97 
   104 
    98 
   105 
    99 (* scanner combinators *)
   106 (* scanner combinators *)
   100 
   107 
   101 fun (scan >> f) xs = scan xs |>> f;
   108 fun (scan >> f) xs = scan xs |>> f;
   158         NONE => (rev ys, xs)
   165         NONE => (rev ys, xs)
   159       | SOME (y, xs') => rep (y :: ys) xs');
   166       | SOME (y, xs') => rep (y :: ys) xs');
   160   in rep [] end;
   167   in rep [] end;
   161 
   168 
   162 fun repeat1 scan = scan -- repeat scan >> op ::;
   169 fun repeat1 scan = scan -- repeat scan >> op ::;
       
   170 
       
   171 fun single scan = scan >> (fn x => [x]);
       
   172 fun bulk scan = scan -- repeat (permissive scan) >> (op ::);
   163 
   173 
   164 fun max leq scan1 scan2 xs =
   174 fun max leq scan1 scan2 xs =
   165   (case (option scan1 xs, option scan2 xs) of
   175   (case (option scan1 xs, option scan2 xs) of
   166     ((NONE, _), (NONE, _)) => raise FAIL NONE           (*looses FAIL msg!*)
   176     ((NONE, _), (NONE, _)) => raise FAIL NONE           (*looses FAIL msg!*)
   167   | ((SOME tok1, xs'), (NONE, _)) => (tok1, xs')
   177   | ((SOME tok1, xs'), (NONE, _)) => (tok1, xs')
   199 fun unlift scan = pass () scan;
   209 fun unlift scan = pass () scan;
   200 
   210 
   201 
   211 
   202 (* trace input *)
   212 (* trace input *)
   203 
   213 
   204 fun trace' scan (st, xs) =
   214 fun trace scan xs =
   205   let val (y, (st', xs')) = scan (st, xs)
   215   let val (y, xs') = scan xs
   206   in ((y, Library.take (length xs - length xs', xs)), (st', xs')) end;
   216   in ((y, Library.take (length xs - length xs', xs)), xs') end;
   207 
       
   208 fun trace scan = unlift (trace' (lift scan));
       
   209 
       
   210 
       
   211 (* exception handling *)
       
   212 
       
   213 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
       
   214 fun permissive scan xs = scan xs handle MORE _ => raise FAIL NONE | ABORT _ => raise FAIL NONE;
       
   215 fun strict scan xs = scan xs handle MORE _ => raise FAIL NONE;
       
   216 fun prompt str scan xs = scan xs handle MORE NONE => raise MORE (SOME str);
       
   217 fun catch scan xs = scan xs handle ABORT msg => raise FAIL (SOME msg);
       
   218 fun error scan xs = scan xs handle ABORT msg => Library.error msg;
       
   219 
   217 
   220 
   218 
   221 (* finite scans *)
   219 (* finite scans *)
   222 
   220 
   223 fun finite' (stopper, is_stopper) scan (state, input) =
   221 fun finite' (stopper, is_stopper) scan (state, input) =
   242   | _ => NONE);
   240   | _ => NONE);
   243 
   241 
   244 
   242 
   245 (* infinite scans -- draining state-based source *)
   243 (* infinite scans -- draining state-based source *)
   246 
   244 
   247 fun drain def_prmpt get stopper scan ((state, xs), src) =
   245 fun drain def_prompt get stopper scan ((state, xs), src) =
   248   (scan (state, xs), src) handle MORE prmpt =>
   246   (scan (state, xs), src) handle MORE prompt =>
   249     (case get (the_default def_prmpt prmpt) src of
   247     (case get (the_default def_prompt prompt) src of
   250       ([], _) => (finite' stopper scan (state, xs), src)
   248       ([], _) => (finite' stopper scan (state, xs), src)
   251     | (xs', src') => drain def_prmpt get stopper scan ((state, xs @ xs'), src'));
   249     | (xs', src') => drain def_prompt get stopper scan ((state, xs @ xs'), src'));
   252 
       
   253 fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
       
   254   let
       
   255     val draining = drain def_prmpt get stopper;
       
   256     val (xs, s) = get def_prmpt src;
       
   257     val inp = ((state, xs), s);
       
   258     val ((ys, (state', xs')), src') =
       
   259       if null xs then (([], (state, [])), s)
       
   260       else
       
   261         (case opt_recover of
       
   262           NONE => draining (error scanner) inp
       
   263         | SOME (interactive, recover) =>
       
   264             (draining (catch scanner) inp handle FAIL msg =>
       
   265               let val err = the_default "Syntax error." msg in
       
   266                 if interactive then Output.error_msg err else ();
       
   267                 draining (unless (lift (one (#2 stopper))) (recover err)) inp
       
   268               end));
       
   269   in (ys, (state', unget (xs', src'))) end;
       
   270 
       
   271 fun source def_prmpt get unget stopper scan opt_recover =
       
   272   unlift (source' def_prmpt get unget stopper (lift scan)
       
   273     (Option.map (fn (int, r) => (int, lift o r)) opt_recover));
       
   274 
       
   275 fun single scan = scan >> (fn x => [x]);
       
   276 fun bulk scan = scan -- repeat (permissive scan) >> (op ::);
       
   277 
   250 
   278 
   251 
   279 
   252 
   280 (** datatype lexicon **)
   253 (** datatype lexicon **)
   281 
   254