src/Pure/Syntax/scan.ML
author wenzelm
Wed, 20 May 1998 18:57:16 +0200
changeset 4953 78ff4a45a822
parent 4937 e3132cf1d68e
child 4958 ad2acb8d2db4
permissions -rw-r--r--
source vs. source';
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
     1
(*  Title:	Pure/Syntax/scan.ML
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
     2
    ID:		$Id$
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
     3
    Author:	Markus Wenzel and Tobias Nipkow, TU Muenchen
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
     4
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
     5
Generic scanners (for potentially infinite input).
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
     6
*)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
     7
4924
cf6bb75968c4 added :-- (dependent pair);
wenzelm
parents: 4919
diff changeset
     8
infix 5 -- :-- |-- --| ^^;
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
     9
infix 3 >>;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    10
infix 0 ||;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    11
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    12
(*basic scanner combinators, intended to be available unqualified*)
signature BASIC_SCAN =
sig
  (*cut: turn FAIL of the scanner into ABORT; the message is computed
    from the input and the optional reason of failure*)
  val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
  (*apply a function to the result of a scanner*)
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
  (*alternative: use the second scanner if the first one raises FAIL*)
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
  (*sequential composition, pairing both results*)
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*dependent pairing: second scanner receives the result of the first*)
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*sequential composition, keeping the second result only*)
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
  (*sequential composition, keeping the first result only*)
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
  (*sequential composition, concatenating both string results*)
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
  (*scan one item equal to the given one*)
  val $$ : ''a -> ''a list -> ''a * ''a list
end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    24
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    25
(*full interface of generic scanners*)
signature SCAN =
sig
  include BASIC_SCAN

  (* generic scanners *)

  (*always raise FAIL, without a reason*)
  val fail: 'a -> 'b
  (*always raise FAIL, with a reason computed from the input*)
  val fail_with: ('a -> string) -> 'a -> 'b
  (*return the given result, leaving the input untouched*)
  val succeed: 'a -> 'b -> 'a * 'b
  (*one item satisfying the predicate; MORE on empty input*)
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
  (*leading items satisfying the predicate; MORE if the input runs out*)
  val any: ('a -> bool) -> 'a list -> 'a list * 'a list
  (*like any, but requires at least one item*)
  val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
  (*result of the scanner, or the default if it raises FAIL*)
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  (*optional result: None if the scanner raises FAIL*)
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
  (*apply the scanner repeatedly until it raises FAIL*)
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  (*like repeat, but the first application must succeed*)
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  (*run both scanners on the same input and choose among successful
    results according to the given comparison*)
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
  (*lookahead: result of the scanner together with the original input*)
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a

  (* state based scanners *)

  (*scanner that depends on the state and yields an updated state*)
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
  (*turn a plain scanner into one that passes the state through unchanged*)
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
  (*run a state based scanner from an initial state, dropping the final state*)
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e

  (* exception handling *)

  (*turn both MORE and ABORT into FAIL*)
  val try: ('a -> 'b) -> 'a -> 'b
  (*turn MORE into FAIL*)
  val force: ('a -> 'b) -> 'a -> 'b
  (*attach the given prompt to a MORE that does not carry one yet*)
  val prompt: string -> ('a -> 'b) -> 'a -> 'b

  (* finite scans *)

  (*scan finite input, temporarily terminated by the stopper item;
    first argument is the stopper together with its recognizer*)
  val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    -> 'b * 'a list -> 'c * ('d * 'a list)
  (*variant of finite' for scanners without state*)
  val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
  (*turn ABORT into a regular error*)
  val error: ('a -> 'b) -> 'a -> 'b

  (* infinite scans *)

  (*drain a source: arguments are the default prompt, the get and unget
    operations of the source, the stopper, and a state based scanner*)
  val source': string -> (string -> 'a -> 'b list * 'a) ->
    ('b list * 'a -> 'c) -> 'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
      'd * 'a -> 'e list * ('d * 'c)
  (*variant of source' for scanners without state*)
  val source: string -> (string -> 'a -> 'b list * 'a) ->
    ('b list * 'a -> 'c) -> 'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
      'a -> 'd list * 'c
  (*wrap the result of the scanner into a singleton list*)
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  (*one mandatory result followed by as many further ones as can be
    scanned without raising FAIL, MORE or ABORT*)
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a

  (* lexicons *)

  type lexicon
  val dest_lexicon: lexicon -> string list list
  val make_lexicon: string list list -> lexicon
  val empty_lexicon: lexicon
  val extend_lexicon: lexicon -> string list list -> lexicon
  val merge_lexicons: lexicon -> lexicon -> lexicon
  val literal: lexicon -> string list -> string list * string list
end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    66
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    67
structure Scan: SCAN =
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    68
struct
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    69
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    70
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    71
(** scanners **)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    72
4756
329c09e15991 added try, single, many;
wenzelm
parents: 4702
diff changeset
    73
(*need more input; the optional string is the prompt to be used*)
exception MORE of string option;
(*try alternatives; the optional string is the reason of failure*)
exception FAIL of string option;
(*dead end; the string is the error message*)
exception ABORT of string;
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    76
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    77
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    78
(* scanner combinators *)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    79
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    80
(*apply f to the scanned result, keeping the remaining input*)
fun (scan >> f) xs =
  let val (x, rest) = scan xs
  in (f x, rest) end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    81
4919
9397b1446cdb added fail_with and adapted !!;
wenzelm
parents: 4903
diff changeset
    82
(*alternative: scan2 is applied to the same input if scan1 raises FAIL*)
fun (scan1 || scan2) input =
  scan1 input
    handle FAIL _ => scan2 input;
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    83
4937
e3132cf1d68e improved finite scans: more abstract stopper;
wenzelm
parents: 4924
diff changeset
    84
(*dependent pairing: the second scanner is parameterized by the result
  of the first one and runs on the input left over by it*)
fun (scan1 :-- scan2) xs =
  (case scan1 xs of
    (x, ys) =>
      (case scan2 x ys of
        (y, zs) => ((x, y), zs)));
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    90
4924
cf6bb75968c4 added :-- (dependent pair);
wenzelm
parents: 4919
diff changeset
    91
(*sequential composition: scan2 continues on the input left by scan1,
  both results are paired*)
fun (scan1 -- scan2) xs =
  let
    val (x, ys) = scan1 xs;
    val (y, zs) = scan2 ys;
  in ((x, y), zs) end;
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    92
(*sequential composition, discarding the result of scan1*)
fun (scan1 |-- scan2) = (scan1 -- scan2) >> (fn (_, y) => y);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    93
(*sequential composition, discarding the result of scan2*)
fun (scan1 --| scan2) = (scan1 -- scan2) >> (fn (x, _) => x);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    94
(*sequential composition of string scanners, concatenating the results*)
fun (scan1 ^^ scan2) = (scan1 -- scan2) >> (fn (s1, s2) => s1 ^ s2);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    95
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    96
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    97
(* generic scanners *)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
    98
4919
9397b1446cdb added fail_with and adapted !!;
wenzelm
parents: 4903
diff changeset
    99
(*scanner that always fails, without giving a reason*)
fun fail _ = raise FAIL None;
9397b1446cdb added fail_with and adapted !!;
wenzelm
parents: 4903
diff changeset
   100
(*scanner that always fails; the reason is computed from the input*)
fun fail_with msg_of xs =
  let val reason = msg_of xs
  in raise FAIL (Some reason) end;
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   101
(*scanner that yields the given result without touching the input*)
fun succeed result = (fn input => (result, input));
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   102
4756
329c09e15991 added try, single, many;
wenzelm
parents: 4702
diff changeset
   103
(*scan a single item satisfying pred; asks for more input (MORE) if
  there is none, fails (FAIL) if the first item does not qualify*)
fun one pred input =
  (case input of
    [] => raise MORE None
  | x :: rest => if pred x then (x, rest) else raise FAIL None);
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   106
4756
329c09e15991 added try, single, many;
wenzelm
parents: 4702
diff changeset
   107
(*scan a single item equal to a; asks for more input (MORE) if there
  is none, fails (FAIL) if the first item differs*)
fun $$ a input =
  (case input of
    [] => raise MORE None
  | x :: rest => if a = x then (x, rest) else raise FAIL None);
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   110
4756
329c09e15991 added try, single, many;
wenzelm
parents: 4702
diff changeset
   111
(*scan leading items satisfying pred, stopping in front of the first
  item that does not; raises MORE if the input runs out before that*)
fun any pred input =
  (case input of
    [] => raise MORE None
  | x :: rest =>
      if not (pred x) then ([], input)
      else
        let val (xs, rest') = any pred rest
        in (x :: xs, rest') end);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   115
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   116
(*like any, but the first item must satisfy pred*)
fun any1 pred = (one pred -- any pred) >> (fn (x, xs) => x :: xs);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   117
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   118
(*result of scan, or def with the input untouched if scan raises FAIL*)
fun optional scan def xs =
  scan xs handle FAIL _ => (def, xs);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   119
(*Some result of scan, or None with the input untouched if scan raises FAIL*)
fun option scan xs =
  let val (x, ys) = scan xs
  in (Some x, ys) end
  handle FAIL _ => (None, xs);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   120
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   121
(*apply scan as often as possible, collecting the results; stops with
  the input of the first application that raises FAIL*)
fun repeat scan xs =
  let
    val (x, ys) = scan xs;
    val (more, zs) = repeat scan ys;
  in (x :: more, zs) end
  handle FAIL _ => ([], xs);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   122
(*like repeat, but the first application of scan must succeed*)
fun repeat1 scan = (scan -- repeat scan) >> (fn (x, xs) => x :: xs);
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   123
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   124
(*run both scanners on the same input; if only one succeeds take its
  result, if both succeed take that of scan1 unless leq (tok2, tok1)
  is false*)
fun max leq scan1 scan2 xs =
  let
    val (res1, xs1') = option scan1 xs;
    val (res2, xs2') = option scan2 xs;
  in
    (case (res1, res2) of
      (None, None) => raise FAIL None           (*loses FAIL msg!*)
    | (Some tok1, None) => (tok1, xs1')
    | (None, Some tok2) => (tok2, xs2')
    | (Some tok1, Some tok2) =>
        if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'))
  end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   131
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   132
(*lookahead: yield the result of scan, but return the original input*)
fun ahead scan xs =
  let val (y, _) = scan xs
  in (y, xs) end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   133
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   134
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   135
(* state based scanners *)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   136
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   137
(*state based scanner from a scanner that is parameterized by the
  current state and yields the new state paired with its result*)
fun depend scan (st, xs) =
  (case scan st xs of
    ((st', y), xs') => (y, (st', xs')));
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   140
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   141
(*state based scanner from a plain one; the state is passed through unchanged*)
fun lift scan (st, xs) =
  (case scan xs of
    (y, xs') => (y, (st, xs')));
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   144
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   145
(*run a state based scanner from initial state st; the final state is dropped*)
fun pass st scan xs =
  (case scan (st, xs) of
    (y, (_, xs')) => (y, xs'));
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   148
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   149
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   150
(* exception handling *)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   151
4919
9397b1446cdb added fail_with and adapted !!;
wenzelm
parents: 4903
diff changeset
   152
(*cut: FAIL of scan becomes ABORT; the message is produced by err from
  the input and the optional reason of failure*)
fun !! err scan xs =
  scan xs handle FAIL reason =>
    let val msg = err (xs, reason)
    in raise ABORT msg end;
9397b1446cdb added fail_with and adapted !!;
wenzelm
parents: 4903
diff changeset
   153
(*turn both MORE and ABORT of scan into FAIL (without reason)*)
fun try scan xs =
  scan xs handle
    MORE _ => raise FAIL None
  | ABORT _ => raise FAIL None;
9397b1446cdb added fail_with and adapted !!;
wenzelm
parents: 4903
diff changeset
   154
(*turn MORE of scan into FAIL (without reason)*)
fun force scan input =
  scan input
    handle MORE _ => raise FAIL None;
4756
329c09e15991 added try, single, many;
wenzelm
parents: 4702
diff changeset
   155
(*attach prompt str to a MORE of scan that does not carry a prompt yet;
  a MORE that already has one is passed on unchanged*)
fun prompt str scan input =
  scan input
    handle MORE None => raise MORE (Some str);
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   156
(*report ABORT of scan via Library.error*)
fun error scan input =
  scan input
    handle ABORT msg => Library.error msg;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   157
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   158
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   159
(* finite scans *)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   160
4937
e3132cf1d68e improved finite scans: more abstract stopper;
wenzelm
parents: 4924
diff changeset
   161
(*scan finite input with a state based scanner: the stopper item is
  appended to the input, scan is forced (MORE becomes FAIL), and the
  stopper is removed again from the end of the remaining input.
  ABORT is raised if the input already contains a stopper, or if the
  stopper is no longer the last item after scanning*)
fun finite' (stopper, is_stopper) scan (state, input) =
  let
    val lost_msg = "Scanner bug: lost stopper of finite scan!";

    (*strip the trailing stopper from the remaining input*)
    fun stop rest =
      (case rest of
        [] => raise ABORT lost_msg
      | _ =>
          (case split_last rest of
            (front, last) =>
              if is_stopper last then ((), front) else raise ABORT lost_msg));
  in
    if not (exists is_stopper input) then
      (force scan --| lift stop) (state, input @ [stopper])
    else raise ABORT "Stopper may not occur in input of finite scan!"
  end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   174
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   175
(*finite scan for plain scanners: finite' run with a dummy unit state*)
fun finite stopper scan xs =
  pass () (finite' stopper (lift scan)) xs;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   178
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   179
4903
0f56199a8d97 improved source: state-based;
wenzelm
parents: 4756
diff changeset
   180
(* infinite scans -- draining state-based source *)
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   181
4953
78ff4a45a822 source vs. source';
wenzelm
parents: 4937
diff changeset
   182
(*drain a source with a state based scanner.  Input is fetched via
  get (given a prompt); whenever scan raises MORE, further input is
  requested using the prompt carried by MORE or def_prmpt, and the
  scan is restarted on the accumulated input.  If no further input is
  available, the accumulated input is scanned as finite input.
  Unconsumed input is handed back to the source via unget*)
fun source' def_prmpt get unget stopper scan (state, src) =
  let
    (*scan the buffered input, extending the buffer on demand*)
    fun drain (buf, s) =
      (scan (state, buf), s) handle MORE prmpt =>
        (case get (if_none prmpt def_prmpt) s of
          ([], _) => (finite' stopper scan (state, buf), s)
        | (more, s') => drain (buf @ more, s'));

    val (first, src1) = get def_prmpt src;

    (*nothing to scan if the source is exhausted right away*)
    val ((ys, (state', rest)), src') =
      (case first of
        [] => (([], (state, [])), src1)
      | _ => drain (first, src1));
  in
    (ys, (state', unget (rest, src')))
  end;
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   197
4953
78ff4a45a822 source vs. source';
wenzelm
parents: 4937
diff changeset
   198
(*drain a source with a plain scanner: source' run with a dummy unit state*)
fun source def_prmpt get unget stopper scan src =
  pass () (source' def_prmpt get unget stopper (lift scan)) src;
78ff4a45a822 source vs. source';
wenzelm
parents: 4937
diff changeset
   201
4756
329c09e15991 added try, single, many;
wenzelm
parents: 4702
diff changeset
   202
(*wrap the result of scan into a singleton list*)
fun single scan xs =
  let val (x, ys) = scan xs
  in ([x], ys) end;
4937
e3132cf1d68e improved finite scans: more abstract stopper;
wenzelm
parents: 4924
diff changeset
   203
(*one mandatory result of scan, followed by all further results that
  can be obtained without scan raising FAIL, MORE or ABORT*)
fun bulk scan = (scan -- repeat (try scan)) >> (fn (x, xs) => x :: xs);
4702
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   204
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   205
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   206
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   207
(** datatype lexicon **)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   208
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   209
(*search tree of literals.
  NOTE(review): judging from dest_lexicon, the components of Branch are
  a key string, the literal stored at this node (empty list if there is
  none), and three subtrees named lt, eq, gt there -- confirm against
  extend_lexicon*)
datatype lexicon =
    Empty
  | Branch of string * string list * lexicon * lexicon * lexicon;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   212
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   213
(*the empty list; presumably marks a Branch without a stored literal,
  cf. the [] case of dest_lexicon -- confirm against its users*)
val no_literal = [];
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   214
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   215
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   216
(* dest_lexicon *)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   217
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   218
(*List all literals stored in a lexicon.  For every Branch node the result
  is: literals of lt, then the node's own literal (omitted when it is the
  empty list), then literals of eq, then literals of gt.*)
fun dest_lexicon Empty = []
  | dest_lexicon (Branch (_, entry, lt, eq, gt)) =
      let
        (*a node holding the empty list contributes nothing itself*)
        val here = if null entry then [] else [entry];
      in
        dest_lexicon lt @ here @ dest_lexicon eq @ dest_lexicon gt
      end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   223
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   224
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   225
(* empty, extend, make, merge lexicons *)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   226
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   227
(*the lexicon without any literals*)
val empty_lexicon: lexicon = Empty;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   228
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   229
(*extend_lexicon lexicon chrss: add the literals chrss (each one a list of
  symbols) to lexicon.  Literals already returned by dest_lexicon lexicon
  are removed from chrss beforehand; the rest are inserted one by one.*)
fun extend_lexicon lexicon chrss =
  let
    (*insert a single literal into the tree*)
    fun insert (tree, literal) =
      let
        (*descend along the symbols still to be placed; an exhausted symbol
          list leaves the current subtree untouched*)
        fun descend node [] = node
          | descend Empty [c] =
              (*last symbol: the whole literal is stored at this new node*)
              Branch (c, literal, Empty, Empty, Empty)
          | descend Empty (c :: rest) =
              (*inner symbol: new node without literal, continue below eq*)
              Branch (c, no_literal, Empty, descend Empty rest, Empty)
          | descend (Branch (d, stored, lt, eq, gt)) (syms as c :: rest) =
              if c < d then Branch (d, stored, descend lt syms, eq, gt)
              else if c > d then Branch (d, stored, lt, eq, descend gt syms)
              else
                let
                  (*the literal ends here iff no symbols remain after c*)
                  val stored' = if null rest then literal else stored;
                in
                  Branch (d, stored', lt, descend eq rest, gt)
                end;
      in
        descend tree literal
      end;

    (*only literals not yet present in the lexicon*)
    val fresh = chrss \\ dest_lexicon lexicon;
  in
    foldl insert (lexicon, fresh)
  end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   244
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   245
(*build a lexicon from a list of literals, starting from empty_lexicon*)
fun make_lexicon chrss = extend_lexicon empty_lexicon chrss;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   246
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   247
(*merge_lexicons lex1 lex2: combine two lexicons.  If the literals of one
  are a subset of the literals of the other, the larger lexicon is returned
  unchanged (lex1 is preferred when both tests hold); otherwise lex1 is
  extended by the literals of lex2.*)
fun merge_lexicons lex1 lex2 =
  let
    val lits1 = dest_lexicon lex1;
    val lits2 = dest_lexicon lex2;
  in
    if lits2 subset lits1 then lex1
    else if lits1 subset lits2 then lex2
    else extend_lexicon lex1 lits2
  end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   256
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   257
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   258
(* scan literal *)
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   259
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   260
(*literal lex chrs: scan a literal of lex at the front of the symbol list
  chrs.  The search keeps descending as long as the tree allows, so the
  result is the last literal found on the way, paired with the input that
  follows it.
  Raises MORE None if the input ends while the search is still at a Branch
  node; raises FAIL None if the search ends without having found a literal.*)
fun literal lex chrs =
  let
    (*walk the tree along the input; best holds the most recent hit so far*)
    fun walk Empty best _ = best
      | walk (Branch _) _ [] = raise MORE None
      | walk (Branch (d, stored, lt, eq, gt)) best (input as c :: rest) =
          if c < d then walk lt best input
          else if c > d then walk gt best input
          else
            let
              (*a node carrying a literal replaces the previous hit*)
              val best' = if stored = no_literal then best else Some (stored, rest);
            in
              walk eq best' rest
            end;
  in
    (case walk lex None chrs of
      Some hit => hit
    | None => raise FAIL None)
  end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   273
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   274
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   275
end;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   276
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   277
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   278
(*BasicScan: structure Scan constrained by signature BASIC_SCAN*)
structure BasicScan: BASIC_SCAN = Scan;
ffbaf431665d Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff changeset
   279
(*make the components of BasicScan available unqualified from here on*)
open BasicScan;