src/Pure/General/scan.ML
author wenzelm
Thu Jul 19 23:18:48 2007 +0200 (2007-07-19)
changeset 23863 8f3099589cfa
parent 23699 5a4527f3ac79
child 24025 77e3e5781a99
permissions -rw-r--r--
tuned signature;
berghofe@11523
     1
(*  Title:      Pure/General/scan.ML
berghofe@11523
     2
    ID:         $Id$
berghofe@11523
     3
    Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
wenzelm@6116
     4
wenzelm@6116
     5
Generic scanners (for potentially infinite input).
wenzelm@6116
     6
*)
wenzelm@6116
     7
wenzelm@6116
     8
infix 5 -- :-- |-- --| ^^;
wenzelm@6116
     9
infix 3 >>;
wenzelm@23699
    10
infixr 0 ||;
wenzelm@6116
    11
wenzelm@6116
    12
(*basic scanner combinators, intended to be opened at top level*)
signature BASIC_SCAN =
sig
  (*cut: convert failure into abort, using the given message function*)
  val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b

  (*map the result of a scanner*)
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c

  (*try the first scanner, otherwise the second*)
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b

  (*run two scanners in sequence, pairing their results*)
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e

  (*sequence where the second scanner depends on the first result*)
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e

  (*sequence, keeping only the second result*)
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e

  (*sequence, keeping only the first result*)
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e

  (*sequence of string scanners, concatenating their results*)
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c

  (*single element equal to / different from the given literal*)
  val $$ : string -> string list -> string * string list
  val ~$$ : string -> string list -> string * string list
end;
wenzelm@6116
    34
wenzelm@6116
    35
(*full scanner interface: BASIC_SCAN plus generic scanners, state-based
  scanners, finite / draining scans, and lexicons*)
signature SCAN =
sig
  include BASIC_SCAN

  (*exception handling wrappers*)
  val prompt: string -> ('a -> 'b) -> 'a -> 'b
  val error: ('a -> 'b) -> 'a -> 'b
  val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)

  (*generic scanners*)
  val fail: 'a -> 'b
  val fail_with: ('a -> string) -> 'a -> 'b
  val succeed: 'a -> 'b -> 'a * 'b
  val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
  val this: string list -> string list -> string list * string list
  val this_string: string -> string list -> string * string list
  val many: ('a -> bool) -> 'a list -> 'a list * 'a list
  val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
  val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
  val first: ('a -> 'b) list -> 'a -> 'b

  (*state-based scanners*)
  val state: 'a * 'b -> 'a * ('a * 'b)
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
  val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
  val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd

  (*trace of consumed input*)
  val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list

  (*finite scans, delimited by a stopper element*)
  val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    -> 'b * 'a list -> 'c * ('d * 'a list)
  val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
  val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option

  (*scans that drain a source on demand*)
  val drain: string -> (string -> 'a -> 'b list * 'a) -> 'b * ('b -> bool) ->
    ('c * 'b list -> 'd * ('e * 'b list)) -> ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a

  (*lexicons of literals*)
  type lexicon
  val dest_lexicon: lexicon -> string list
  val make_lexicon: string list list -> lexicon
  val empty_lexicon: lexicon
  val extend_lexicon: string list list -> lexicon -> lexicon
  val merge_lexicons: lexicon -> lexicon -> lexicon
  val is_literal: lexicon -> string list -> bool
  val literal: lexicon -> string list -> string list * string list
end;
wenzelm@6116
    82
wenzelm@6116
    83
structure Scan: SCAN =
wenzelm@6116
    84
struct
wenzelm@6116
    85
wenzelm@6116
    86
wenzelm@6116
    87
(** scanners **)
wenzelm@6116
    88
wenzelm@23699
    89
(* exceptions *)
wenzelm@23699
    90
berghofe@11523
    91
exception MORE of string option;        (*need more input (prompt)*)
berghofe@11523
    92
exception FAIL of string option;        (*try alternatives (reason of failure)*)
berghofe@11523
    93
exception ABORT of string;              (*dead end*)
wenzelm@6116
    94
wenzelm@23699
    95
(*cut: a FAIL raised by scan becomes ABORT, with the message computed by err
  from the original input and the failure reason*)
fun !! err scan input =
  scan input
    handle FAIL reason => raise ABORT (err (input, reason));
wenzelm@23699
    96
(*both MORE and ABORT raised by scan are turned into FAIL NONE*)
fun permissive scan input =
  scan input handle
    MORE _ => raise FAIL NONE
  | ABORT _ => raise FAIL NONE;
wenzelm@23699
    97
(*MORE raised by scan is turned into FAIL NONE; other exceptions pass through*)
fun strict scan input =
  scan input
    handle MORE _ => raise FAIL NONE;
wenzelm@23699
    98
(*attach prompt str to a MORE request that does not carry one yet*)
fun prompt str scan input =
  scan input
    handle MORE NONE => raise MORE (SOME str);
wenzelm@23699
    99
(*hand the message of an ABORT raised by scan over to Library.error*)
fun error scan input =
  scan input
    handle ABORT text => Library.error text;
wenzelm@23699
   100
wenzelm@23699
   101
(*map ABORT and FAIL raised by scan to exception Fail; a FAIL without reason
  yields the default message*)
fun catch scan input =
  scan input handle
    ABORT text => raise Fail text
  | FAIL (SOME text) => raise Fail text
  | FAIL NONE => raise Fail "Syntax error.";
wenzelm@23699
   104
wenzelm@6116
   105
wenzelm@6116
   106
(* scanner combinators *)
wenzelm@6116
   107
wenzelm@19306
   108
(*apply f to the result of scan, leaving the remaining input untouched*)
fun (scan >> f) input =
  let val (result, rest) = scan input
  in (f result, rest) end;
kleing@14078
   109
wenzelm@19306
   110
(*alternative: run scan2 on the same input if scan1 raises FAIL*)
fun (scan1 || scan2) input =
  scan1 input
    handle FAIL _ => scan2 input;
kleing@14078
   111
wenzelm@19306
   112
(*dependent sequence: scan2 receives the result of scan1 and continues on the
  input left over by scan1; both results are paired*)
fun (scan1 :-- scan2) input =
  (case scan1 input of
    (a, rest) =>
      (case scan2 a rest of
        (b, rest') => ((a, b), rest')));
kleing@14078
   117
wenzelm@19306
   118
(*plain sequence: dependent sequence whose second scanner ignores the first result*)
fun (scan1 -- scan2) =
  let fun ignoring _ = scan2
  in scan1 :-- ignoring end;
wenzelm@19306
   119
(*sequence that keeps only the second result*)
fun (scan1 |-- scan2) = (scan1 -- scan2) >> (fn (_, y) => y);
wenzelm@19306
   120
(*sequence that keeps only the first result*)
fun (scan1 --| scan2) = (scan1 -- scan2) >> (fn (x, _) => x);
wenzelm@19306
   121
(*sequence of two string scanners, concatenating their results*)
fun (scan1 ^^ scan2) = (scan1 -- scan2) >> (fn (s1, s2) => s1 ^ s2);
wenzelm@6116
   122
wenzelm@6116
   123
wenzelm@6116
   124
(* generic scanners *)
wenzelm@6116
   125
skalberg@15531
   126
(*scanner that always fails, without giving a reason*)
fun fail _ =
  raise FAIL NONE;
skalberg@15531
   127
(*scanner that always fails, with a reason computed from the input*)
fun fail_with msg_of input =
  let val reason = msg_of input
  in raise FAIL (SOME reason) end;
wenzelm@6116
   128
(*scanner that consumes nothing and returns the given result*)
fun succeed result input = (result, input);
wenzelm@6116
   129
wenzelm@15664
   130
(*scan one element x for which f x is SOME y, returning y;
  empty input requests MORE, a rejected element gives FAIL*)
fun some f input =
  (case input of
    [] => raise MORE NONE
  | x :: rest =>
      (case f x of
        NONE => raise FAIL NONE
      | SOME y => (y, rest)));
wenzelm@15664
   133
skalberg@15531
   134
(*scan one element satisfying pred;
  empty input requests MORE, a rejected element gives FAIL*)
fun one pred input =
  (case input of
    [] => raise MORE NONE
  | x :: rest => if pred x then (x, rest) else raise FAIL NONE);
wenzelm@6116
   137
wenzelm@19306
   138
(*scan one element equal to the literal a*)
fun $$ a =
  let fun matches (s: string) = s = a
  in one matches end;
wenzelm@19306
   139
(*scan one element different from the literal a*)
fun ~$$ a =
  let fun differs (s: string) = s <> a
  in one differs end;
wenzelm@6116
   140
wenzelm@14833
   141
(*scan exactly the elements ys as a prefix of the input;
  input ending inside the prefix requests MORE, a mismatch gives FAIL*)
fun this ys xs =
  let
    fun strip ([], rest) = rest
      | strip (_ :: _, []) = raise MORE NONE
      | strip ((y: string) :: ys', x :: xs') =
          if y = x then strip (ys', xs') else raise FAIL NONE;
  in (ys, strip (ys, xs)) end;
wenzelm@14726
   148
wenzelm@15664
   149
(*scan the exploded form of s and return s itself;
  primitive string -- no symbols here!*)
fun this_string s = this (explode s) >> (fn _ => s);
wenzelm@14907
   150
wenzelm@21858
   151
(*scan the longest prefix of elements satisfying pred (possibly empty);
  running out of input before a rejected element requests MORE*)
fun many pred input =
  (case input of
    [] => raise MORE NONE
  | x :: rest =>
      if not (pred x) then ([], input)
      else
        let val (accepted, remaining) = many pred rest
        in (x :: accepted, remaining) end);
wenzelm@6116
   155
wenzelm@21858
   156
(*like many, but at least one element satisfying pred is required*)
fun many1 pred = (one pred -- many pred) >> (fn (x, xs) => x :: xs);
wenzelm@6116
   157
wenzelm@15664
   158
(*run scan; if it fails, consume nothing and return the default def*)
fun optional scan def =
  let val fallback = succeed def
  in scan || fallback end;
wenzelm@15664
   159
(*run scan, wrapping its result in SOME; if it fails, consume nothing and return NONE*)
fun option scan =
  let val present = scan >> (fn y => SOME y)
  in present || succeed NONE end;
wenzelm@6116
   160
berghofe@13795
   161
(*apply scan repeatedly until it raises FAIL, collecting the results in order;
  the input at the point of failure is returned as the rest*)
fun repeat scan =
  let
    fun attempt input = SOME (scan input) handle FAIL _ => NONE;
    fun loop acc input =
      (case attempt input of
        SOME (y, rest) => loop (y :: acc) rest
      | NONE => (rev acc, input));
  in loop [] end;
berghofe@13795
   168
wenzelm@15664
   169
(*like repeat, but the first application of scan must succeed*)
fun repeat1 scan = (scan -- repeat scan) >> (fn (x, xs) => x :: xs);
wenzelm@6116
   170
wenzelm@23699
   171
(*run scan once and return its result as a one-element list*)
fun single scan =
  let fun singleton x = [x]
  in scan >> singleton end;
wenzelm@23699
   172
(*run scan once, then repeat its permissive variant, so that MORE or ABORT
  in later rounds merely ends the repetition*)
fun bulk scan =
  let val tolerant = permissive scan
  in (scan -- repeat tolerant) >> (fn (x, xs) => x :: xs) end;
wenzelm@23699
   173
wenzelm@6116
   174
(*run both scanners on the same input; if only one succeeds take its result,
  if both succeed take that of scan1 when leq (tok2, tok1) holds and that of
  scan2 otherwise; if both fail raise FAIL NONE (loses the FAIL msg!)*)
fun max leq scan1 scan2 xs =
  let
    val (res1, rest1) = option scan1 xs;
    val (res2, rest2) = option scan2 xs;
  in
    (case (res1, res2) of
      (NONE, NONE) => raise FAIL NONE
    | (SOME tok1, NONE) => (tok1, rest1)
    | (NONE, SOME tok2) => (tok2, rest2)
    | (SOME tok1, SOME tok2) =>
        if leq (tok2, tok1) then (tok1, rest1) else (tok2, rest2))
  end;
wenzelm@6116
   181
wenzelm@6116
   182
(*look ahead: return the result of scan, but leave the input unconsumed*)
fun ahead scan input =
  let val (result, _) = scan input
  in (result, input) end;
wenzelm@6116
   183
wenzelm@6116
   184
(*run scan only if test does not succeed on the current input (test is merely
  looked ahead, it consumes nothing); otherwise fail*)
fun unless test scan =
  let
    fun proceed NONE = scan
      | proceed (SOME _) = fail;
  in (ahead (option test) :-- proceed) >> (fn (_, result) => result) end;
wenzelm@6116
   186
wenzelm@6116
   187
(*try the given scanners from left to right; the empty list always fails*)
fun first scans =
  (case scans of
    [] => fail
  | scan :: more => scan || first more);
wenzelm@6116
   189
wenzelm@6116
   190
wenzelm@6116
   191
(* state based scanners *)
wenzelm@6116
   192
wenzelm@9122
   193
(*return the current state as result, leaving state and input unchanged*)
fun state (arg as (st, _)) = (st, arg);
wenzelm@9122
   194
wenzelm@6116
   195
(*run a scanner parameterized by the current state; it yields the new state
  together with its result, and the new state is threaded on*)
fun depend scan (st, xs) =
  (case scan st xs of
    ((st', y), rest) => (y, (st', rest)));
wenzelm@6116
   198
wenzelm@15664
   199
(*run a scanner parameterized by the current state, keeping the state unchanged*)
fun peek scan =
  depend (fn st => scan st >> (fn y => (st, y)));
wenzelm@15664
   200
wenzelm@15664
   201
(*run a state-based scanner from initial state st, discarding the final state*)
fun pass st scan xs =
  (case scan (st, xs) of
    (y, (_, rest)) => (y, rest));
wenzelm@15664
   204
wenzelm@6116
   205
(*turn a plain scanner into a state-based one that passes the state through*)
fun lift scan (st, xs) =
  (case scan xs of
    (y, rest) => (y, (st, rest)));
wenzelm@6116
   208
wenzelm@15664
   209
(*run a state-based scanner with the unit value as its state*)
fun unlift scan xs = pass () scan xs;
wenzelm@15664
   210
wenzelm@15664
   211
wenzelm@15664
   212
(* trace input *)
wenzelm@15664
   213
wenzelm@23699
   214
(*run scan and pair its result with a prefix of the input whose length is the
  difference between the lengths of the input and of the remaining list*)
fun trace scan xs =
  let
    val (y, rest) = scan xs;
    val consumed = length xs - length rest;
  in ((y, Library.take (consumed, xs)), rest) end;
wenzelm@6116
   217
wenzelm@6116
   218
wenzelm@6116
   219
(* finite scans *)
wenzelm@6116
   220
wenzelm@6116
   221
(*state-based scan over finite input: the stopper is appended to the input,
  the scanner is run strictly (MORE becomes FAIL), and afterwards the stopper
  must be the last element of the remaining input, where it is removed again;
  input that already contains a stopper is rejected with ABORT*)
fun finite' (stopper, is_stopper) scan (state, input) =
  let
    fun stopper_lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";

    fun drop_stopper [] = stopper_lost ()
      | drop_stopper rest =
          let val (front, final) = split_last rest
          in if is_stopper final then ((), front) else stopper_lost () end;
  in
    if not (exists is_stopper input) then
      (strict scan --| lift drop_stopper) (state, input @ [stopper])
    else raise ABORT "Stopper may not occur in input of finite scan!"
  end;
wenzelm@6116
   234
wenzelm@15664
   235
(*stateless variant of finite', obtained via lift and unlift*)
fun finite stopper scan =
  let val stateful = finite' stopper (lift scan)
  in unlift stateful end;
wenzelm@6116
   236
wenzelm@6116
   237
(*finite scan of the whole input: SOME result only if scan succeeds and no
  input is left over, NONE otherwise; ABORT is passed to error*)
fun read stopper scan xs =
  let val (result, rest) = error (finite stopper (option scan)) xs
  in if null rest then result else NONE end;
wenzelm@6116
   241
wenzelm@6116
   242
wenzelm@6116
   243
(* infinite scans -- draining state-based source *)
wenzelm@6116
   244
wenzelm@23699
   245
(*run a state-based scanner on the buffered input xs; whenever it requests
  MORE, fetch further input from src via get (using the requested prompt, or
  def_prompt if none was given) and retry on the extended buffer; once get
  delivers nothing, finish with a finite scan of the current buffer*)
fun drain def_prompt get stopper scan ((state, xs), src) =
  (scan (state, xs), src)
    handle MORE request =>
      let
        val text = (case request of SOME str => str | NONE => def_prompt);
        val (more, src') = get text src;
      in
        if null more then (finite' stopper scan (state, xs), src)
        else drain def_prompt get stopper scan ((state, xs @ more), src')
      end;
wenzelm@6116
   250
wenzelm@6116
   251
wenzelm@6116
   252
wenzelm@6116
   253
(** datatype lexicon **)
wenzelm@6116
   254
wenzelm@6116
   255
(*search tree of literals: Branch (symbol, entry, less, equal, greater);
  entry holds the complete literal ending at this node, or the empty list if
  no literal ends here; less / greater hold alternatives for the same position,
  equal continues with the next symbol*)
datatype lexicon =
    Empty
  | Branch of string * string list * lexicon * lexicon * lexicon;
wenzelm@6116
   258
wenzelm@6116
   259
(*marker stored in a Branch at which no literal ends*)
val no_literal = nil;
wenzelm@6116
   260
wenzelm@6116
   261
wenzelm@6116
   262
(* dest_lexicon *)
wenzelm@6116
   263
wenzelm@7025
   264
(*list all literals of a lexicon, traversing less, node entry, equal, greater*)
fun dest_lex Empty = []
  | dest_lex (Branch (_, word, lt, eq, gt)) =
      let val here = if null word then [] else [word]
      in dest_lex lt @ here @ dest_lex eq @ dest_lex gt end;
wenzelm@7025
   269
wenzelm@7025
   270
(*list all literals of a lexicon, each imploded into a single string*)
fun dest_lexicon lex = map implode (dest_lex lex);
wenzelm@6116
   271
wenzelm@6116
   272
wenzelm@6116
   273
(* empty, extend, make, merge lexicons *)
wenzelm@6116
   274
wenzelm@6116
   275
(*the lexicon without any literals*)
val empty_lexicon = Empty;
wenzelm@6116
   276
wenzelm@22112
   277
(*add the literals chrss (lists of symbols) to a lexicon; literals that are
  already present are skipped*)
fun extend_lexicon [] lexicon = lexicon
  | extend_lexicon chrss lexicon =
      let
        val known = dest_lex lexicon;
        val fresh = subtract (op =) known chrss;

        (*insert a single literal, recording it at the node of its last symbol*)
        fun insert word tree =
          let
            fun walk [] node = node
              | walk [c] Empty = Branch (c, word, Empty, Empty, Empty)
              | walk (c :: rest) Empty =
                  Branch (c, no_literal, Empty, walk rest Empty, Empty)
              | walk (all as c :: rest) (Branch (d, entry, lt, eq, gt)) =
                  (case fast_string_ord (c, d) of
                    LESS => Branch (d, entry, walk all lt, eq, gt)
                  | EQUAL =>
                      Branch (d, if null rest then word else entry, lt, walk rest eq, gt)
                  | GREATER => Branch (d, entry, lt, eq, walk all gt));
          in walk word tree end;
      in fold insert fresh lexicon end;
wenzelm@6116
   292
wenzelm@22112
   293
(*build a lexicon from the given literals, starting from the empty lexicon*)
fun make_lexicon chrss =
  let val start = empty_lexicon
  in extend_lexicon chrss start end;
wenzelm@6116
   294
wenzelm@6116
   295
(*merge two lexicons; if the literals of one are a subset of those of the
  other, the larger lexicon is returned as is, otherwise the literals of lex2
  are added to lex1*)
fun merge_lexicons lex1 lex2 =
  let
    val words1 = dest_lex lex1;
    val words2 = dest_lex lex2;
  in
    if words2 subset words1 then lex1
    else
      if words1 subset words2 then lex2
      else extend_lexicon words2 lex1
  end;
wenzelm@6116
   304
wenzelm@6116
   305
wenzelm@14686
   306
(* is_literal *)
wenzelm@14686
   307
wenzelm@14686
   308
(*check whether the given list of symbols is exactly one of the literals of
  the lexicon; the empty list never is*)
fun is_literal lex chs =
  (case (lex, chs) of
    (Empty, _) => false
  | (_, []) => false
  | (Branch (d, entry, lt, eq, gt), c :: rest) =>
      (case fast_string_ord (c, d) of
        EQUAL => (null rest andalso entry <> no_literal) orelse is_literal eq rest
      | LESS => is_literal lt chs
      | GREATER => is_literal gt chs));
wenzelm@14686
   315
wenzelm@14686
   316
wenzelm@6116
   317
(* scan literal *)
wenzelm@6116
   318
wenzelm@6116
   319
(*scan the longest literal of the lexicon that is a prefix of the input,
  returning it with the remaining input; FAIL if no literal matches, MORE if
  the input ends while the lexicon still offers continuations*)
fun literal lex chrs =
  let
    (*best is the longest match found so far, together with its rest*)
    fun walk Empty best _ = best
      | walk (Branch _) _ [] = raise MORE NONE
      | walk (Branch (d, entry, lt, eq, gt)) best (input as c :: rest) =
          (case fast_string_ord (c, d) of
            EQUAL =>
              walk eq (if entry = no_literal then best else SOME (entry, rest)) rest
          | LESS => walk lt best input
          | GREATER => walk gt best input);
  in
    (case walk lex NONE chrs of
      SOME found => found
    | NONE => raise FAIL NONE)
  end;
wenzelm@6116
   333
wenzelm@6116
   334
end;
wenzelm@6116
   335
wenzelm@6116
   336
(*view of structure Scan through the smaller signature BASIC_SCAN*)
structure BasicScan: BASIC_SCAN = Scan;
wenzelm@6116
   337
open BasicScan;