src/Pure/General/scan.ML
author wenzelm
Tue, 10 Dec 2024 16:37:09 +0100
changeset 81569 f8b28356ab94
parent 78817 30bcf149054d
child 81588 81a72b7fcb0c
permissions -rw-r--r--
more LaTeX markup for printed entities;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
11523
9a658fe20107 Tuned function extend_lexicon.
berghofe
parents: 10746
diff changeset
     1
(*  Title:      Pure/General/scan.ML
9a658fe20107 Tuned function extend_lexicon.
berghofe
parents: 10746
diff changeset
     2
    Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
     3
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
     4
Generic scanners (for potentially infinite input).
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
     5
*)
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
     6
24025
77e3e5781a99 added :|-- (dependent projection);
wenzelm
parents: 23699
diff changeset
     7
infix 5 -- :-- :|-- |-- --| ^^;
25999
f8bcd311d501 added ::: / @@@ scanner combinators;
wenzelm
parents: 24595
diff changeset
     8
infixr 5 ::: @@@;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
     9
infix 3 >>;
23699
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
    10
infixr 0 ||;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    11
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    12
signature BASIC_SCAN =
sig
  (*failure message, represented as a function that produces the text on demand*)
  type message = unit -> string
  (*error msg handler: FAIL of the argument scanner is turned into ABORT*)
  val !! : ('a * message option -> message) -> ('a -> 'b) -> 'a -> 'b
  val !!! : string -> ('a -> string option) -> ('a -> 'b) -> 'a -> 'b
  (*apply function to the scanned result*)
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
  (*alternative: the second scanner is tried if the first one raises FAIL*)
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
  (*sequential pairing*)
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*dependent pairing: the second scanner receives the first result*)
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*projections*)
  val :|-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> 'd * 'e
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
  (*concatenation of string results (^^) and list results (::: and @@@)*)
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
  val ::: : ('a -> 'b * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
  val @@@ : ('a -> 'b list * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
  (*one element literal: positive ($$) and negative (~$$)*)
  val $$ : string -> string list -> string * string list
  val ~$$ : string -> string list -> string * string list
end;
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    38
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    39
signature SCAN =
sig
  include BASIC_SCAN

  (*exceptions*)
  val permissive: ('a -> 'b) -> 'a -> 'b
  val error: ('a -> 'b) -> 'a -> 'b
  val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)
  val recover: ('a -> 'b) -> (string -> 'a -> 'b) -> 'a -> 'b

  (*utils*)
  val triple1: ('a * 'b) * 'c -> 'a * 'b * 'c
  val triple2: 'a * ('b * 'c) -> 'a * 'b * 'c

  (*generic scanners*)
  val fail: 'a -> 'b
  val fail_with: ('a -> message) -> 'a -> 'b
  val succeed: 'a -> 'b -> 'a * 'b
  val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
  val this: string list -> string list -> string list * string list
  val this_string: string -> string list -> string * string list
  val many: ('a -> bool) -> 'a list -> 'a list * 'a list
  val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeats: ('a -> 'b list * 'a) -> 'a -> 'b list * 'a
  val repeats1: ('a -> 'b list * 'a) -> 'a -> 'b list * 'a
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
  val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
  val first: ('a -> 'b) list -> 'a -> 'b

  (*state based scanners*)
  val state: 'a * 'b -> 'a * ('a * 'b)
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
  val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
  val provide: ('a -> bool) -> 'b -> ('b * 'c -> 'd * ('a * 'e)) -> 'c -> 'd * 'e
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
  val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd

  val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list

  type 'a stopper
  val stopper: ('a list -> 'a) -> ('a -> bool) -> 'a stopper
  val is_stopper: 'a stopper -> 'a -> bool
  val finite': 'a stopper -> ('b * 'a list -> 'c * ('d * 'a list))
    -> 'b * 'a list -> 'c * ('d * 'a list)
  val finite: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
  val read: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
  val drain: ('a -> 'b list * 'a) -> 'b stopper -> ('c * 'b list -> 'd * ('e * 'b list)) ->
    ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a

  type lexicon
  val is_literal: lexicon -> string list -> bool
  val literal: lexicon -> (string * 'a) list -> (string * 'a) list * (string * 'a) list
  val empty_lexicon: lexicon
  val extend_lexicon: string list -> lexicon -> lexicon
  val make_lexicon: string list list -> lexicon
  val dest_lexicon: lexicon -> string list
  val merge_lexicons: lexicon * lexicon -> lexicon
end;
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    95
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    96
structure Scan: SCAN =
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    97
struct
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    98
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
    99
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   100
(** scanners **)
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   101
23699
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   102
(* exceptions *)
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   103
43947
9b00f09f7721 defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
wenzelm
parents: 40627
diff changeset
   104
(*failure message as a thunk: the text is only computed when the message is applied to ()*)
type message = unit -> string;
9b00f09f7721 defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
wenzelm
parents: 40627
diff changeset
   105
58850
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   106
(*raised by the element scanners of this structure when they reach the empty input list*)
exception MORE of unit;  (*need more input*)
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   107
(*soft failure, handled by combinators like || and repeat; optionally carries a lazy message*)
exception FAIL of message option;  (*try alternatives (reason of failure)*)
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   108
(*hard failure with a lazy message: raised by !!, not handled by || or repeat*)
exception ABORT of message;  (*dead end*)
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   109
23699
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   110
(*commit to scan: a soft FAIL becomes a hard ABORT, with the message built by err
  from the original input and the optional reason of failure*)
fun !! err scan xs =
  (scan xs) handle FAIL reason => raise ABORT (err (xs, reason));
78817
30bcf149054d clarified modules;
wenzelm
parents: 62491
diff changeset
   111
30bcf149054d clarified modules;
wenzelm
parents: 62491
diff changeset
   112
(*variant of !! with a standard message layout: prefix, followed by the position
  given by input_position (or " (end-of-input)" if there is none), followed by the
  inner failure message; an inner message that already starts with prefix is kept
  unchanged*)
fun !!! prefix input_position scan =
  let
    (*prefix plus printed position of the remaining input*)
    fun located inp =
      prefix ^ (case input_position inp of SOME pos => pos | NONE => " (end-of-input)");

    (*lazy message: nothing is evaluated before () is applied*)
    fun report (inp, reason) () =
      (case reason of
        NONE => located inp
      | SOME msg =>
          let val text = msg () in
            if String.isPrefix prefix text then text
            else located inp ^ ": " ^ text
          end);
  in !! report scan end;
30bcf149054d clarified modules;
wenzelm
parents: 62491
diff changeset
   124
58850
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   125
(*downgrade both MORE and ABORT of scan to a soft FAIL without message*)
fun permissive scan xs =
  let fun soft_fail () = raise FAIL NONE
  in scan xs handle MORE () => soft_fail () | ABORT _ => soft_fail () end;
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   126
(*downgrade MORE of scan to a soft FAIL without message; ABORT passes through
  (not exported via signature SCAN)*)
fun strict scan xs =
  (scan xs) handle MORE () => raise FAIL NONE;
62491
7187cb7a77c5 clarified modules;
wenzelm
parents: 61476
diff changeset
   127
(*report ABORT of scan via Exn.error, forcing the lazy message*)
fun error scan xs =
  (scan xs) handle ABORT lazy_msg => Exn.error (lazy_msg ());
23699
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   128
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   129
(*map both ABORT and FAIL of scan to the standard exception Fail; a FAIL without
  reason is reported as "Syntax error"*)
fun catch scan xs =
  let
    fun text_of NONE = "Syntax error"
      | text_of (SOME msg) = msg ();
  in
    scan xs handle
      ABORT msg => raise Fail (msg ())
    | FAIL reason => raise Fail (text_of reason)
  end;
23699
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   132
58864
505a8150368a recover via scanner;
wenzelm
parents: 58850
diff changeset
   133
(*run scan1 under catch; if that raises Fail, run scan2 on the same input,
  passing it the failure text*)
fun recover scan1 scan2 xs =
  (catch scan1 xs) handle Fail reason => scan2 reason xs;
505a8150368a recover via scanner;
wenzelm
parents: 58850
diff changeset
   135
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   136
61466
9a468c3a1fa1 tuned signature;
wenzelm
parents: 59196
diff changeset
   137
(* utils *)
9a468c3a1fa1 tuned signature;
wenzelm
parents: 59196
diff changeset
   138
9a468c3a1fa1 tuned signature;
wenzelm
parents: 59196
diff changeset
   139
(*flatten a left-nested pair into a triple*)
fun triple1 (xy, z) =
  let val (x, y) = xy in (x, y, z) end;
9a468c3a1fa1 tuned signature;
wenzelm
parents: 59196
diff changeset
   140
(*flatten a right-nested pair into a triple*)
fun triple2 (x, yz) =
  let val (y, z) = yz in (x, y, z) end;
9a468c3a1fa1 tuned signature;
wenzelm
parents: 59196
diff changeset
   141
9a468c3a1fa1 tuned signature;
wenzelm
parents: 59196
diff changeset
   142
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   143
(* scanner combinators *)
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   144
19306
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   145
(*apply f to the result of scan, leaving the remaining input untouched*)
fun (scan >> f) xs =
  let val (x, rest) = scan xs in (f x, rest) end;
14078
cddad2aa025b integrated optimizations by Sebastian Skalberg,
kleing
parents: 13795
diff changeset
   146
19306
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   147
(*alternative: try the second scanner on the same input if the first one raises FAIL*)
fun (first_scan || second_scan) xs =
  first_scan xs
    handle FAIL _ => second_scan xs;
14078
cddad2aa025b integrated optimizations by Sebastian Skalberg,
kleing
parents: 13795
diff changeset
   148
19306
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   149
(*dependent pairing: scan2 is parameterized by the result of scan1 and continues
  on the input left over by scan1; both results are returned as a pair*)
fun (scan1 :-- scan2) xs =
  (case scan1 xs of
    (x, rest) =>
      (case scan2 x rest of
        (y, rest') => ((x, y), rest')));
14078
cddad2aa025b integrated optimizations by Sebastian Skalberg,
kleing
parents: 13795
diff changeset
   154
19306
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   155
(*sequential pairing: scan2 continues on the input left over by scan1*)
fun (scan1 -- scan2) xs =
  let
    val (x, ys) = scan1 xs;
    val (y, zs) = scan2 ys;
  in ((x, y), zs) end;
24025
77e3e5781a99 added :|-- (dependent projection);
wenzelm
parents: 23699
diff changeset
   156
(*dependent projection: like :--, but only the second result is kept*)
fun (scan1 :|-- scan2) xs =
  let val ((_, y), rest) = (scan1 :-- scan2) xs in (y, rest) end;
19306
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   157
(*sequence of two scanners, keeping only the second result*)
fun (scan1 |-- scan2) xs =
  let val ((_, y), rest) = (scan1 -- scan2) xs in (y, rest) end;
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   158
(*sequence of two scanners, keeping only the first result*)
fun (scan1 --| scan2) xs =
  let val ((x, _), rest) = (scan1 -- scan2) xs in (x, rest) end;
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   159
(*sequence of two string scanners, concatenating their results*)
fun (scan1 ^^ scan2) xs =
  let val ((s1, s2), rest) = (scan1 -- scan2) xs in (s1 ^ s2, rest) end;
25999
f8bcd311d501 added ::: / @@@ scanner combinators;
wenzelm
parents: 24595
diff changeset
   160
(*sequence of an element scanner and a list scanner, consing their results*)
fun (scan1 ::: scan2) xs =
  let val ((y, ys), rest) = (scan1 -- scan2) xs in (y :: ys, rest) end;
f8bcd311d501 added ::: / @@@ scanner combinators;
wenzelm
parents: 24595
diff changeset
   161
(*sequence of two list scanners, appending their results*)
fun (scan1 @@@ scan2) xs =
  let val ((ys1, ys2), rest) = (scan1 -- scan2) xs in (ys1 @ ys2, rest) end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   162
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   163
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   164
(* generic scanners *)
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   165
15531
08c8dad8e399 Deleted Library.option type.
skalberg
parents: 14981
diff changeset
   166
(*scanner that ignores its input and always fails softly, without a message*)
fun fail _ = raise FAIL NONE;
08c8dad8e399 Deleted Library.option type.
skalberg
parents: 14981
diff changeset
   167
(*scanner that always fails softly, with the lazy message that msg_of derives from the input*)
fun fail_with msg_of xs =
  let val reason = msg_of xs
  in raise FAIL (SOME reason) end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   168
(*scanner that yields y without consuming any input*)
fun succeed y = fn xs => (y, xs);
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   169
58850
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   170
(*scan one element that f maps to SOME result; MORE on empty input, FAIL otherwise*)
fun some f input =
  (case input of
    [] => raise MORE ()
  | x :: rest =>
      (case f x of
        SOME y => (y, rest)
      | NONE => raise FAIL NONE));
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   173
58850
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   174
(*scan one element that satisfies pred; MORE on empty input, FAIL otherwise*)
fun one pred input =
  (case input of
    [] => raise MORE ()
  | x :: rest =>
      if pred x then (x, rest)
      else raise FAIL NONE);
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   177
19306
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   178
(*scan one element that is equal to the string a*)
fun $$ (a: string) = one (fn s => s = a);
73137c0b26f5 added ~$$ (negative literal);
wenzelm
parents: 19291
diff changeset
   179
(*scan one element that is different from the string a*)
fun ~$$ (a: string) = one (fn s => s <> a);
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   180
14833
30556b84af7c Output.error;
wenzelm
parents: 14726
diff changeset
   181
(*scan exactly the elements ys from the front of xs: the result is ys itself plus
  the remaining input; MORE if xs ends before ys is complete, FAIL on the first
  mismatch*)
fun this ys xs =
  let
    fun strip (expected, input) =
      (case (expected, input) of
        ([], rest) => rest
      | (_ :: _, []) => raise MORE ()
      | (e :: es, i :: ins) =>
          if (e: string) <> i then raise FAIL NONE
          else strip (es, ins));
  in (ys, strip (ys, xs)) end;
9657c23cc3e7 added Scan.list;
wenzelm
parents: 14686
diff changeset
   188
40627
becf5d5187cc renamed raw "explode" function to "raw_explode" to emphasize its meaning;
wenzelm
parents: 38875
diff changeset
   189
(*scan the raw characters of s and yield s itself*)
fun this_string s = this (raw_explode s) >> (fn _ => s);  (*primitive string -- no symbols here!*)
14907
c77fda9b6cf0 added this_string;
wenzelm
parents: 14833
diff changeset
   190
58850
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   191
(*scan the longest prefix of elements that satisfy pred (possibly empty), stopping
  at the first element that does not; MORE if the input runs out before such an
  element is seen*)
fun many pred input =
  (case input of
    [] => raise MORE ()
  | x :: rest =>
      if pred x then
        let val (accepted, remaining) = many pred rest
        in (x :: accepted, remaining) end
      else ([], input));
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   195
25999
f8bcd311d501 added ::: / @@@ scanner combinators;
wenzelm
parents: 24595
diff changeset
   196
(*like many, but at least one element must satisfy pred*)
fun many1 pred =
  let
    val head = one pred;
    val tail = many pred;
  in head ::: tail end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   197
15664
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   198
(*result of scan, or the default def without consuming input if scan raises FAIL*)
fun optional scan def xs =
  scan xs handle FAIL _ => (def, xs);
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   199
(*SOME result of scan, or NONE without consuming input if scan raises FAIL*)
fun option scan xs =
  let val (y, rest) = scan xs in (SOME y, rest) end
    handle FAIL _ => (NONE, xs);
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   200
13795
cfa3441c5238 Some tuning:
berghofe
parents: 11523
diff changeset
   201
(*apply scan as often as possible (zero or more times), until it raises FAIL;
  results are accumulated in reverse by a tail-recursive loop*)
fun repeat scan =
  let
    fun attempt xs = SOME (scan xs) handle FAIL _ => NONE;

    fun loop acc xs =
      (case attempt xs of
        SOME (y, rest) => loop (y :: acc) rest
      | NONE => (rev acc, xs));
  in loop [] end;
13795
cfa3441c5238 Some tuning:
berghofe
parents: 11523
diff changeset
   208
25999
f8bcd311d501 added ::: / @@@ scanner combinators;
wenzelm
parents: 24595
diff changeset
   209
(*like repeat, but scan must succeed at least once*)
fun repeat1 scan xs =
  let
    val (y, rest) = scan xs;
    val (ys, rest') = repeat scan rest;
  in (y :: ys, rest') end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   210
61476
1884c40f1539 tuned signature;
wenzelm
parents: 61466
diff changeset
   211
(*repeat a list-valued scanner and flatten the collected results*)
fun repeats scan xs =
  let val (yss, rest) = repeat scan xs
  in (flat yss, rest) end;
1884c40f1539 tuned signature;
wenzelm
parents: 61466
diff changeset
   212
(*like repeats, but scan must succeed at least once*)
fun repeats1 scan xs =
  let val (yss, rest) = repeat1 scan xs
  in (flat yss, rest) end;
1884c40f1539 tuned signature;
wenzelm
parents: 61466
diff changeset
   213
23699
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   214
(*wrap the result of scan into a singleton list*)
fun single scan xs =
  let val (x, rest) = scan xs in ([x], rest) end;
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   215
(*one regular application of scan, followed by repeated permissive applications
  (where MORE and ABORT merely end the repetition)*)
fun bulk scan = scan ::: repeat (permissive scan);
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   216
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   217
(*run both scanners on the same input and choose between their outcomes: if only
  one succeeds, its outcome is taken; if both succeed, the first one wins when
  leq (tok2, tok1) holds and the second one otherwise; FAIL if both fail*)
fun max leq scan1 scan2 xs =
  let
    val res1 = option scan1 xs;
    val res2 = option scan2 xs;
  in
    (case res1 of
      (NONE, _) =>
        (case res2 of
          (NONE, _) => raise FAIL NONE           (*loses FAIL msg!*)
        | (SOME tok2, xs2) => (tok2, xs2))
    | (SOME tok1, xs1) =>
        (case res2 of
          (NONE, _) => (tok1, xs1)
        | (SOME tok2, xs2) =>
            if leq (tok2, tok1) then (tok1, xs1) else (tok2, xs2)))
  end;
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   224
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   225
(*lookahead: deliver the result of `scan`, but return the original input `xs` as remaining input*)
fun ahead scan xs = (fst (scan xs), xs);
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   226
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   227
(*Apply `scan` only where `test` does not succeed: `test` is tried as
  lookahead (via `ahead (option test)`); on NONE the actual `scan` runs,
  otherwise `fail` is used.  Only the result of `scan` is returned.*)
fun unless test scan =
  let
    fun continue NONE = scan
      | continue (SOME _) = fail;
  in ahead (option test) :-- continue >> #2 end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   229
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   230
(*Alternative over a list of scanners, chained left-to-right with `||`;
  the empty list yields `fail`.*)
fun first scans =
  (case scans of
    [] => fail
  | scan :: rest => scan || first rest);
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   232
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   233
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   234
(* state based scanners *)
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   235
9122
addbea344673 added state: 'a * 'b -> 'a * ('a * 'b);
wenzelm
parents: 8806
diff changeset
   236
(*deliver the current state `st` as result; state and input stay unchanged*)
fun state (st, xs) = (st, (st, xs));
addbea344673 added state: 'a * 'b -> 'a * ('a * 'b);
wenzelm
parents: 8806
diff changeset
   237
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   238
(*Turn a state-dependent scanner into a state-based one: `scan st xs`
  yields ((st', y), xs'), which is rearranged into result y together
  with the new state st' and the remaining input xs'.*)
fun depend scan (st, xs) =
  (case scan st xs of
    ((st', y), xs') => (y, (st', xs')));
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   241
15664
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   242
(*run the state-dependent `scan st`, passing the state `st` on unchanged (built from `depend` and `pair st`)*)
fun peek scan = depend (fn st => scan st >> pair st);
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   243
55104
8284c0d5bf52 clarified scan_cartouche_depth, according to Scala version;
wenzelm
parents: 48743
diff changeset
   244
(*Run the state-based `scan` with initial state `st` on plain input `xs`.
  The result is accepted only if the final state satisfies `pred`;
  otherwise `fail ()` is invoked.  The final state itself is dropped.*)
fun provide pred st scan xs =
  (case scan (st, xs) of
    (y, (st', xs')) => if pred st' then (y, xs') else fail ());
8284c0d5bf52 clarified scan_cartouche_depth, according to Scala version;
wenzelm
parents: 48743
diff changeset
   247
8284c0d5bf52 clarified scan_cartouche_depth, according to Scala version;
wenzelm
parents: 48743
diff changeset
   248
(*`provide` with the always-true predicate: run with initial state `st`, accepting any final state*)
fun pass st = provide (K true) st;
15664
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   249
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   250
(*Lift a plain scanner to a state-based one: `scan` works on the input
  `xs` only, the state `st` is threaded through untouched.*)
fun lift scan (st, xs) =
  (case scan xs of
    (y, xs') => (y, (st, xs')));
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   253
15664
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   254
(*run a state-based scanner on plain input, using the unit value () as state (via `pass`)*)
fun unlift scan = pass () scan;
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   255
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   256
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   257
(* trace input *)
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   258
23699
5a4527f3ac79 infixr || (more efficient);
wenzelm
parents: 23683
diff changeset
   259
(*Run `scan` and additionally report the consumed input: the result is
  paired with the prefix of `xs` whose length is the difference between
  the lengths of the original and the remaining input.*)
fun trace scan xs =
  let
    val (y, xs') = scan xs;
    val consumed = length xs - length xs';
  in ((y, take consumed xs), xs') end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   262
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   263
27731
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   264
(* stopper *)
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   265
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   266
(*a stopper consists of a function producing the stopper element from an input list, and a predicate recognizing stopper elements*)
datatype 'a stopper = Stopper of ('a list -> 'a) * ('a -> bool);
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   267
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   268
(*curried constructor for the `Stopper` datatype*)
fun stopper mk_stopper is_stopper = Stopper (mk_stopper, is_stopper);
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   269
(*project the recognizer predicate out of a `Stopper`*)
fun is_stopper (Stopper (_, is_stopper)) = is_stopper;
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   270
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   271
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   272
(* finite scans *)
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   273
27731
a7444ded92cf abstract type stopper, may depend on final input;
wenzelm
parents: 25999
diff changeset
   274
(*Finite scan over a state-based scanner: the stopper element produced by
  `mk_stopper input` is appended to `input`, then `strict scan` is run.
  Afterwards the last element of the remaining input must be a stopper,
  which is stripped off again.

  Raises ABORT if the input already contains a stopper element, or if the
  stopper is missing from the end of the remaining input.*)
fun finite' (Stopper (mk_stopper, is_stopper)) scan (state, input) =
  let
    fun lost () = raise ABORT (fn () => "Bad scanner: lost stopper of finite scan!");

    (*strip the trailing stopper from the remaining input*)
    fun stop [] = lost ()
      | stop lst =
          (case split_last lst of
            (xs, x) => if is_stopper x then ((), xs) else lost ());

    (*the stopper must be distinguishable from regular input*)
    val _ =
      if exists is_stopper input then
        raise ABORT (fn () => "Stopper may not occur in input of finite scan!")
      else ();
  in (strict scan --| lift stop) (state, input @ [mk_stopper input]) end;
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   287
15664
7c150afba112 added some, peek, trace'; tuned;
wenzelm
parents: 15570
diff changeset
   288
(*stateless version of `finite'`: the plain scanner is lifted, run as finite scan, and unlifted again*)
fun finite stopper scan = unlift (finite' stopper (lift scan));
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   289
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   290
(*Read the complete input `xs` with a finite scan of `option scan`
  (wrapped in `error`).  The result is SOME only if the scanner succeeds
  and no input remains; in every other case NONE is returned.*)
fun read stopper scan xs =
  let val (result, rest) = error (finite stopper (option scan)) xs in
    (case result of
      SOME _ => if null rest then result else NONE
    | NONE => NONE)
  end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   294
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   295
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   296
(* infinite scans -- draining state-based source *)
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   297
58850
1bb0ad7827b4 discontinued obsolete tty and prompt;
wenzelm
parents: 55104
diff changeset
   298
(*Run the state-based `scan` on the buffered input `xs`.  When the scanner
  raises MORE (), further input is requested from the source via `get`:
  if nothing is delivered, the scan is finished as finite scan over the
  present buffer (keeping the original source); otherwise the new input
  is appended to the buffer and the scan is retried with the new source.*)
fun drain get stopper scan ((state, xs), src) =
  (scan (state, xs), src) handle MORE () =>
    let val (more, src') = get src in
      if null more then (finite' stopper scan (state, xs), src)
      else drain get stopper scan ((state, xs @ more), src')
    end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   303
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   304
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   305
27782
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   306
(** datatype lexicon -- position tree **)
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   307
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   308
(*nested tables indexed by symbol; the bool flag ("tip") marks that the path leading to this entry is a complete literal*)
datatype lexicon = Lexicon of (bool * lexicon) Symtab.table;
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   309
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   310
(*the lexicon without any literals*)
val empty_lexicon = Lexicon Symtab.empty;
59071
wenzelm
parents: 58864
diff changeset
   311
(*check whether the top-level table of the lexicon is empty*)
fun is_empty_lexicon (Lexicon tab) = Symtab.is_empty tab;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   312
27782
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   313
(*Check whether the symbol list is a literal of the lexicon, i.e. whether
  following its symbols through the nested tables ends at an entry marked
  as tip.  The empty list is never a literal.*)
fun is_literal (Lexicon tab) chs =
  (case chs of
    [] => false
  | c :: cs =>
      (case Symtab.lookup tab c of
        NONE => false
      | SOME (tip, lex) =>
          if tip andalso null cs then true else is_literal lex cs));
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   318
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   319
27782
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   320
(* scan longest match *)
27765
5df443dd9deb reorganized lexicon: allow scanning of annotated symbols, tuned representation and interfaces;
wenzelm
parents: 27731
diff changeset
   321
27782
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   322
(*Scan the longest prefix of the input that is a literal of the lexicon.
  Input elements are pairs whose first component is the lookup key.
  The result is the list of matched elements with the remaining input.

  Raises FAIL NONE if no literal matches.  Raises MORE () if the input is
  exhausted while the current table still has entries, i.e. a longer
  match might still be possible.*)
fun literal lexicon =
  let
    (*deliver the most recent complete match, if any*)
    fun finish NONE = raise FAIL NONE
      | finish (SOME (rev_syms, rest)) = (rev rev_syms, rest);

    (*path: matched elements in reverse order; best: last complete match*)
    fun scan path best (Lexicon tab) input =
      (case input of
        [] => if Symtab.is_empty tab then finish best else raise MORE ()
      | c :: cs =>
          (case Symtab.lookup tab (fst c) of
            NONE => finish best
          | SOME (tip, lex) =>
              let
                val path' = c :: path;
                val best' = if tip then SOME (path', cs) else best;
              in scan path' best' lex cs end));
  in scan [] NONE lexicon end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   335
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   336
27765
5df443dd9deb reorganized lexicon: allow scanning of annotated symbols, tuned representation and interfaces;
wenzelm
parents: 27731
diff changeset
   337
(* build lexicons *)
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   338
27765
5df443dd9deb reorganized lexicon: allow scanning of annotated symbols, tuned representation and interfaces;
wenzelm
parents: 27731
diff changeset
   339
(*Add the literal given as symbol list `chrs` to the lexicon.  A lexicon
  that already contains the literal is returned unchanged.*)
fun extend_lexicon chrs lexicon =
  let
    fun ext [] lex = lex
      | ext (c :: cs) (Lexicon tab) =
          let
            (*the entry is a tip if it was one before, or if the literal ends here*)
            val (tip', sub) =
              (case Symtab.lookup tab c of
                SOME (tip, lex) => (tip orelse null cs, lex)
              | NONE => (null cs, empty_lexicon));
          in Lexicon (Symtab.update (c, (tip', ext cs sub)) tab) end;
  in if is_literal lexicon chrs then lexicon else ext chrs lexicon end;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   347
27765
5df443dd9deb reorganized lexicon: allow scanning of annotated symbols, tuned representation and interfaces;
wenzelm
parents: 27731
diff changeset
   348
(*build a lexicon from a list of literals (symbol lists), starting from `empty_lexicon`*)
fun make_lexicon chrss = fold extend_lexicon chrss empty_lexicon;
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   349
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   350
27765
5df443dd9deb reorganized lexicon: allow scanning of annotated symbols, tuned representation and interfaces;
wenzelm
parents: 27731
diff changeset
   351
(* merge lexicons *)
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   352
27782
377810fd718e datatype lexicon: alternative representation using nested Symtab.table;
wenzelm
parents: 27765
diff changeset
   353
(*Collect all literals of the lexicon as symbol lists.  `path` holds the
  symbols leading to this lexicon in reverse order; every entry marked as
  tip contributes its full path (in proper order), followed by the
  literals found below it.*)
fun dest path (Lexicon tab) =
  let
    fun add (d, (tip, lex)) =
      let
        val path' = d :: path;
        val below = dest path' lex;
        val found = if tip then rev path' :: below else below;
      in append found end;
  in Symtab.fold add tab [] end;
27765
5df443dd9deb reorganized lexicon: allow scanning of annotated symbols, tuned representation and interfaces;
wenzelm
parents: 27731
diff changeset
   358
59196
73a6403637b3 more toplevel pretty printing;
wenzelm
parents: 59071
diff changeset
   359
(*all literals of a lexicon as strings: symbol lists from `dest []` are imploded, then passed through `sort_strings`*)
val dest_lexicon = sort_strings o map implode o dest [];
59071
wenzelm
parents: 58864
diff changeset
   360
wenzelm
parents: 58864
diff changeset
   361
(*Merge two lexicons.  Shortcuts: physically identical arguments yield
  lex1, an empty lex1 yields lex2.  Otherwise all literals of lex2 are
  added to lex1.*)
fun merge_lexicons (lex1, lex2) =
  if pointer_eq (lex1, lex2) then lex1
  else
    (case is_empty_lexicon lex1 of
      true => lex2
    | false => fold extend_lexicon (dest [] lex2) lex1);
6116
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   365
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   366
end;
8ba2f25610f7 files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff changeset
   367
37216
3165bc303f66 modernized some structure names, keeping a few legacy aliases;
wenzelm
parents: 33957
diff changeset
   368
(*view of structure Scan constrained to signature BASIC_SCAN*)
structure Basic_Scan: BASIC_SCAN = Scan;
3165bc303f66 modernized some structure names, keeping a few legacy aliases;
wenzelm
parents: 33957
diff changeset
   369
(*make the BASIC_SCAN entries available unqualified at the top level*)
open Basic_Scan;