| author | paulson | 
| Thu, 06 Sep 2007 17:03:53 +0200 | |
| changeset 24547 | 64c20ee76bc1 | 
| parent 24025 | 77e3e5781a99 | 
| child 24595 | 5c290506fbc0 | 
| permissions | -rw-r--r-- | 
| 11523 | 1 | (* Title: Pure/General/scan.ML | 
| 2 | ID: $Id$ | |
| 3 | Author: Markus Wenzel and Tobias Nipkow, TU Muenchen | |
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 4 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 5 | Generic scanners (for potentially infinite input). | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 6 | *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 7 | |
| 24025 | 8 | infix 5 -- :-- :|-- |-- --| ^^; | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 9 | infix 3 >>; | 
| 23699 | 10 | infixr 0 ||; | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 11 | |
signature BASIC_SCAN =
sig
  (*error handling: a FAIL of the given scanner is turned into an ABORT whose
    text is built from the current input and the optional failure reason*)
  val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
  (*post-process the scanned result with a function*)
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
  (*choice: fall back to the second scanner if the first one fails*)
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
  (*run two scanners in sequence and pair their results*)
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*as --, but the second scanner is selected by the first result*)
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*sequencing that keeps only one of the two results*)
  val :|-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> 'd * 'e
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
  (*run two string-valued scanners in sequence and concatenate the results*)
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
  (*scan a single element that is equal to ($$) or different from (~$$)
    the given string*)
  val $$ : string -> string list -> string * string list
  val ~$$ : string -> string list -> string * string list
end;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 34 | |
signature SCAN =
sig
  include BASIC_SCAN
  (*exception handling wrappers*)
  val prompt: string -> ('a -> 'b) -> 'a -> 'b
  val error: ('a -> 'b) -> 'a -> 'b
  val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)
  (*generic scanners*)
  val fail: 'a -> 'b
  val fail_with: ('a -> string) -> 'a -> 'b
  val succeed: 'a -> 'b -> 'a * 'b
  val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
  val this: string list -> string list -> string list * string list
  val this_string: string -> string list -> string * string list
  val many: ('a -> bool) -> 'a list -> 'a list * 'a list
  val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
  val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
  val first: ('a -> 'b) list -> 'a -> 'b
  (*state based scanners: the input is a pair of state and remaining items*)
  val state: 'a * 'b -> 'a * ('a * 'b)
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
  val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
  val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd
  (*trace input*)
  val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
  (*finite scans: input is terminated by a stopper element*)
  val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    -> 'b * 'a list -> 'c * ('d * 'a list)
  val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
  val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
  (*infinite scans -- draining a state-based source*)
  val drain: string -> (string -> 'a -> 'b list * 'a) -> 'b * ('b -> bool) ->
    ('c * 'b list -> 'd * ('e * 'b list)) -> ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a
  (*lexicons of literals, each literal given as a list of strings*)
  type lexicon
  val dest_lexicon: lexicon -> string list
  val make_lexicon: string list list -> lexicon
  val empty_lexicon: lexicon
  val extend_lexicon: string list list -> lexicon -> lexicon
  val merge_lexicons: lexicon -> lexicon -> lexicon
  val is_literal: lexicon -> string list -> bool
  val literal: lexicon -> string list -> string list * string list
end;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 82 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 83 | structure Scan: SCAN = | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 84 | struct | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 85 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 86 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 87 | (** scanners **) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 88 | |
| 23699 | 89 | (* exceptions *) | 
| 90 | ||
(*control flow of scanners is expressed via three exceptions*)
exception MORE of string option;  (*input exhausted: more is needed (optional prompt)*)
exception FAIL of string option;  (*this branch failed: alternatives may be tried (optional reason)*)
exception ABORT of string;        (*dead end: no alternatives are tried*)
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 94 | |
(*FAIL becomes ABORT; the message is built by err from the input and the reason*)
fun !! err scan xs =
  scan xs handle FAIL reason => raise ABORT (err (xs, reason));

(*both MORE and ABORT are downgraded to a plain FAIL without reason*)
fun permissive scan xs =
  scan xs handle
    ABORT _ => raise FAIL NONE
  | MORE _ => raise FAIL NONE;

(*a request for more input counts as failure*)
fun strict scan xs =
  scan xs handle MORE _ => raise FAIL NONE;

(*supply the prompt string for a MORE request that carries none*)
fun prompt str scan xs =
  scan xs handle MORE NONE => raise MORE (SOME str);

(*report ABORT via Library.error*)
fun error scan xs =
  scan xs handle ABORT text => Library.error text;
| 100 | ||
(*map scanner exceptions ABORT and FAIL to the standard exception Fail;
  a FAIL without reason yields the text "Syntax error."*)
fun catch scan xs =
  scan xs handle
    FAIL reason =>
      raise Fail (case reason of SOME text => text | NONE => "Syntax error.")
  | ABORT text => raise Fail text;
| 104 | ||
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 105 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 106 | (* scanner combinators *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 107 | |
(*apply f to the result of scan, leaving the remaining input untouched*)
fun (scan >> f) xs =
  let val (result, rest) = scan xs
  in (f result, rest) end;

(*alternative: if scan1 raises FAIL, run scan2 on the same input*)
fun (scan1 || scan2) xs =
  (case (SOME (scan1 xs) handle FAIL _ => NONE) of
    SOME outcome => outcome
  | NONE => scan2 xs);
| 14078 | 111 | |
(*dependent sequencing: the second scanner receives the first result
  before being run on the remaining input; both results are paired*)
fun (scan1 :-- scan2) xs =
  (case scan1 xs of
    (a, rest) =>
      (case scan2 a rest of
        (b, rest') => ((a, b), rest')));
| 14078 | 117 | |
(*plain sequencing is dependent sequencing that ignores the first result*)
fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);

(*projections: keep only the second, or only the first result*)
fun (scan1 :|-- scan2) = (scan1 :-- scan2) >> (fn (_, b) => b);
fun (scan1 |-- scan2) = (scan1 -- scan2) >> (fn (_, b) => b);
fun (scan1 --| scan2) = (scan1 -- scan2) >> (fn (a, _) => a);

(*concatenate the two scanned strings*)
fun (scan1 ^^ scan2) = (scan1 -- scan2) >> (fn (a, b) => a ^ b);
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 123 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 124 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 125 | (* generic scanners *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 126 | |
(*always fail, without a reason*)
fun fail _ = raise (FAIL NONE);

(*always fail; the reason is computed from the current input*)
fun fail_with msg_of xs =
  let val reason = msg_of xs
  in raise FAIL (SOME reason) end;
(*always succeed with result y, consuming no input*)
fun succeed y = fn xs => (y, xs);
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 130 | |
(*scan one element for which f yields SOME result; empty input requests
  MORE, an element mapped to NONE is a failure*)
fun some f input =
  (case input of
    [] => raise MORE NONE
  | x :: rest =>
      (case f x of
        NONE => raise FAIL NONE
      | SOME y => (y, rest)));
| 134 | ||
(*scan one element satisfying pred; empty input requests MORE,
  a non-matching element is a failure*)
fun one pred input =
  (case input of
    [] => raise MORE NONE
  | x :: rest => if pred x then (x, rest) else raise FAIL NONE);
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 138 | |
(*one element equal to the given string*)
fun $$ a = one (fn s => s = (a: string));

(*one element different from the given string*)
fun ~$$ a = one (fn s => s <> (a: string));
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 141 | |
(*scan exactly the sequence ys from the front of xs; input that ends
  before ys is complete requests MORE, a mismatch is a failure*)
fun this ys xs =
  let
    fun strip (pattern, input) =
      (case (pattern, input) of
        ([], rest) => rest
      | (_ :: _, []) => raise MORE NONE
      | (p :: pattern', c :: input') =>
          if (p: string) <> c then raise FAIL NONE
          else strip (pattern', input'));
  in (ys, strip (ys, xs)) end;
| 149 | ||
(*scan the exploded form of s and return s itself;
  primitive string -- no symbols here!*)
fun this_string s = this (explode s) >> (fn _ => s);
| 14907 | 151 | |
(*scan the longest prefix of elements satisfying pred (possibly empty).
  Reaching the end of input while still matching requests MORE, as the
  prefix might continue.  Tail recursive with an accumulator, so the
  stack does not grow with the length of the matched prefix.*)
fun many pred =
  let
    fun collect _ [] = raise MORE NONE
      | collect acc (lst as x :: xs) =
          if pred x then collect (x :: acc) xs
          else (rev acc, lst);
  in collect [] end;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 156 | |
(*as many, but at least one element must satisfy pred*)
fun many1 pred = (one pred -- many pred) >> (fn (x, xs) => x :: xs);
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 158 | |
(*run scan; on failure yield the default value and consume nothing*)
fun optional scan def xs =
  scan xs handle FAIL _ => (def, xs);

(*run scan, wrapping its result in SOME; on failure yield NONE and
  consume nothing*)
fun option scan xs =
  (case scan xs of (y, rest) => (SOME y, rest))
    handle FAIL _ => (NONE, xs);
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 161 | |
(*apply scan as often as it succeeds, collecting the results in order;
  tail recursive: results are accumulated in reverse*)
fun repeat scan =
  let
    fun attempt input = SOME (scan input) handle FAIL _ => NONE;
    fun loop acc input =
      (case attempt input of
        SOME (item, input') => loop (item :: acc) input'
      | NONE => (rev acc, input));
  in loop [] end;
| 13795 | 169 | |
(*as repeat, but scan must succeed at least once*)
fun repeat1 scan = (scan -- repeat scan) >> (fn (x, xs) => x :: xs);
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 171 | |
(*run scan once and return its result as a singleton list*)
fun single scan xs =
  let val (x, rest) = scan xs
  in ([x], rest) end;

(*run scan once strictly, then repeat it permissively, i.e. with
  MORE and ABORT of the repetitions treated as failure*)
fun bulk scan = (scan -- repeat (permissive scan)) >> (fn (x, xs) => x :: xs);
| 174 | ||
(*run both scanners on the same input and choose between the outcomes:
  if only one succeeds it wins; if both succeed, the first wins when
  leq (tok2, tok1) holds, otherwise the second*)
fun max leq scan1 scan2 xs =
  let
    val outcome1 = option scan1 xs;
    val outcome2 = option scan2 xs;
  in
    (case outcome1 of
      (NONE, _) =>
        (case outcome2 of
          (NONE, _) => raise FAIL NONE  (*loses FAIL msg!*)
        | (SOME tok2, rest2) => (tok2, rest2))
    | (SOME tok1, rest1) =>
        (case outcome2 of
          (NONE, _) => (tok1, rest1)
        | (SOME tok2, rest2) =>
            if leq (tok2, tok1) then (tok1, rest1) else (tok2, rest2)))
  end;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 182 | |
(*look ahead: return the result of scan, but keep the input unchanged*)
fun ahead scan xs =
  let val (y, _) = scan xs
  in (y, xs) end;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 184 | |
(*run scan only if test fails on the current input; if test succeeds
  the whole scanner fails (test itself never consumes input)*)
fun unless test scan xs =
  (case option test xs of
    (NONE, _) => scan xs
  | (SOME _, _) => fail xs);
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 187 | |
(*try the scanners in list order; the empty list always fails*)
fun first scans =
  List.foldr (fn (scan, others) => scan || others) fail scans;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 190 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 191 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 192 | (* state based scanners *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 193 | |
(*return the current state as result; state and input stay unchanged*)
fun state (arg as (st, _)) = (st, arg);
| 195 | ||
(*run a scanner that depends on the state and yields an updated state
  together with its result*)
fun depend scan (st, xs) =
  (case scan st xs of
    ((st', y), xs') => (y, (st', xs')));
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 199 | |
(*run a scanner that reads the state but does not change it*)
fun peek scan (st, xs) =
  let val (y, xs') = scan st xs
  in (y, (st, xs')) end;
| 201 | ||
(*run a state-based scanner from the initial state st and discard the
  final state*)
fun pass st scan xs =
  (case scan (st, xs) of
    (y, (_, xs')) => (y, xs'));
| 205 | ||
(*turn a plain scanner into a state-based one that passes the state
  through unchanged*)
fun lift scan (st, xs) =
  (case scan xs of
    (y, xs') => (y, (st, xs')));
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 209 | |
(*run a state-based scanner with the trivial state ()*)
fun unlift scan = fn xs => pass () scan xs;
| 211 | ||
| 212 | ||
| 213 | (* trace input *) | |
| 214 | ||
(*additionally return the part of the input taken by scan, determined
  as the prefix of xs whose length is the difference between the input
  length and the length of the remainder*)
fun trace scan xs =
  let
    val (y, rest) = scan xs;
    val consumed = length xs - length rest;
  in ((y, Library.take (consumed, xs)), rest) end;
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 218 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 219 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 220 | (* finite scans *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 221 | |
(*run a state-based scanner on finite input: a stopper element is
  appended, MORE is treated as failure, and after the scan the stopper
  is stripped from the end of the remaining input again.  The input
  itself must not contain a stopper.*)
fun finite' (stopper, is_stopper) scan (state, input) =
  let
    fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";

    (*remove the trailing stopper from the remaining input*)
    fun stop rest =
      if null rest then lost ()
      else
        (case split_last rest of
          (front, final) => if is_stopper final then ((), front) else lost ());
  in
    if not (exists is_stopper input) then
      (strict scan --| lift stop) (state, input @ [stopper])
    else raise ABORT "Stopper may not occur in input of finite scan!"
  end;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 235 | |
(*stateless variant of finite': the scanner is lifted, run, and unlifted*)
fun finite stopper scan =
  let val lifted = lift scan
  in unlift (finite' stopper lifted) end;
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 237 | |
(*scan finite input completely: the result is SOME value only if scan
  succeeds and no input remains, otherwise NONE; ABORT is reported via
  error*)
fun read stopper scan xs =
  let val (result, rest) = error (finite stopper (option scan)) xs
  in if null rest then result else NONE end;
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 242 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 243 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 244 | (* infinite scans -- draining state-based source *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 245 | |
(*run scan; whenever it requests MORE, fetch further input from src via
  get (using the requested prompt, or def_prompt if none was given) and
  retry on the extended input.  If get delivers nothing, the scan is
  finished as a finite one on the input collected so far.*)
fun drain def_prompt get stopper scan ((state, xs), src) =
  let
    fun refill request =
      let val (more, src') = get (the_default def_prompt request) src in
        if null more then (finite' stopper scan (state, xs), src)
        else drain def_prompt get stopper scan ((state, xs @ more), src')
      end;
  in (scan (state, xs), src) handle MORE request => refill request end;
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 251 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 252 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 253 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 254 | (** datatype lexicon **) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 255 | |
(*search tree of literals.  A Branch holds: the string compared against
  the current input element, the literal ending at this node (or
  no_literal), and three subtrees -- for smaller elements, for the
  continuation after an equal element, and for greater elements.*)
datatype lexicon =
  Empty |
  Branch of string * string list * lexicon * lexicon * lexicon;

(*marks a node at which no literal ends*)
val no_literal = [];
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 261 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 262 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 263 | (* dest_lexicon *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 264 | |
(*list all literals stored in the tree, each as its list of strings;
  nodes without a literal contribute nothing themselves*)
fun dest_lex Empty = []
  | dest_lex (Branch (_, stored, lt, eq, gt)) =
      let val here = if null stored then [] else [stored]
      in dest_lex lt @ here @ dest_lex eq @ dest_lex gt end;

(*list all literals as plain strings*)
fun dest_lexicon lex = map implode (dest_lex lex);
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 272 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 273 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 274 | (* empty, extend, make, merge lexicons *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 275 | |
val empty_lexicon = Empty;

(*add literals (each given as a list of strings) to a lexicon; literals
  already listed by dest_lex are filtered out before insertion*)
fun extend_lexicon [] lexicon = lexicon
  | extend_lexicon chrss lexicon =
      let
        (*insert a single literal into the tree*)
        fun insert literal tree =
          let
            fun descend [] node = node
              | descend [c] Empty = Branch (c, literal, Empty, Empty, Empty)
              | descend (c :: cs) Empty =
                  Branch (c, no_literal, Empty, descend cs Empty, Empty)
              | descend (path as c :: cs) (Branch (d, a, lt, eq, gt)) =
                  (case fast_string_ord (c, d) of
                    EQUAL =>
                      (*the literal ends here exactly if no elements remain*)
                      Branch (d, if null cs then literal else a, lt, descend cs eq, gt)
                  | LESS => Branch (d, a, descend path lt, eq, gt)
                  | GREATER => Branch (d, a, lt, eq, descend path gt));
          in descend literal tree end;

        val fresh = subtract (op =) (dest_lex lexicon) chrss;
      in fold insert fresh lexicon end;
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 293 | |
(*build a lexicon from scratch*)
fun make_lexicon chrss = extend_lexicon chrss Empty;
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 295 | |
(*merge two lexicons; if the literals of one are included in the other,
  that other lexicon is returned unchanged, otherwise the literals of
  lex2 are added to lex1*)
fun merge_lexicons lex1 lex2 =
  let
    val lits1 = dest_lex lex1;
    val lits2 = dest_lex lex2;
  in
    if not (lits2 subset lits1) then
      if lits1 subset lits2 then lex2
      else extend_lexicon lits2 lex1
    else lex1
  end;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 305 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 306 | |
| 14686 | 307 | (* is_literal *) | 
| 308 | ||
(*check whether the given list of strings is exactly one of the
  literals of the lexicon; the empty list never is*)
fun is_literal lex chs =
  (case (lex, chs) of
    (Empty, _) => false
  | (_, []) => false
  | (Branch (d, a, lt, eq, gt), c :: cs) =>
      (case fast_string_ord (c, d) of
        EQUAL => (null cs andalso not (null a)) orelse is_literal eq cs
      | LESS => is_literal lt chs
      | GREATER => is_literal gt chs));
| 316 | ||
| 317 | ||
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 318 | (* scan literal *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 319 | |
(*scan the longest literal of the lexicon at the front of the input.
  The best match found so far is remembered while descending the tree;
  running out of input inside the tree requests MORE, and no match at
  all is a failure.*)
fun literal lex chrs =
  let
    fun walk (Empty, best, _) = best
      | walk (Branch _, _, []) = raise MORE NONE
      | walk (Branch (d, a, lt, eq, gt), best, input as c :: rest) =
          (case fast_string_ord (c, d) of
            EQUAL => walk (eq, if a = no_literal then best else SOME (a, rest), rest)
          | LESS => walk (lt, best, input)
          | GREATER => walk (gt, best, input));
  in
    (case walk (lex, NONE, chrs) of
      SOME found => found
    | NONE => raise FAIL NONE)
  end;
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 334 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 335 | end; | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 336 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 337 | structure BasicScan: BASIC_SCAN = Scan; | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 338 | open BasicScan; |