author  wenzelm 
Tue, 26 May 1998 12:29:27 +0200  
changeset 4968  c68a9c510c90 
parent 4958  ad2acb8d2db4 
child 5048  2af6b01e7ab6 
permissions  rwrr 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

1 
(* Title: Pure/Syntax/scan.ML 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

2 
ID: $Id$ 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

3 
Author: Markus Wenzel and Tobias Nipkow, TU Muenchen 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

4 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

5 
Generic scanners (for potentially infinite input). 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

6 
*) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

7 

4924  8 
infix 5  :   ^^; 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

9 
infix 3 >>; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

10 
infix 0 ; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

11 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

12 
signature BASIC_SCAN = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

13 
sig 
4919  14 
val !! : ('a * string option > string) > ('a > 'b) > 'a > 'b 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

15 
val >> : ('a > 'b * 'c) * ('b > 'd) > 'a > 'd * 'c 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

16 
val  : ('a > 'b) * ('a > 'b) > 'a > 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

17 
val  : ('a > 'b * 'c) * ('c > 'd * 'e) > 'a > ('b * 'd) * 'e 
4924  18 
val : : ('a > 'b * 'c) * ('b > 'c > 'd * 'e) > 'a > ('b * 'd) * 'e 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

19 
val  : ('a > 'b * 'c) * ('c > 'd * 'e) > 'a > 'd * 'e 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

20 
val  : ('a > 'b * 'c) * ('c > 'd * 'e) > 'a > 'b * 'e 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

21 
val ^^ : ('a > string * 'b) * ('b > string * 'c) > 'a > string * 'c 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

22 
val $$ : ''a > ''a list > ''a * ''a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

23 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

24 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

25 
signature SCAN = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

26 
sig 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

27 
include BASIC_SCAN 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

28 
val fail: 'a > 'b 
4919  29 
val fail_with: ('a > string) > 'a > 'b 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

30 
val succeed: 'a > 'b > 'a * 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

31 
val one: ('a > bool) > 'a list > 'a * 'a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

32 
val any: ('a > bool) > 'a list > 'a list * 'a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

33 
val any1: ('a > bool) > 'a list > 'a list * 'a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

34 
val optional: ('a > 'b * 'a) > 'b > 'a > 'b * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

35 
val option: ('a > 'b * 'a) > 'a > 'b option * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

36 
val repeat: ('a > 'b * 'a) > 'a > 'b list * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

37 
val repeat1: ('a > 'b * 'a) > 'a > 'b list * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

38 
val max: ('a * 'a > bool) > ('b > 'a * 'b) > ('b > 'a * 'b) > 'b > 'a * 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

39 
val ahead: ('a > 'b * 'c) > 'a > 'b * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

40 
val depend: ('a > 'b > ('c * 'd) * 'e) > 'a * 'b > 'd * ('c * 'e) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

41 
val lift: ('a > 'b * 'c) > 'd * 'a > 'b * ('d * 'c) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

42 
val pass: 'a > ('a * 'b > 'c * ('d * 'e)) > 'b > 'c * 'e 
4756  43 
val try: ('a > 'b) > 'a > 'b 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

44 
val force: ('a > 'b) > 'a > 'b 
4756  45 
val prompt: string > ('a > 'b) > 'a > 'b 
4937  46 
val finite': 'a * ('a > bool) > ('b * 'a list > 'c * ('d * 'a list)) 
47 
> 'b * 'a list > 'c * ('d * 'a list) 

48 
val finite: 'a * ('a > bool) > ('a list > 'b * 'a list) > 'a list > 'b * 'a list 

4958  49 
val catch: ('a > 'b) > 'a > 'b 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

50 
val error: ('a > 'b) > 'a > 'b 
4958  51 
val source': string > (string > 'a > 'b list * 'a) > ('b list * 'a > 'c) > 
52 
'b * ('b > bool) > ('d * 'b list > 'e list * ('d * 'b list)) > 

53 
('d * 'b list > 'f * ('d * 'b list)) option > 'd * 'a > 'e list * ('d * 'c) 

54 
val source: string > (string > 'a > 'b list * 'a) > ('b list * 'a > 'c) > 

55 
'b * ('b > bool) > ('b list > 'd list * 'b list) > 

56 
('b list > 'e * 'b list) option > 'a > 'd list * 'c 

4937  57 
val single: ('a > 'b * 'a) > 'a > 'b list * 'a 
58 
val bulk: ('a > 'b * 'a) > 'a > 'b list * 'a 

4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

59 
type lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

60 
val dest_lexicon: lexicon > string list list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

61 
val make_lexicon: string list list > lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

62 
val empty_lexicon: lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

63 
val extend_lexicon: lexicon > string list list > lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

64 
val merge_lexicons: lexicon > lexicon > lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

65 
val literal: lexicon > string list > string list * string list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

66 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

67 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

68 
structure Scan: SCAN = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

69 
struct 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

70 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

71 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

72 
(** scanners **) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

73 

4958  74 
exception MORE of string option; (*need more input (prompt)*) 
4919  75 
exception FAIL of string option; (*try alternatives (reason of failure)*) 
4756  76 
exception ABORT of string; (*dead end*) 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

77 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

78 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

79 
(* scanner combinators *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

80 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

81 
fun (scan >> f) xs = apfst f (scan xs); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

82 

4919  83 
fun (scan1  scan2) xs = scan1 xs handle FAIL _ => scan2 xs; 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

84 

4937  85 
(*dependent pairing*) 
4924  86 
fun (scan1 : scan2) xs = 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

87 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

88 
val (x, ys) = scan1 xs; 
4924  89 
val (y, zs) = scan2 x ys; 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

90 
in ((x, y), zs) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

91 

4924  92 
fun (scan1  scan2) = scan1 : (fn _ => scan2); 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

93 
fun (scan1  scan2) = scan1  scan2 >> #2; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

94 
fun (scan1  scan2) = scan1  scan2 >> #1; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

95 
fun (scan1 ^^ scan2) = scan1  scan2 >> op ^; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

96 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

97 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

98 
(* generic scanners *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

99 

4919  100 
fun fail _ = raise FAIL None; 
101 
fun fail_with msg_of xs = raise FAIL (Some (msg_of xs)); 

4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

102 
fun succeed y xs = (y, xs); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

103 

4756  104 
fun one _ [] = raise MORE None 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

105 
 one pred (x :: xs) = 
4919  106 
if pred x then (x, xs) else raise FAIL None; 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

107 

4756  108 
fun $$ _ [] = raise MORE None 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

109 
 $$ a (x :: xs) = 
4919  110 
if a = x then (x, xs) else raise FAIL None; 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

111 

4756  112 
fun any _ [] = raise MORE None 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

113 
 any pred (lst as x :: xs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

114 
if pred x then apfst (cons x) (any pred xs) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

115 
else ([], lst); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

116 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

117 
fun any1 pred = one pred  any pred >> op ::; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

118 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

119 
fun optional scan def = scan  succeed def; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

120 
fun option scan = optional (scan >> Some) None; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

121 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

122 
fun repeat scan xs = (scan  repeat scan >> op ::  succeed []) xs; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

123 
fun repeat1 scan = scan  repeat scan >> op ::; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

124 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

125 
fun max leq scan1 scan2 xs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

126 
(case (option scan1 xs, option scan2 xs) of 
4919  127 
((None, _), (None, _)) => raise FAIL None (*looses FAIL msg!*) 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

128 
 ((Some tok1, xs'), (None, _)) => (tok1, xs') 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

129 
 ((None, _), (Some tok2, xs')) => (tok2, xs') 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

130 
 ((Some tok1, xs1'), (Some tok2, xs2')) => 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

131 
if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2')); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

132 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

133 
fun ahead scan xs = (fst (scan xs), xs); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

134 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

135 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

136 
(* state based scanners *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

137 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

138 
fun depend scan (st, xs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

139 
let val ((st', y), xs') = scan st xs 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

140 
in (y, (st', xs')) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

141 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

142 
fun lift scan (st, xs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

143 
let val (y, xs') = scan xs 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

144 
in (y, (st, xs')) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

145 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

146 
fun pass st scan xs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

147 
let val (y, (_, xs')) = scan (st, xs) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

148 
in (y, xs') end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

149 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

150 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

151 
(* exception handling *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

152 

4919  153 
fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg)); 
154 
fun try scan xs = scan xs handle MORE _ => raise FAIL None  ABORT _ => raise FAIL None; 

155 
fun force scan xs = scan xs handle MORE _ => raise FAIL None; 

4756  156 
fun prompt str scan xs = scan xs handle MORE None => raise MORE (Some str); 
4958  157 
fun catch scan xs = scan xs handle ABORT msg => raise FAIL (Some msg); 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

158 
fun error scan xs = scan xs handle ABORT msg => Library.error msg; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

159 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

160 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

161 
(* finite scans *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

162 

4937  163 
fun finite' (stopper, is_stopper) scan (state, input) = 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

164 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

165 
fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!"; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

166 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

167 
fun stop [] = lost () 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

168 
 stop lst = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

169 
let val (xs, x) = split_last lst 
4937  170 
in if is_stopper x then ((), xs) else lost () end; 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

171 
in 
4937  172 
if exists is_stopper input then 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

173 
raise ABORT "Stopper may not occur in input of finite scan!" 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

174 
else (force scan  lift stop) (state, input @ [stopper]) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

175 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

176 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

177 
fun finite stopper scan xs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

178 
let val (y, ((), xs')) = finite' stopper (lift scan) ((), xs) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

179 
in (y, xs') end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

180 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

181 

4903  182 
(* infinite scans  draining statebased source *) 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

183 

4958  184 
fun drain def_prmpt get stopper scan ((state, xs), src) = 
185 
(scan (state, xs), src) handle MORE prmpt => 

186 
(case get (if_none prmpt def_prmpt) src of 

187 
([], _) => (finite' stopper scan (state, xs), src) 

188 
 (xs', src') => drain def_prmpt get stopper scan ((state, xs @ xs'), src')); 

189 

190 
fun source' def_prmpt get unget stopper scanner opt_recover (state, src) = 

4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

191 
let 
4968  192 
fun drain_with scan = drain def_prmpt get stopper scan; 
4958  193 

194 
fun drain_loop recover inp = 

195 
drain_with (catch scanner) inp handle FAIL msg => 

196 
(warning (if_none msg "Syntax error."); warning "Trying to recover ..."; 

197 
drain_loop recover (apfst snd (drain_with recover inp))); 

4937  198 

4958  199 
val ((ys, (state', xs')), src') = 
200 
(case (get def_prmpt src, opt_recover) of 

201 
(([], s), _) => (([], (state, [])), s) 

202 
 ((xs, s), None) => drain_with (error scanner) ((state, xs), s) 

203 
 ((xs, s), Some scan) => drain_loop scan ((state, xs), s)); 

4937  204 
in 
4958  205 
(ys, (state', unget (xs', src'))) 
4937  206 
end; 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

207 

4958  208 
fun source def_prmpt get unget stopper scan opt_recover src = 
209 
let val (ys, ((), src')) = 

210 
source' def_prmpt get unget stopper (lift scan) (apsome lift opt_recover) ((), src) 

4953  211 
in (ys, src') end; 
212 

4756  213 
fun single scan = scan >> (fn x => [x]); 
4937  214 
fun bulk scan = scan  repeat (try scan) >> (op ::); 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

215 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

216 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

217 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

218 
(** datatype lexicon **) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

219 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

220 
datatype lexicon = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

221 
Empty  
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

222 
Branch of string * string list * lexicon * lexicon * lexicon; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

223 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

224 
val no_literal = []; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

225 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

226 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

227 
(* dest_lexicon *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

228 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

229 
fun dest_lexicon Empty = [] 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

230 
 dest_lexicon (Branch (_, [], lt, eq, gt)) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

231 
dest_lexicon lt @ dest_lexicon eq @ dest_lexicon gt 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

232 
 dest_lexicon (Branch (_, cs, lt, eq, gt)) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

233 
dest_lexicon lt @ [cs] @ dest_lexicon eq @ dest_lexicon gt; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

234 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

235 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

236 
(* empty, extend, make, merge lexicons *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

237 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

238 
val empty_lexicon = Empty; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

239 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

240 
fun extend_lexicon lexicon chrss = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

241 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

242 
fun ext (lex, chrs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

243 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

244 
fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

245 
if c < d then Branch (d, a, add lt chs, eq, gt) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

246 
else if c > d then Branch (d, a, lt, eq, add gt chs) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

247 
else Branch (d, if null cs then chrs else a, lt, add eq cs, gt) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

248 
 add Empty [c] = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

249 
Branch (c, chrs, Empty, Empty, Empty) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

250 
 add Empty (c :: cs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

251 
Branch (c, no_literal, Empty, add Empty cs, Empty) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

252 
 add lex [] = lex; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

253 
in add lex chrs end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

254 
in foldl ext (lexicon, chrss \\ dest_lexicon lexicon) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

255 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

256 
val make_lexicon = extend_lexicon empty_lexicon; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

257 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

258 
fun merge_lexicons lex1 lex2 = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

259 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

260 
val chss1 = dest_lexicon lex1; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

261 
val chss2 = dest_lexicon lex2; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

262 
in 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

263 
if chss2 subset chss1 then lex1 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

264 
else if chss1 subset chss2 then lex2 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

265 
else extend_lexicon lex1 chss2 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

266 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

267 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

268 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

269 
(* scan literal *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

270 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

271 
fun literal lex chrs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

272 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

273 
fun lit Empty res _ = res 
4756  274 
 lit (Branch _) _ [] = raise MORE None 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

275 
 lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

276 
if c < d then lit lt res chs 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

277 
else if c > d then lit gt res chs 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

278 
else lit eq (if a = no_literal then res else Some (a, cs)) cs; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

279 
in 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

280 
(case lit lex None chrs of 
4919  281 
None => raise FAIL None 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

282 
 Some res => res) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

283 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

284 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

285 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

286 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

287 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

288 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

289 
structure BasicScan: BASIC_SCAN = Scan; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

290 
open BasicScan; 