author  wenzelm 
Mon, 09 Mar 1998 16:12:19 +0100  
changeset 4702  ffbaf431665d 
child 4756  329c09e15991 
permissions  rwrr 
4702
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

1 
(* Title: Pure/Syntax/scan.ML 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

2 
ID: $Id$ 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

3 
Author: Markus Wenzel and Tobias Nipkow, TU Muenchen 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

4 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

5 
Generic scanners (for potentially infinite input). 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

6 
*) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

7 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

8 
infix 5    ^^; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

9 
infix 3 >>; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

10 
infix 0 ; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

11 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

12 
signature BASIC_SCAN = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

13 
sig 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

14 
val !! : ('a > string) > ('a > 'b) > 'a > 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

15 
val >> : ('a > 'b * 'c) * ('b > 'd) > 'a > 'd * 'c 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

16 
val  : ('a > 'b) * ('a > 'b) > 'a > 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

17 
val  : ('a > 'b * 'c) * ('c > 'd * 'e) > 'a > ('b * 'd) * 'e 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

18 
val  : ('a > 'b * 'c) * ('c > 'd * 'e) > 'a > 'd * 'e 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

19 
val  : ('a > 'b * 'c) * ('c > 'd * 'e) > 'a > 'b * 'e 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

20 
val ^^ : ('a > string * 'b) * ('b > string * 'c) > 'a > string * 'c 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

21 
val $$ : ''a > ''a list > ''a * ''a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

22 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

23 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

24 
signature SCAN = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

25 
sig 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

26 
include BASIC_SCAN 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

27 
val fail: 'a > 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

28 
val succeed: 'a > 'b > 'a * 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

29 
val one: ('a > bool) > 'a list > 'a * 'a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

30 
val any: ('a > bool) > 'a list > 'a list * 'a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

31 
val any1: ('a > bool) > 'a list > 'a list * 'a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

32 
val optional: ('a > 'b * 'a) > 'b > 'a > 'b * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

33 
val option: ('a > 'b * 'a) > 'a > 'b option * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

34 
val repeat: ('a > 'b * 'a) > 'a > 'b list * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

35 
val repeat1: ('a > 'b * 'a) > 'a > 'b list * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

36 
val max: ('a * 'a > bool) > ('b > 'a * 'b) > ('b > 'a * 'b) > 'b > 'a * 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

37 
val ahead: ('a > 'b * 'c) > 'a > 'b * 'a 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

38 
val depend: ('a > 'b > ('c * 'd) * 'e) > 'a * 'b > 'd * ('c * 'e) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

39 
val lift: ('a > 'b * 'c) > 'd * 'a > 'b * ('d * 'c) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

40 
val pass: 'a > ('a * 'b > 'c * ('d * 'e)) > 'b > 'c * 'e 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

41 
val force: ('a > 'b) > 'a > 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

42 
val finite': ''a > ('b * ''a list > 'c * ('d * ''a list)) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

43 
> 'b * ''a list > 'c * ('d * ''a list) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

44 
val finite: ''a > (''a list > 'b * ''a list) > ''a list > 'b * ''a list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

45 
val error: ('a > 'b) > 'a > 'b 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

46 
val source: ('a > 'b list * 'a) > ('b list * 'a > 'c) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

47 
> ('b list > 'd * 'b list) > 'a > 'd list * 'c 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

48 
type lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

49 
val dest_lexicon: lexicon > string list list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

50 
val make_lexicon: string list list > lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

51 
val empty_lexicon: lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

52 
val extend_lexicon: lexicon > string list list > lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

53 
val merge_lexicons: lexicon > lexicon > lexicon 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

54 
val literal: lexicon > string list > string list * string list 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

55 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

56 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

57 
structure Scan: SCAN = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

58 
struct 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

59 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

60 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

61 
(** scanners **) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

62 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

63 
exception MORE; (*need more input*) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

64 
exception FAIL; (*try alternatives*) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

65 
exception ABORT of string; (*dead end*) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

66 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

67 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

68 
(* scanner combinators *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

69 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

70 
fun (scan >> f) xs = apfst f (scan xs); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

71 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

72 
fun (scan1  scan2) xs = scan1 xs handle FAIL => scan2 xs; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

73 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

74 
fun (scan1  scan2) xs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

75 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

76 
val (x, ys) = scan1 xs; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

77 
val (y, zs) = scan2 ys; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

78 
in ((x, y), zs) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

79 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

80 
fun (scan1  scan2) = scan1  scan2 >> #2; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

81 
fun (scan1  scan2) = scan1  scan2 >> #1; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

82 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

83 
fun (scan1 ^^ scan2) = scan1  scan2 >> op ^; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

84 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

85 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

86 
(* generic scanners *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

87 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

88 
fun fail _ = raise FAIL; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

89 
fun succeed y xs = (y, xs); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

90 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

91 
fun one _ [] = raise MORE 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

92 
 one pred (x :: xs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

93 
if pred x then (x, xs) else raise FAIL; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

94 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

95 
fun $$ _ [] = raise MORE 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

96 
 $$ a (x :: xs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

97 
if a = x then (x, xs) else raise FAIL; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

98 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

99 
fun any _ [] = raise MORE 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

100 
 any pred (lst as x :: xs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

101 
if pred x then apfst (cons x) (any pred xs) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

102 
else ([], lst); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

103 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

104 
fun any1 pred = one pred  any pred >> op ::; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

105 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

106 
fun optional scan def = scan  succeed def; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

107 
fun option scan = optional (scan >> Some) None; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

108 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

109 
fun repeat scan xs = (scan  repeat scan >> op ::  succeed []) xs; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

110 
fun repeat1 scan = scan  repeat scan >> op ::; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

111 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

112 
fun max leq scan1 scan2 xs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

113 
(case (option scan1 xs, option scan2 xs) of 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

114 
((None, _), (None, _)) => raise FAIL 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

115 
 ((Some tok1, xs'), (None, _)) => (tok1, xs') 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

116 
 ((None, _), (Some tok2, xs')) => (tok2, xs') 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

117 
 ((Some tok1, xs1'), (Some tok2, xs2')) => 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

118 
if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2')); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

119 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

120 
fun ahead scan xs = (fst (scan xs), xs); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

121 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

122 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

123 
(* state based scanners *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

124 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

125 
fun depend scan (st, xs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

126 
let val ((st', y), xs') = scan st xs 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

127 
in (y, (st', xs')) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

128 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

129 
fun lift scan (st, xs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

130 
let val (y, xs') = scan xs 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

131 
in (y, (st, xs')) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

132 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

133 
fun pass st scan xs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

134 
let val (y, (_, xs')) = scan (st, xs) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

135 
in (y, xs') end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

136 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

137 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

138 
(* exception handling *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

139 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

140 
fun !! err scan xs = scan xs handle FAIL => raise ABORT (err xs); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

141 
fun force scan xs = scan xs handle MORE => raise FAIL; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

142 
fun error scan xs = scan xs handle ABORT msg => Library.error msg; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

143 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

144 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

145 
(* finite scans *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

146 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

147 
fun finite' stopper scan (state, input) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

148 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

149 
fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!"; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

150 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

151 
fun stop [] = lost () 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

152 
 stop lst = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

153 
let val (xs, x) = split_last lst 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

154 
in if x = stopper then ((), xs) else lost () end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

155 
in 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

156 
if exists (equal stopper) input then 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

157 
raise ABORT "Stopper may not occur in input of finite scan!" 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

158 
else (force scan  lift stop) (state, input @ [stopper]) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

159 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

160 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

161 
fun finite stopper scan xs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

162 
let val (y, ((), xs')) = finite' stopper (lift scan) ((), xs) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

163 
in (y, xs') end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

164 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

165 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

166 
(* infinite scans *) (* FIXME state based!? *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

167 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

168 
fun source get unget scan src = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

169 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

170 
fun try_more x = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

171 
scan x handle MORE => raise FAIL  ABORT _ => raise FAIL; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

172 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

173 
fun drain (xs, s) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

174 
(((scan  repeat try_more) >> op ::) xs, s) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

175 
handle MORE => 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

176 
let val (more_xs, s') = get s in 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

177 
if null more_xs then raise ABORT "Input source exhausted" 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

178 
else drain (xs @ more_xs, s') 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

179 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

180 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

181 
val ((ys, xs'), src') = drain (get src); 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

182 
in (ys, unget (xs', src')) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

183 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

184 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

185 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

186 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

187 
(** datatype lexicon **) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

188 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

189 
datatype lexicon = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

190 
Empty  
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

191 
Branch of string * string list * lexicon * lexicon * lexicon; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

192 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

193 
val no_literal = []; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

194 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

195 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

196 
(* dest_lexicon *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

197 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

198 
fun dest_lexicon Empty = [] 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

199 
 dest_lexicon (Branch (_, [], lt, eq, gt)) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

200 
dest_lexicon lt @ dest_lexicon eq @ dest_lexicon gt 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

201 
 dest_lexicon (Branch (_, cs, lt, eq, gt)) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

202 
dest_lexicon lt @ [cs] @ dest_lexicon eq @ dest_lexicon gt; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

203 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

204 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

205 
(* empty, extend, make, merge lexicons *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

206 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

207 
val empty_lexicon = Empty; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

208 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

209 
fun extend_lexicon lexicon chrss = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

210 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

211 
fun ext (lex, chrs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

212 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

213 
fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

214 
if c < d then Branch (d, a, add lt chs, eq, gt) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

215 
else if c > d then Branch (d, a, lt, eq, add gt chs) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

216 
else Branch (d, if null cs then chrs else a, lt, add eq cs, gt) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

217 
 add Empty [c] = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

218 
Branch (c, chrs, Empty, Empty, Empty) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

219 
 add Empty (c :: cs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

220 
Branch (c, no_literal, Empty, add Empty cs, Empty) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

221 
 add lex [] = lex; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

222 
in add lex chrs end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

223 
in foldl ext (lexicon, chrss \\ dest_lexicon lexicon) end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

224 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

225 
val make_lexicon = extend_lexicon empty_lexicon; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

226 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

227 
fun merge_lexicons lex1 lex2 = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

228 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

229 
val chss1 = dest_lexicon lex1; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

230 
val chss2 = dest_lexicon lex2; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

231 
in 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

232 
if chss2 subset chss1 then lex1 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

233 
else if chss1 subset chss2 then lex2 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

234 
else extend_lexicon lex1 chss2 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

235 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

236 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

237 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

238 
(* scan literal *) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

239 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

240 
fun literal lex chrs = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

241 
let 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

242 
fun lit Empty res _ = res 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

243 
 lit (Branch _) _ [] = raise MORE 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

244 
 lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) = 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

245 
if c < d then lit lt res chs 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

246 
else if c > d then lit gt res chs 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

247 
else lit eq (if a = no_literal then res else Some (a, cs)) cs; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

248 
in 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

249 
(case lit lex None chrs of 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

250 
None => raise FAIL 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

251 
 Some res => res) 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

252 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

253 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

254 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

255 
end; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

256 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

257 

ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

258 
structure BasicScan: BASIC_SCAN = Scan; 
ffbaf431665d
Generic scanners (for potentially infinite input)  replaces Scanner;
wenzelm
parents:
diff
changeset

259 
open BasicScan; 