author | bulwahn |
Wed, 14 Dec 2011 16:30:32 +0100 | |
changeset 45873 | 37ffb8797a63 |
parent 40967 | 5eb59b62e7de |
child 58249 | 180f1b3508ed |
permissions | -rw-r--r-- |
40967
5eb59b62e7de
avoid explicit encoding -- acknowledge UTF-8 as global default and Isabelle/jEdit preference of UTF-8-Isabelle;
wenzelm
parents:
30179
diff
changeset
|
1 |
(* Author: Filip Maric |
30179 | 2 |
|
3 |
Example theory involving Unicode characters (UTF-8 encoding) -- |
|
4 |
Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница) |
|
5 |
*) |
|
6 |
||
7 |
header {* A Serbian theory *} |
|
8 |
||
9 |
theory Serbian |
|
10 |
imports Main |
|
11 |
begin |
|
12 |
||
13 |
text{* Serbian cyrillic letters *} |
|
14 |
datatype azbuka = |
|
15 |
azbA ("А") |
|
16 |
| azbB ("Б") |
|
17 |
| azbV ("В") |
|
18 |
| azbG ("Г") |
|
19 |
| azbD ("Д") |
|
20 |
| azbDj ("Ђ") |
|
21 |
| azbE ("Е") |
|
22 |
| azbZv ("Ж") |
|
23 |
| azbZ ("З") |
|
24 |
| azbI ("И") |
|
25 |
| azbJ ("Ј") |
|
26 |
| azbK ("К") |
|
27 |
| azbL ("Л") |
|
28 |
| azbLj ("Љ") |
|
29 |
| azbM ("М") |
|
30 |
| azbN ("Н") |
|
31 |
| azbNj ("Њ") |
|
32 |
| azbO ("О") |
|
33 |
| azbP ("П") |
|
34 |
| azbR ("Р") |
|
35 |
| azbS ("С") |
|
36 |
| azbT ("Т") |
|
37 |
| azbC' ("Ћ") |
|
38 |
| azbU ("У") |
|
39 |
| azbF ("Ф") |
|
40 |
| azbH ("Х") |
|
41 |
| azbC ("Ц") |
|
42 |
| azbCv ("Ч") |
|
43 |
| azbDzv ("Џ") |
|
44 |
| azbSv ("Ш") |
|
45 |
| azbSpc |
|
46 |
||
47 |
thm azbuka.induct |
|
48 |
||
49 |
text{* Serbian latin letters *} |
|
50 |
datatype abeceda = |
|
51 |
abcA ("A") |
|
52 |
| abcB ("B") |
|
53 |
| abcC ("C") |
|
54 |
| abcCv ("Č") |
|
55 |
| abcC' ("Ć") |
|
56 |
| abcD ("D") |
|
57 |
| abcE ("E") |
|
58 |
| abcF ("F") |
|
59 |
| abcG ("G") |
|
60 |
| abcH ("H") |
|
61 |
| abcI ("I") |
|
62 |
| abcJ ("J") |
|
63 |
| abcK ("K") |
|
64 |
| abcL ("L") |
|
65 |
| abcM ("M") |
|
66 |
| abcN ("N") |
|
67 |
| abcO ("O") |
|
68 |
| abcP ("P") |
|
69 |
| abcR ("R") |
|
70 |
| abcS ("S") |
|
71 |
| abcSv ("Š") |
|
72 |
| abcT ("T") |
|
73 |
| abcU ("U") |
|
74 |
| abcV ("V") |
|
75 |
| abcZ ("Z") |
|
76 |
| abcvZ ("Ž") |
|
77 |
| abcSpc |
|
78 |
||
79 |
thm abeceda.induct |
|
80 |
||
81 |
||
82 |
text{* Conversion from cyrillic to latin - |
|
83 |
this conversion is valid in all cases *} |
|
84 |
primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list" |
|
85 |
where |
|
86 |
"azb2abc_aux А = [A]" |
|
87 |
| "azb2abc_aux Б = [B]" |
|
88 |
| "azb2abc_aux В = [V]" |
|
89 |
| "azb2abc_aux Г = [G]" |
|
90 |
| "azb2abc_aux Д = [D]" |
|
91 |
| "azb2abc_aux Ђ = [D, J]" |
|
92 |
| "azb2abc_aux Е = [E]" |
|
93 |
| "azb2abc_aux Ж = [Ž]" |
|
94 |
| "azb2abc_aux З = [Z]" |
|
95 |
| "azb2abc_aux И = [I]" |
|
96 |
| "azb2abc_aux Ј = [J]" |
|
97 |
| "azb2abc_aux К = [K]" |
|
98 |
| "azb2abc_aux Л = [L]" |
|
99 |
| "azb2abc_aux Љ = [L, J]" |
|
100 |
| "azb2abc_aux М = [M]" |
|
101 |
| "azb2abc_aux Н = [N]" |
|
102 |
| "azb2abc_aux Њ = [N, J]" |
|
103 |
| "azb2abc_aux О = [O]" |
|
104 |
| "azb2abc_aux П = [P]" |
|
105 |
| "azb2abc_aux Р = [R]" |
|
106 |
| "azb2abc_aux С = [S]" |
|
107 |
| "azb2abc_aux Т = [T]" |
|
108 |
| "azb2abc_aux Ћ = [Ć]" |
|
109 |
| "azb2abc_aux У = [U]" |
|
110 |
| "azb2abc_aux Ф = [F]" |
|
111 |
| "azb2abc_aux Х = [H]" |
|
112 |
| "azb2abc_aux Ц = [C]" |
|
113 |
| "azb2abc_aux Ч = [Č]" |
|
114 |
| "azb2abc_aux Џ = [D, Ž]" |
|
115 |
| "azb2abc_aux Ш = [Š]" |
|
116 |
| "azb2abc_aux azbSpc = [abcSpc]" |
|
117 |
||
118 |
primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list" |
|
119 |
where |
|
120 |
"azb2abc [] = []" |
|
121 |
| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs" |
|
122 |
||
123 |
value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]" |
|
124 |
value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]" |
|
125 |
||
126 |
text{* The conversion from latin to cyrillic - |
|
127 |
this conversion is valid in most cases but there are some exceptions *} |
|
128 |
primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka" |
|
129 |
where |
|
130 |
"abc2azb_aux A = А" |
|
131 |
| "abc2azb_aux B = Б" |
|
132 |
| "abc2azb_aux C = Ц" |
|
133 |
| "abc2azb_aux Č = Ч" |
|
134 |
| "abc2azb_aux Ć = Ћ" |
|
135 |
| "abc2azb_aux D = Д" |
|
136 |
| "abc2azb_aux E = Е" |
|
137 |
| "abc2azb_aux F = Ф" |
|
138 |
| "abc2azb_aux G = Г" |
|
139 |
| "abc2azb_aux H = Х" |
|
140 |
| "abc2azb_aux I = И" |
|
141 |
| "abc2azb_aux J = Ј" |
|
142 |
| "abc2azb_aux K = К" |
|
143 |
| "abc2azb_aux L = Л" |
|
144 |
| "abc2azb_aux M = М" |
|
145 |
| "abc2azb_aux N = Н" |
|
146 |
| "abc2azb_aux O = О" |
|
147 |
| "abc2azb_aux P = П" |
|
148 |
| "abc2azb_aux R = Р" |
|
149 |
| "abc2azb_aux S = С" |
|
150 |
| "abc2azb_aux Š = Ш" |
|
151 |
| "abc2azb_aux T = Т" |
|
152 |
| "abc2azb_aux U = У" |
|
153 |
| "abc2azb_aux V = В" |
|
154 |
| "abc2azb_aux Z = З" |
|
155 |
| "abc2azb_aux Ž = Ж" |
|
156 |
| "abc2azb_aux abcSpc = azbSpc" |
|
157 |
||
158 |
fun abc2azb :: "abeceda list \<Rightarrow> azbuka list" |
|
159 |
where |
|
160 |
"abc2azb [] = []" |
|
161 |
| "abc2azb [x] = [abc2azb_aux x]" |
|
162 |
| "abc2azb (x1 # x2 # xs) = |
|
163 |
(if x1 = D \<and> x2 = J then |
|
164 |
Ђ # abc2azb xs |
|
165 |
else if x1 = L \<and> x2 = J then |
|
166 |
Љ # abc2azb xs |
|
167 |
else if x1 = N \<and> x2 = J then |
|
168 |
Њ # abc2azb xs |
|
169 |
else if x1 = D \<and> x2 = Ž then |
|
170 |
Џ # abc2azb xs |
|
171 |
else |
|
172 |
abc2azb_aux x1 # abc2azb (x2 # xs) |
|
173 |
)" |
|
174 |
||
175 |
value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]" |
|
176 |
value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]" |
|
177 |
||
178 |
text{* Here are some invalid conversions *} |
|
179 |
lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]" |
|
180 |
by simp |
|
181 |
text{* but it should be: НАДЖИВЕТИ *} |
|
182 |
lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]" |
|
183 |
by simp |
|
184 |
text{* but it should be: ИНЈЕКЦИЈА *} |
|
185 |
||
186 |
text{* The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *} |
|
187 |
||
188 |
||
189 |
text{* Idempotency in one direction *} |
|
190 |
lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]" |
|
191 |
by (cases x) auto |
|
192 |
||
193 |
lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs" |
|
194 |
by (cases xs) auto |
|
195 |
||
196 |
lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs" |
|
197 |
by (cases xs) auto |
|
198 |
||
199 |
theorem "azb2abc (abc2azb x) = x" |
|
200 |
proof (induct x) |
|
201 |
case (Cons x1 xs) |
|
202 |
thus ?case |
|
203 |
proof (cases xs) |
|
204 |
case (Cons x2 xss) |
|
205 |
thus ?thesis |
|
206 |
using `azb2abc (abc2azb xs) = xs` |
|
207 |
by auto |
|
208 |
qed simp |
|
209 |
qed simp |
|
210 |
||
211 |
text{* Idempotency in the other direction does not hold *} |
|
212 |
lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]" |
|
213 |
by simp |
|
214 |
text{* It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *} |
|
215 |
||
216 |
end |