30179
|
1 |
(* -*- coding: utf-8 -*- :encoding=utf-8:
|
|
2 |
Author: Filip Maric
|
|
3 |
|
|
4 |
Example theory involving Unicode characters (UTF-8 encoding) --
|
|
5 |
Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница)
|
|
6 |
*)
|
|
7 |
|
|
8 |
header {* A Serbian theory *}
|
|
9 |
|
|
10 |
theory Serbian
|
|
11 |
imports Main
|
|
12 |
begin
|
|
13 |
|
|
14 |
text{* Serbian cyrillic letters *}
|
|
15 |
datatype azbuka =
|
|
16 |
azbA ("А")
|
|
17 |
| azbB ("Б")
|
|
18 |
| azbV ("В")
|
|
19 |
| azbG ("Г")
|
|
20 |
| azbD ("Д")
|
|
21 |
| azbDj ("Ђ")
|
|
22 |
| azbE ("Е")
|
|
23 |
| azbZv ("Ж")
|
|
24 |
| azbZ ("З")
|
|
25 |
| azbI ("И")
|
|
26 |
| azbJ ("Ј")
|
|
27 |
| azbK ("К")
|
|
28 |
| azbL ("Л")
|
|
29 |
| azbLj ("Љ")
|
|
30 |
| azbM ("М")
|
|
31 |
| azbN ("Н")
|
|
32 |
| azbNj ("Њ")
|
|
33 |
| azbO ("О")
|
|
34 |
| azbP ("П")
|
|
35 |
| azbR ("Р")
|
|
36 |
| azbS ("С")
|
|
37 |
| azbT ("Т")
|
|
38 |
| azbC' ("Ћ")
|
|
39 |
| azbU ("У")
|
|
40 |
| azbF ("Ф")
|
|
41 |
| azbH ("Х")
|
|
42 |
| azbC ("Ц")
|
|
43 |
| azbCv ("Ч")
|
|
44 |
| azbDzv ("Џ")
|
|
45 |
| azbSv ("Ш")
|
|
46 |
| azbSpc
|
|
47 |
|
|
48 |
thm azbuka.induct
|
|
49 |
|
|
50 |
text{* Serbian latin letters *}
|
|
51 |
datatype abeceda =
|
|
52 |
abcA ("A")
|
|
53 |
| abcB ("B")
|
|
54 |
| abcC ("C")
|
|
55 |
| abcCv ("Č")
|
|
56 |
| abcC' ("Ć")
|
|
57 |
| abcD ("D")
|
|
58 |
| abcE ("E")
|
|
59 |
| abcF ("F")
|
|
60 |
| abcG ("G")
|
|
61 |
| abcH ("H")
|
|
62 |
| abcI ("I")
|
|
63 |
| abcJ ("J")
|
|
64 |
| abcK ("K")
|
|
65 |
| abcL ("L")
|
|
66 |
| abcM ("M")
|
|
67 |
| abcN ("N")
|
|
68 |
| abcO ("O")
|
|
69 |
| abcP ("P")
|
|
70 |
| abcR ("R")
|
|
71 |
| abcS ("S")
|
|
72 |
| abcSv ("Š")
|
|
73 |
| abcT ("T")
|
|
74 |
| abcU ("U")
|
|
75 |
| abcV ("V")
|
|
76 |
| abcZ ("Z")
|
|
77 |
| abcvZ ("Ž")
|
|
78 |
| abcSpc
|
|
79 |
|
|
80 |
thm abeceda.induct
|
|
81 |
|
|
82 |
|
|
83 |
text{* Conversion from cyrillic to latin -
|
|
84 |
this conversion is valid in all cases *}
|
|
85 |
primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list"
|
|
86 |
where
|
|
87 |
"azb2abc_aux А = [A]"
|
|
88 |
| "azb2abc_aux Б = [B]"
|
|
89 |
| "azb2abc_aux В = [V]"
|
|
90 |
| "azb2abc_aux Г = [G]"
|
|
91 |
| "azb2abc_aux Д = [D]"
|
|
92 |
| "azb2abc_aux Ђ = [D, J]"
|
|
93 |
| "azb2abc_aux Е = [E]"
|
|
94 |
| "azb2abc_aux Ж = [Ž]"
|
|
95 |
| "azb2abc_aux З = [Z]"
|
|
96 |
| "azb2abc_aux И = [I]"
|
|
97 |
| "azb2abc_aux Ј = [J]"
|
|
98 |
| "azb2abc_aux К = [K]"
|
|
99 |
| "azb2abc_aux Л = [L]"
|
|
100 |
| "azb2abc_aux Љ = [L, J]"
|
|
101 |
| "azb2abc_aux М = [M]"
|
|
102 |
| "azb2abc_aux Н = [N]"
|
|
103 |
| "azb2abc_aux Њ = [N, J]"
|
|
104 |
| "azb2abc_aux О = [O]"
|
|
105 |
| "azb2abc_aux П = [P]"
|
|
106 |
| "azb2abc_aux Р = [R]"
|
|
107 |
| "azb2abc_aux С = [S]"
|
|
108 |
| "azb2abc_aux Т = [T]"
|
|
109 |
| "azb2abc_aux Ћ = [Ć]"
|
|
110 |
| "azb2abc_aux У = [U]"
|
|
111 |
| "azb2abc_aux Ф = [F]"
|
|
112 |
| "azb2abc_aux Х = [H]"
|
|
113 |
| "azb2abc_aux Ц = [C]"
|
|
114 |
| "azb2abc_aux Ч = [Č]"
|
|
115 |
| "azb2abc_aux Џ = [D, Ž]"
|
|
116 |
| "azb2abc_aux Ш = [Š]"
|
|
117 |
| "azb2abc_aux azbSpc = [abcSpc]"
|
|
118 |
|
|
119 |
primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list"
|
|
120 |
where
|
|
121 |
"azb2abc [] = []"
|
|
122 |
| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs"
|
|
123 |
|
|
124 |
value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]"
|
|
125 |
value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]"
|
|
126 |
|
|
127 |
text{* The conversion from latin to cyrillic -
|
|
128 |
this conversion is valid in most cases but there are some exceptions *}
|
|
129 |
primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka"
|
|
130 |
where
|
|
131 |
"abc2azb_aux A = А"
|
|
132 |
| "abc2azb_aux B = Б"
|
|
133 |
| "abc2azb_aux C = Ц"
|
|
134 |
| "abc2azb_aux Č = Ч"
|
|
135 |
| "abc2azb_aux Ć = Ћ"
|
|
136 |
| "abc2azb_aux D = Д"
|
|
137 |
| "abc2azb_aux E = Е"
|
|
138 |
| "abc2azb_aux F = Ф"
|
|
139 |
| "abc2azb_aux G = Г"
|
|
140 |
| "abc2azb_aux H = Х"
|
|
141 |
| "abc2azb_aux I = И"
|
|
142 |
| "abc2azb_aux J = Ј"
|
|
143 |
| "abc2azb_aux K = К"
|
|
144 |
| "abc2azb_aux L = Л"
|
|
145 |
| "abc2azb_aux M = М"
|
|
146 |
| "abc2azb_aux N = Н"
|
|
147 |
| "abc2azb_aux O = О"
|
|
148 |
| "abc2azb_aux P = П"
|
|
149 |
| "abc2azb_aux R = Р"
|
|
150 |
| "abc2azb_aux S = С"
|
|
151 |
| "abc2azb_aux Š = Ш"
|
|
152 |
| "abc2azb_aux T = Т"
|
|
153 |
| "abc2azb_aux U = У"
|
|
154 |
| "abc2azb_aux V = В"
|
|
155 |
| "abc2azb_aux Z = З"
|
|
156 |
| "abc2azb_aux Ž = Ж"
|
|
157 |
| "abc2azb_aux abcSpc = azbSpc"
|
|
158 |
|
|
159 |
fun abc2azb :: "abeceda list \<Rightarrow> azbuka list"
|
|
160 |
where
|
|
161 |
"abc2azb [] = []"
|
|
162 |
| "abc2azb [x] = [abc2azb_aux x]"
|
|
163 |
| "abc2azb (x1 # x2 # xs) =
|
|
164 |
(if x1 = D \<and> x2 = J then
|
|
165 |
Ђ # abc2azb xs
|
|
166 |
else if x1 = L \<and> x2 = J then
|
|
167 |
Љ # abc2azb xs
|
|
168 |
else if x1 = N \<and> x2 = J then
|
|
169 |
Њ # abc2azb xs
|
|
170 |
else if x1 = D \<and> x2 = Ž then
|
|
171 |
Џ # abc2azb xs
|
|
172 |
else
|
|
173 |
abc2azb_aux x1 # abc2azb (x2 # xs)
|
|
174 |
)"
|
|
175 |
|
|
176 |
value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]"
|
|
177 |
value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]"
|
|
178 |
|
|
179 |
text{* Here are some invalid conversions *}
|
|
180 |
lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]"
|
|
181 |
by simp
|
|
182 |
text{* but it should be: НАДЖИВЕТИ *}
|
|
183 |
lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]"
|
|
184 |
by simp
|
|
185 |
text{* but it should be: ИНЈЕКЦИЈА *}
|
|
186 |
|
|
187 |
text{* The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
|
|
188 |
|
|
189 |
|
|
190 |
text{* Idempotency in one direction *}
|
|
191 |
lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]"
|
|
192 |
by (cases x) auto
|
|
193 |
|
|
194 |
lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs"
|
|
195 |
by (cases xs) auto
|
|
196 |
|
|
197 |
lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs"
|
|
198 |
by (cases xs) auto
|
|
199 |
|
|
200 |
theorem "azb2abc (abc2azb x) = x"
|
|
201 |
proof (induct x)
|
|
202 |
case (Cons x1 xs)
|
|
203 |
thus ?case
|
|
204 |
proof (cases xs)
|
|
205 |
case (Cons x2 xss)
|
|
206 |
thus ?thesis
|
|
207 |
using `azb2abc (abc2azb xs) = xs`
|
|
208 |
by auto
|
|
209 |
qed simp
|
|
210 |
qed simp
|
|
211 |
|
|
212 |
text{* Idempotency in the other direction does not hold *}
|
|
213 |
lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]"
|
|
214 |
by simp
|
|
215 |
text{* It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
|
|
216 |
|
|
217 |
end
|