author | wenzelm |
Sat, 10 Nov 2018 19:01:20 +0100 | |
changeset 69281 | 599b6d0d199b |
parent 66392 | c1a9bcbeeec2 |
permissions | -rw-r--r-- |
40967
5eb59b62e7de
avoid explicit encoding -- acknowledge UTF-8 as global default and Isabelle/jEdit preference of UTF-8-Isabelle;
wenzelm
parents:
30179
diff
changeset
|
1 |
(* Author: Filip Maric |
30179 | 2 |
|
3 |
Example theory involving Unicode characters (UTF-8 encoding) -- |
|
66392 | 4 |
Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница). |
30179 | 5 |
*) |
6 |
||
61343 | 7 |
section \<open>A Serbian theory\<close> |
30179 | 8 |
|
9 |
theory Serbian |
|
66392 | 10 |
imports Main |
30179 | 11 |
begin |
12 |
||
66392 | 13 |
text \<open>Serbian cyrillic letters.\<close> |
58310 | 14 |
datatype azbuka = |
30179 | 15 |
azbA ("А") |
16 |
| azbB ("Б") |
|
17 |
| azbV ("В") |
|
18 |
| azbG ("Г") |
|
19 |
| azbD ("Д") |
|
20 |
| azbDj ("Ђ") |
|
21 |
| azbE ("Е") |
|
22 |
| azbZv ("Ж") |
|
23 |
| azbZ ("З") |
|
24 |
| azbI ("И") |
|
25 |
| azbJ ("Ј") |
|
26 |
| azbK ("К") |
|
27 |
| azbL ("Л") |
|
28 |
| azbLj ("Љ") |
|
29 |
| azbM ("М") |
|
30 |
| azbN ("Н") |
|
31 |
| azbNj ("Њ") |
|
32 |
| azbO ("О") |
|
33 |
| azbP ("П") |
|
34 |
| azbR ("Р") |
|
35 |
| azbS ("С") |
|
36 |
| azbT ("Т") |
|
37 |
| azbC' ("Ћ") |
|
38 |
| azbU ("У") |
|
39 |
| azbF ("Ф") |
|
40 |
| azbH ("Х") |
|
41 |
| azbC ("Ц") |
|
42 |
| azbCv ("Ч") |
|
43 |
| azbDzv ("Џ") |
|
44 |
| azbSv ("Ш") |
|
45 |
| azbSpc |
|
46 |
||
47 |
thm azbuka.induct |
|
48 |
||
66392 | 49 |
text \<open>Serbian latin letters.\<close> |
58310 | 50 |
datatype abeceda = |
30179 | 51 |
abcA ("A") |
52 |
| abcB ("B") |
|
53 |
| abcC ("C") |
|
54 |
| abcCv ("Č") |
|
55 |
| abcC' ("Ć") |
|
56 |
| abcD ("D") |
|
57 |
| abcE ("E") |
|
58 |
| abcF ("F") |
|
59 |
| abcG ("G") |
|
60 |
| abcH ("H") |
|
61 |
| abcI ("I") |
|
62 |
| abcJ ("J") |
|
63 |
| abcK ("K") |
|
64 |
| abcL ("L") |
|
65 |
| abcM ("M") |
|
66 |
| abcN ("N") |
|
67 |
| abcO ("O") |
|
68 |
| abcP ("P") |
|
69 |
| abcR ("R") |
|
70 |
| abcS ("S") |
|
71 |
| abcSv ("Š") |
|
72 |
| abcT ("T") |
|
73 |
| abcU ("U") |
|
74 |
| abcV ("V") |
|
75 |
| abcZ ("Z") |
|
76 |
| abcvZ ("Ž") |
|
77 |
| abcSpc |
|
78 |
||
79 |
thm abeceda.induct |
|
80 |
||
81 |
||
66392 | 82 |
text \<open>Conversion from cyrillic to latin -- this conversion is valid in all cases.\<close> |
30179 | 83 |
primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list" |
84 |
where |
|
85 |
"azb2abc_aux А = [A]" |
|
86 |
| "azb2abc_aux Б = [B]" |
|
87 |
| "azb2abc_aux В = [V]" |
|
88 |
| "azb2abc_aux Г = [G]" |
|
89 |
| "azb2abc_aux Д = [D]" |
|
90 |
| "azb2abc_aux Ђ = [D, J]" |
|
91 |
| "azb2abc_aux Е = [E]" |
|
92 |
| "azb2abc_aux Ж = [Ž]" |
|
93 |
| "azb2abc_aux З = [Z]" |
|
94 |
| "azb2abc_aux И = [I]" |
|
95 |
| "azb2abc_aux Ј = [J]" |
|
96 |
| "azb2abc_aux К = [K]" |
|
97 |
| "azb2abc_aux Л = [L]" |
|
98 |
| "azb2abc_aux Љ = [L, J]" |
|
99 |
| "azb2abc_aux М = [M]" |
|
100 |
| "azb2abc_aux Н = [N]" |
|
101 |
| "azb2abc_aux Њ = [N, J]" |
|
102 |
| "azb2abc_aux О = [O]" |
|
103 |
| "azb2abc_aux П = [P]" |
|
104 |
| "azb2abc_aux Р = [R]" |
|
105 |
| "azb2abc_aux С = [S]" |
|
106 |
| "azb2abc_aux Т = [T]" |
|
107 |
| "azb2abc_aux Ћ = [Ć]" |
|
108 |
| "azb2abc_aux У = [U]" |
|
109 |
| "azb2abc_aux Ф = [F]" |
|
110 |
| "azb2abc_aux Х = [H]" |
|
111 |
| "azb2abc_aux Ц = [C]" |
|
112 |
| "azb2abc_aux Ч = [Č]" |
|
113 |
| "azb2abc_aux Џ = [D, Ž]" |
|
114 |
| "azb2abc_aux Ш = [Š]" |
|
115 |
| "azb2abc_aux azbSpc = [abcSpc]" |
|
116 |
||
117 |
primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list" |
|
118 |
where |
|
119 |
"azb2abc [] = []" |
|
120 |
| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs" |
|
121 |
||
122 |
value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]" |
|
123 |
value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]" |
|
124 |
||
66392 | 125 |
text \<open> |
126 |
The conversion from latin to cyrillic -- |
|
127 |
this conversion is valid in most cases but there are some exceptions.\<close> |
|
30179 | 128 |
primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka" |
129 |
where |
|
130 |
"abc2azb_aux A = А" |
|
131 |
| "abc2azb_aux B = Б" |
|
132 |
| "abc2azb_aux C = Ц" |
|
133 |
| "abc2azb_aux Č = Ч" |
|
134 |
| "abc2azb_aux Ć = Ћ" |
|
135 |
| "abc2azb_aux D = Д" |
|
136 |
| "abc2azb_aux E = Е" |
|
137 |
| "abc2azb_aux F = Ф" |
|
138 |
| "abc2azb_aux G = Г" |
|
139 |
| "abc2azb_aux H = Х" |
|
140 |
| "abc2azb_aux I = И" |
|
141 |
| "abc2azb_aux J = Ј" |
|
142 |
| "abc2azb_aux K = К" |
|
143 |
| "abc2azb_aux L = Л" |
|
144 |
| "abc2azb_aux M = М" |
|
145 |
| "abc2azb_aux N = Н" |
|
146 |
| "abc2azb_aux O = О" |
|
147 |
| "abc2azb_aux P = П" |
|
148 |
| "abc2azb_aux R = Р" |
|
149 |
| "abc2azb_aux S = С" |
|
150 |
| "abc2azb_aux Š = Ш" |
|
151 |
| "abc2azb_aux T = Т" |
|
152 |
| "abc2azb_aux U = У" |
|
153 |
| "abc2azb_aux V = В" |
|
154 |
| "abc2azb_aux Z = З" |
|
155 |
| "abc2azb_aux Ž = Ж" |
|
156 |
| "abc2azb_aux abcSpc = azbSpc" |
|
157 |
||
158 |
fun abc2azb :: "abeceda list \<Rightarrow> azbuka list" |
|
159 |
where |
|
160 |
"abc2azb [] = []" |
|
161 |
| "abc2azb [x] = [abc2azb_aux x]" |
|
162 |
| "abc2azb (x1 # x2 # xs) = |
|
66392 | 163 |
(if x1 = D \<and> x2 = J then |
164 |
Ђ # abc2azb xs |
|
165 |
else if x1 = L \<and> x2 = J then |
|
166 |
Љ # abc2azb xs |
|
167 |
else if x1 = N \<and> x2 = J then |
|
168 |
Њ # abc2azb xs |
|
169 |
else if x1 = D \<and> x2 = Ž then |
|
170 |
Џ # abc2azb xs |
|
171 |
else |
|
172 |
abc2azb_aux x1 # abc2azb (x2 # xs))" |
|
30179 | 173 |
|
174 |
value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]" |
|
175 |
value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]" |
|
176 |
||
66392 | 177 |
text \<open>Here are some invalid conversions.\<close> |
30179 | 178 |
lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]" |
179 |
by simp |
|
66392 | 180 |
text \<open>but it should be: НАДЖИВЕТИ\<close> |
30179 | 181 |
lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]" |
182 |
by simp |
|
66392 | 183 |
text \<open>but it should be: ИНЈЕКЦИЈА\<close> |
30179 | 184 |
|
66392 | 185 |
text \<open>The conversion fails for all cyrillic words that contain НЈ ЛЈ ДЈ ДЖ.\<close> |
30179 | 186 |
|
187 |
||
66392 | 188 |
text \<open>Idempotency in one direction.\<close> |
30179 | 189 |
lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]" |
190 |
by (cases x) auto |
|
191 |
||
192 |
lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs" |
|
193 |
by (cases xs) auto |
|
194 |
||
195 |
lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs" |
|
196 |
by (cases xs) auto |
|
197 |
||
198 |
theorem "azb2abc (abc2azb x) = x" |
|
199 |
proof (induct x) |
|
66392 | 200 |
case Nil |
201 |
then show ?case by simp |
|
202 |
next |
|
30179 | 203 |
case (Cons x1 xs) |
66392 | 204 |
then show ?case |
30179 | 205 |
proof (cases xs) |
66392 | 206 |
case Nil |
207 |
then show ?thesis by simp |
|
208 |
next |
|
30179 | 209 |
case (Cons x2 xss) |
66392 | 210 |
with \<open>azb2abc (abc2azb xs) = xs\<close> show ?thesis |
30179 | 211 |
by auto |
66392 | 212 |
qed |
213 |
qed |
|
30179 | 214 |
|
66392 | 215 |
text \<open>Idempotency in the other direction does not hold.\<close> |
30179 | 216 |
lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]" |
217 |
by simp |
|
66392 | 218 |
text \<open>It fails for all cyrillic words that contain НЈ ЛЈ ДЈ ДЖ.\<close> |
30179 | 219 |
|
220 |
end |