| author | wenzelm |
| Mon, 25 Sep 2023 20:56:44 +0200 | |
| changeset 78709 | ebafb2daabb7 |
| parent 66392 | c1a9bcbeeec2 |
| child 80914 | d97fdabd9e2b |
| permissions | -rw-r--r-- |
|
40967
5eb59b62e7de
avoid explicit encoding -- acknowledge UTF-8 as global default and Isabelle/jEdit preference of UTF-8-Isabelle;
wenzelm
parents:
30179
diff
changeset
|
1 |
(* Author: Filip Maric |
| 30179 | 2 |
|
3 |
Example theory involving Unicode characters (UTF-8 encoding) -- |
|
| 66392 | 4 |
Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница). |
| 30179 | 5 |
*) |
6 |
||
| 61343 | 7 |
section \<open>A Serbian theory\<close> |
| 30179 | 8 |
|
9 |
theory Serbian |
|
| 66392 | 10 |
imports Main |
| 30179 | 11 |
begin |
12 |
||
| 66392 | 13 |
text \<open>Serbian cyrillic letters.\<close> |
| 58310 | 14 |
datatype azbuka = |
| 30179 | 15 |
azbA ("А")
|
16 |
| azbB ("Б")
|
|
17 |
| azbV ("В")
|
|
18 |
| azbG ("Г")
|
|
19 |
| azbD ("Д")
|
|
20 |
| azbDj ("Ђ")
|
|
21 |
| azbE ("Е")
|
|
22 |
| azbZv ("Ж")
|
|
23 |
| azbZ ("З")
|
|
24 |
| azbI ("И")
|
|
25 |
| azbJ ("Ј")
|
|
26 |
| azbK ("К")
|
|
27 |
| azbL ("Л")
|
|
28 |
| azbLj ("Љ")
|
|
29 |
| azbM ("М")
|
|
30 |
| azbN ("Н")
|
|
31 |
| azbNj ("Њ")
|
|
32 |
| azbO ("О")
|
|
33 |
| azbP ("П")
|
|
34 |
| azbR ("Р")
|
|
35 |
| azbS ("С")
|
|
36 |
| azbT ("Т")
|
|
37 |
| azbC' ("Ћ")
|
|
38 |
| azbU ("У")
|
|
39 |
| azbF ("Ф")
|
|
40 |
| azbH ("Х")
|
|
41 |
| azbC ("Ц")
|
|
42 |
| azbCv ("Ч")
|
|
43 |
| azbDzv ("Џ")
|
|
44 |
| azbSv ("Ш")
|
|
45 |
| azbSpc |
|
46 |
||
47 |
thm azbuka.induct |
|
48 |
||
| 66392 | 49 |
text \<open>Serbian latin letters.\<close> |
| 58310 | 50 |
datatype abeceda = |
| 30179 | 51 |
abcA ("A")
|
52 |
| abcB ("B")
|
|
53 |
| abcC ("C")
|
|
54 |
| abcCv ("Č")
|
|
55 |
| abcC' ("Ć")
|
|
56 |
| abcD ("D")
|
|
57 |
| abcE ("E")
|
|
58 |
| abcF ("F")
|
|
59 |
| abcG ("G")
|
|
60 |
| abcH ("H")
|
|
61 |
| abcI ("I")
|
|
62 |
| abcJ ("J")
|
|
63 |
| abcK ("K")
|
|
64 |
| abcL ("L")
|
|
65 |
| abcM ("M")
|
|
66 |
| abcN ("N")
|
|
67 |
| abcO ("O")
|
|
68 |
| abcP ("P")
|
|
69 |
| abcR ("R")
|
|
70 |
| abcS ("S")
|
|
71 |
| abcSv ("Š")
|
|
72 |
| abcT ("T")
|
|
73 |
| abcU ("U")
|
|
74 |
| abcV ("V")
|
|
75 |
| abcZ ("Z")
|
|
76 |
| abcvZ ("Ž")
|
|
77 |
| abcSpc |
|
78 |
||
79 |
thm abeceda.induct |
|
80 |
||
81 |
||
| 66392 | 82 |
text \<open>Conversion from cyrillic to latin -- this conversion is valid in all cases.\<close> |
| 30179 | 83 |
primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list" |
84 |
where |
|
85 |
"azb2abc_aux А = [A]" |
|
86 |
| "azb2abc_aux Б = [B]" |
|
87 |
| "azb2abc_aux В = [V]" |
|
88 |
| "azb2abc_aux Г = [G]" |
|
89 |
| "azb2abc_aux Д = [D]" |
|
90 |
| "azb2abc_aux Ђ = [D, J]" |
|
91 |
| "azb2abc_aux Е = [E]" |
|
92 |
| "azb2abc_aux Ж = [Ž]" |
|
93 |
| "azb2abc_aux З = [Z]" |
|
94 |
| "azb2abc_aux И = [I]" |
|
95 |
| "azb2abc_aux Ј = [J]" |
|
96 |
| "azb2abc_aux К = [K]" |
|
97 |
| "azb2abc_aux Л = [L]" |
|
98 |
| "azb2abc_aux Љ = [L, J]" |
|
99 |
| "azb2abc_aux М = [M]" |
|
100 |
| "azb2abc_aux Н = [N]" |
|
101 |
| "azb2abc_aux Њ = [N, J]" |
|
102 |
| "azb2abc_aux О = [O]" |
|
103 |
| "azb2abc_aux П = [P]" |
|
104 |
| "azb2abc_aux Р = [R]" |
|
105 |
| "azb2abc_aux С = [S]" |
|
106 |
| "azb2abc_aux Т = [T]" |
|
107 |
| "azb2abc_aux Ћ = [Ć]" |
|
108 |
| "azb2abc_aux У = [U]" |
|
109 |
| "azb2abc_aux Ф = [F]" |
|
110 |
| "azb2abc_aux Х = [H]" |
|
111 |
| "azb2abc_aux Ц = [C]" |
|
112 |
| "azb2abc_aux Ч = [Č]" |
|
113 |
| "azb2abc_aux Џ = [D, Ž]" |
|
114 |
| "azb2abc_aux Ш = [Š]" |
|
115 |
| "azb2abc_aux azbSpc = [abcSpc]" |
|
116 |
||
117 |
primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list" |
|
118 |
where |
|
119 |
"azb2abc [] = []" |
|
120 |
| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs" |
|
121 |
||
122 |
value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]" |
|
123 |
value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]" |
|
124 |
||
| 66392 | 125 |
text \<open> |
126 |
The conversion from latin to cyrillic -- |
|
127 |
this conversion is valid in most cases but there are some exceptions.\<close> |
|
| 30179 | 128 |
primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka" |
129 |
where |
|
130 |
"abc2azb_aux A = А" |
|
131 |
| "abc2azb_aux B = Б" |
|
132 |
| "abc2azb_aux C = Ц" |
|
133 |
| "abc2azb_aux Č = Ч" |
|
134 |
| "abc2azb_aux Ć = Ћ" |
|
135 |
| "abc2azb_aux D = Д" |
|
136 |
| "abc2azb_aux E = Е" |
|
137 |
| "abc2azb_aux F = Ф" |
|
138 |
| "abc2azb_aux G = Г" |
|
139 |
| "abc2azb_aux H = Х" |
|
140 |
| "abc2azb_aux I = И" |
|
141 |
| "abc2azb_aux J = Ј" |
|
142 |
| "abc2azb_aux K = К" |
|
143 |
| "abc2azb_aux L = Л" |
|
144 |
| "abc2azb_aux M = М" |
|
145 |
| "abc2azb_aux N = Н" |
|
146 |
| "abc2azb_aux O = О" |
|
147 |
| "abc2azb_aux P = П" |
|
148 |
| "abc2azb_aux R = Р" |
|
149 |
| "abc2azb_aux S = С" |
|
150 |
| "abc2azb_aux Š = Ш" |
|
151 |
| "abc2azb_aux T = Т" |
|
152 |
| "abc2azb_aux U = У" |
|
153 |
| "abc2azb_aux V = В" |
|
154 |
| "abc2azb_aux Z = З" |
|
155 |
| "abc2azb_aux Ž = Ж" |
|
156 |
| "abc2azb_aux abcSpc = azbSpc" |
|
157 |
||
158 |
fun abc2azb :: "abeceda list \<Rightarrow> azbuka list" |
|
159 |
where |
|
160 |
"abc2azb [] = []" |
|
161 |
| "abc2azb [x] = [abc2azb_aux x]" |
|
162 |
| "abc2azb (x1 # x2 # xs) = |
|
| 66392 | 163 |
(if x1 = D \<and> x2 = J then |
164 |
Ђ # abc2azb xs |
|
165 |
else if x1 = L \<and> x2 = J then |
|
166 |
Љ # abc2azb xs |
|
167 |
else if x1 = N \<and> x2 = J then |
|
168 |
Њ # abc2azb xs |
|
169 |
else if x1 = D \<and> x2 = Ž then |
|
170 |
Џ # abc2azb xs |
|
171 |
else |
|
172 |
abc2azb_aux x1 # abc2azb (x2 # xs))" |
|
| 30179 | 173 |
|
174 |
value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]" |
|
175 |
value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]" |
|
176 |
||
| 66392 | 177 |
text \<open>Here are some invalid conversions.\<close> |
| 30179 | 178 |
lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]" |
179 |
by simp |
|
| 66392 | 180 |
text \<open>but it should be: НАДЖИВЕТИ\<close> |
| 30179 | 181 |
lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]" |
182 |
by simp |
|
| 66392 | 183 |
text \<open>but it should be: ИНЈЕКЦИЈА\<close> |
| 30179 | 184 |
|
| 66392 | 185 |
text \<open>The conversion fails for all cyrillic words that contain НЈ ЛЈ ДЈ ДЖ.\<close> |
| 30179 | 186 |
|
187 |
||
| 66392 | 188 |
text \<open>Idempotency in one direction.\<close> |
| 30179 | 189 |
lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]" |
190 |
by (cases x) auto |
|
191 |
||
192 |
lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs" |
|
193 |
by (cases xs) auto |
|
194 |
||
195 |
lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs" |
|
196 |
by (cases xs) auto |
|
197 |
||
198 |
theorem "azb2abc (abc2azb x) = x" |
|
199 |
proof (induct x) |
|
| 66392 | 200 |
case Nil |
201 |
then show ?case by simp |
|
202 |
next |
|
| 30179 | 203 |
case (Cons x1 xs) |
| 66392 | 204 |
then show ?case |
| 30179 | 205 |
proof (cases xs) |
| 66392 | 206 |
case Nil |
207 |
then show ?thesis by simp |
|
208 |
next |
|
| 30179 | 209 |
case (Cons x2 xss) |
| 66392 | 210 |
with \<open>azb2abc (abc2azb xs) = xs\<close> show ?thesis |
| 30179 | 211 |
by auto |
| 66392 | 212 |
qed |
213 |
qed |
|
| 30179 | 214 |
|
| 66392 | 215 |
text \<open>Idempotency in the other direction does not hold.\<close> |
| 30179 | 216 |
lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]" |
217 |
by simp |
|
| 66392 | 218 |
text \<open>It fails for all cyrillic words that contain НЈ ЛЈ ДЈ ДЖ.\<close> |
| 30179 | 219 |
|
220 |
end |