author | wenzelm |
Fri, 29 Nov 2024 17:40:15 +0100 | |
changeset 81507 | 08574da77b4a |
parent 80914 | d97fdabd9e2b |
permissions | -rw-r--r-- |
40967
5eb59b62e7de
avoid explicit encoding -- acknowledge UTF-8 as global default and Isabelle/jEdit preference of UTF-8-Isabelle;
wenzelm
parents:
30179
diff
changeset
|
1 |
(* Author: Filip Maric |
30179 | 2 |
|
3 |
Example theory involving Unicode characters (UTF-8 encoding) -- |
|
66392 | 4 |
Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница). |
30179 | 5 |
*) |
6 |
||
61343 | 7 |
section \<open>A Serbian theory\<close> |
30179 | 8 |
|
9 |
theory Serbian |
|
66392 | 10 |
imports Main |
30179 | 11 |
begin |
12 |
||
66392 | 13 |
text \<open>Serbian cyrillic letters.\<close> |
58310 | 14 |
datatype azbuka = |
80914
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
15 |
azbA (\<open>А\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
16 |
| azbB (\<open>Б\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
17 |
| azbV (\<open>В\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
18 |
| azbG (\<open>Г\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
19 |
| azbD (\<open>Д\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
20 |
| azbDj (\<open>Ђ\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
21 |
| azbE (\<open>Е\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
22 |
| azbZv (\<open>Ж\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
23 |
| azbZ (\<open>З\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
24 |
| azbI (\<open>И\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
25 |
| azbJ (\<open>Ј\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
26 |
| azbK (\<open>К\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
27 |
| azbL (\<open>Л\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
28 |
| azbLj (\<open>Љ\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
29 |
| azbM (\<open>М\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
30 |
| azbN (\<open>Н\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
31 |
| azbNj (\<open>Њ\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
32 |
| azbO (\<open>О\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
33 |
| azbP (\<open>П\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
34 |
| azbR (\<open>Р\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
35 |
| azbS (\<open>С\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
36 |
| azbT (\<open>Т\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
37 |
| azbC' (\<open>Ћ\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
38 |
| azbU (\<open>У\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
39 |
| azbF (\<open>Ф\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
40 |
| azbH (\<open>Х\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
41 |
| azbC (\<open>Ц\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
42 |
| azbCv (\<open>Ч\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
43 |
| azbDzv (\<open>Џ\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
44 |
| azbSv (\<open>Ш\<close>) |
30179 | 45 |
| azbSpc |
46 |
||
47 |
thm azbuka.induct |
|
48 |
||
66392 | 49 |
text \<open>Serbian latin letters.\<close> |
58310 | 50 |
datatype abeceda = |
80914
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
51 |
abcA (\<open>A\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
52 |
| abcB (\<open>B\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
53 |
| abcC (\<open>C\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
54 |
| abcCv (\<open>Č\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
55 |
| abcC' (\<open>Ć\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
56 |
| abcD (\<open>D\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
57 |
| abcE (\<open>E\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
58 |
| abcF (\<open>F\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
59 |
| abcG (\<open>G\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
60 |
| abcH (\<open>H\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
61 |
| abcI (\<open>I\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
62 |
| abcJ (\<open>J\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
63 |
| abcK (\<open>K\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
64 |
| abcL (\<open>L\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
65 |
| abcM (\<open>M\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
66 |
| abcN (\<open>N\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
67 |
| abcO (\<open>O\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
68 |
| abcP (\<open>P\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
69 |
| abcR (\<open>R\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
70 |
| abcS (\<open>S\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
71 |
| abcSv (\<open>Š\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
72 |
| abcT (\<open>T\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
73 |
| abcU (\<open>U\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
74 |
| abcV (\<open>V\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
75 |
| abcZ (\<open>Z\<close>) |
d97fdabd9e2b
standardize mixfix annotations via "isabelle update -a -u mixfix_cartouches" --- to simplify systematic editing;
wenzelm
parents:
66392
diff
changeset
|
76 |
| abcvZ (\<open>Ž\<close>) |
30179 | 77 |
| abcSpc |
78 |
||
79 |
thm abeceda.induct |
|
80 |
||
81 |
||
66392 | 82 |
text \<open>Conversion from cyrillic to latin -- this conversion is valid in all cases.\<close> |
30179 | 83 |
primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list" |
84 |
where |
|
85 |
"azb2abc_aux А = [A]" |
|
86 |
| "azb2abc_aux Б = [B]" |
|
87 |
| "azb2abc_aux В = [V]" |
|
88 |
| "azb2abc_aux Г = [G]" |
|
89 |
| "azb2abc_aux Д = [D]" |
|
90 |
| "azb2abc_aux Ђ = [D, J]" |
|
91 |
| "azb2abc_aux Е = [E]" |
|
92 |
| "azb2abc_aux Ж = [Ž]" |
|
93 |
| "azb2abc_aux З = [Z]" |
|
94 |
| "azb2abc_aux И = [I]" |
|
95 |
| "azb2abc_aux Ј = [J]" |
|
96 |
| "azb2abc_aux К = [K]" |
|
97 |
| "azb2abc_aux Л = [L]" |
|
98 |
| "azb2abc_aux Љ = [L, J]" |
|
99 |
| "azb2abc_aux М = [M]" |
|
100 |
| "azb2abc_aux Н = [N]" |
|
101 |
| "azb2abc_aux Њ = [N, J]" |
|
102 |
| "azb2abc_aux О = [O]" |
|
103 |
| "azb2abc_aux П = [P]" |
|
104 |
| "azb2abc_aux Р = [R]" |
|
105 |
| "azb2abc_aux С = [S]" |
|
106 |
| "azb2abc_aux Т = [T]" |
|
107 |
| "azb2abc_aux Ћ = [Ć]" |
|
108 |
| "azb2abc_aux У = [U]" |
|
109 |
| "azb2abc_aux Ф = [F]" |
|
110 |
| "azb2abc_aux Х = [H]" |
|
111 |
| "azb2abc_aux Ц = [C]" |
|
112 |
| "azb2abc_aux Ч = [Č]" |
|
113 |
| "azb2abc_aux Џ = [D, Ž]" |
|
114 |
| "azb2abc_aux Ш = [Š]" |
|
115 |
| "azb2abc_aux azbSpc = [abcSpc]" |
|
116 |
||
117 |
primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list" |
|
118 |
where |
|
119 |
"azb2abc [] = []" |
|
120 |
| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs" |
|
121 |
||
122 |
value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]" |
|
123 |
value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]" |
|
124 |
||
66392 | 125 |
text \<open> |
126 |
The conversion from latin to cyrillic -- |
|
127 |
this conversion is valid in most cases but there are some exceptions.\<close> |
|
30179 | 128 |
primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka" |
129 |
where |
|
130 |
"abc2azb_aux A = А" |
|
131 |
| "abc2azb_aux B = Б" |
|
132 |
| "abc2azb_aux C = Ц" |
|
133 |
| "abc2azb_aux Č = Ч" |
|
134 |
| "abc2azb_aux Ć = Ћ" |
|
135 |
| "abc2azb_aux D = Д" |
|
136 |
| "abc2azb_aux E = Е" |
|
137 |
| "abc2azb_aux F = Ф" |
|
138 |
| "abc2azb_aux G = Г" |
|
139 |
| "abc2azb_aux H = Х" |
|
140 |
| "abc2azb_aux I = И" |
|
141 |
| "abc2azb_aux J = Ј" |
|
142 |
| "abc2azb_aux K = К" |
|
143 |
| "abc2azb_aux L = Л" |
|
144 |
| "abc2azb_aux M = М" |
|
145 |
| "abc2azb_aux N = Н" |
|
146 |
| "abc2azb_aux O = О" |
|
147 |
| "abc2azb_aux P = П" |
|
148 |
| "abc2azb_aux R = Р" |
|
149 |
| "abc2azb_aux S = С" |
|
150 |
| "abc2azb_aux Š = Ш" |
|
151 |
| "abc2azb_aux T = Т" |
|
152 |
| "abc2azb_aux U = У" |
|
153 |
| "abc2azb_aux V = В" |
|
154 |
| "abc2azb_aux Z = З" |
|
155 |
| "abc2azb_aux Ž = Ж" |
|
156 |
| "abc2azb_aux abcSpc = azbSpc" |
|
157 |
||
158 |
fun abc2azb :: "abeceda list \<Rightarrow> azbuka list" |
|
159 |
where |
|
160 |
"abc2azb [] = []" |
|
161 |
| "abc2azb [x] = [abc2azb_aux x]" |
|
162 |
| "abc2azb (x1 # x2 # xs) = |
|
66392 | 163 |
(if x1 = D \<and> x2 = J then |
164 |
Ђ # abc2azb xs |
|
165 |
else if x1 = L \<and> x2 = J then |
|
166 |
Љ # abc2azb xs |
|
167 |
else if x1 = N \<and> x2 = J then |
|
168 |
Њ # abc2azb xs |
|
169 |
else if x1 = D \<and> x2 = Ž then |
|
170 |
Џ # abc2azb xs |
|
171 |
else |
|
172 |
abc2azb_aux x1 # abc2azb (x2 # xs))" |
|
30179 | 173 |
|
174 |
value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]" |
|
175 |
value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]" |
|
176 |
||
66392 | 177 |
text \<open>Here are some invalid conversions.\<close> |
30179 | 178 |
lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]" |
179 |
by simp |
|
66392 | 180 |
text \<open>but it should be: НАДЖИВЕТИ\<close> |
30179 | 181 |
lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]" |
182 |
by simp |
|
66392 | 183 |
text \<open>but it should be: ИНЈЕКЦИЈА\<close> |
30179 | 184 |
|
66392 | 185 |
text \<open>The conversion fails for all cyrillic words that contain НЈ ЛЈ ДЈ ДЖ.\<close> |
30179 | 186 |
|
187 |
||
66392 | 188 |
text \<open>Idempotency in one direction.\<close> |
30179 | 189 |
lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]" |
190 |
by (cases x) auto |
|
191 |
||
192 |
lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs" |
|
193 |
by (cases xs) auto |
|
194 |
||
195 |
lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs" |
|
196 |
by (cases xs) auto |
|
197 |
||
198 |
theorem "azb2abc (abc2azb x) = x" |
|
199 |
proof (induct x) |
|
66392 | 200 |
case Nil |
201 |
then show ?case by simp |
|
202 |
next |
|
30179 | 203 |
case (Cons x1 xs) |
66392 | 204 |
then show ?case |
30179 | 205 |
proof (cases xs) |
66392 | 206 |
case Nil |
207 |
then show ?thesis by simp |
|
208 |
next |
|
30179 | 209 |
case (Cons x2 xss) |
66392 | 210 |
with \<open>azb2abc (abc2azb xs) = xs\<close> show ?thesis |
30179 | 211 |
by auto |
66392 | 212 |
qed |
213 |
qed |
|
30179 | 214 |
|
66392 | 215 |
text \<open>Idempotency in the other direction does not hold.\<close> |
30179 | 216 |
lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]" |
217 |
by simp |
|
66392 | 218 |
text \<open>It fails for all cyrillic words that contain НЈ ЛЈ ДЈ ДЖ.\<close> |
30179 | 219 |
|
220 |
end |