| author | Fabian Huch <huch@in.tum.de> | 
| Wed, 18 Oct 2023 19:53:39 +0200 | |
| changeset 78841 | 7f61688d4e8d | 
| parent 66392 | c1a9bcbeeec2 | 
| child 80914 | d97fdabd9e2b | 
| permissions | -rw-r--r-- | 
| 
40967
 
5eb59b62e7de
avoid explicit encoding -- acknowledge UTF-8 as global default and Isabelle/jEdit preference of UTF-8-Isabelle;
 
wenzelm 
parents: 
30179 
diff
changeset
 | 
1  | 
(* Author: Filip Maric  | 
| 30179 | 2  | 
|
3  | 
Example theory involving Unicode characters (UTF-8 encoding) --  | 
|
| 66392 | 4  | 
Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница).  | 
| 30179 | 5  | 
*)  | 
6  | 
||
| 61343 | 7  | 
section \<open>A Serbian theory\<close>  | 
| 30179 | 8  | 
|
9  | 
theory Serbian  | 
|
| 66392 | 10  | 
imports Main  | 
| 30179 | 11  | 
begin  | 
12  | 
||
| 66392 | 13  | 
text \<open>Serbian cyrillic letters.\<close>  | 
| 58310 | 14  | 
datatype azbuka =  | 
| 30179 | 15  | 
  azbA   ("А")
 | 
16  | 
| azbB   ("Б")
 | 
|
17  | 
| azbV   ("В")
 | 
|
18  | 
| azbG   ("Г")
 | 
|
19  | 
| azbD   ("Д")
 | 
|
20  | 
| azbDj  ("Ђ")
 | 
|
21  | 
| azbE   ("Е")
 | 
|
22  | 
| azbZv  ("Ж")
 | 
|
23  | 
| azbZ   ("З")
 | 
|
24  | 
| azbI   ("И")
 | 
|
25  | 
| azbJ   ("Ј")
 | 
|
26  | 
| azbK   ("К")
 | 
|
27  | 
| azbL   ("Л")
 | 
|
28  | 
| azbLj  ("Љ")
 | 
|
29  | 
| azbM   ("М")
 | 
|
30  | 
| azbN   ("Н")
 | 
|
31  | 
| azbNj  ("Њ")
 | 
|
32  | 
| azbO   ("О")
 | 
|
33  | 
| azbP   ("П")
 | 
|
34  | 
| azbR   ("Р")
 | 
|
35  | 
| azbS   ("С")
 | 
|
36  | 
| azbT   ("Т")
 | 
|
37  | 
| azbC'  ("Ћ")
 | 
|
38  | 
| azbU   ("У")
 | 
|
39  | 
| azbF   ("Ф")
 | 
|
40  | 
| azbH   ("Х")
 | 
|
41  | 
| azbC   ("Ц")
 | 
|
42  | 
| azbCv  ("Ч")
 | 
|
43  | 
| azbDzv ("Џ")
 | 
|
44  | 
| azbSv  ("Ш")
 | 
|
45  | 
| azbSpc  | 
|
46  | 
||
47  | 
thm azbuka.induct  | 
|
48  | 
||
| 66392 | 49  | 
text \<open>Serbian latin letters.\<close>  | 
| 58310 | 50  | 
datatype abeceda =  | 
| 30179 | 51  | 
  abcA   ("A")
 | 
52  | 
| abcB   ("B")
 | 
|
53  | 
| abcC   ("C")
 | 
|
54  | 
| abcCv  ("Č")
 | 
|
55  | 
| abcC'  ("Ć")
 | 
|
56  | 
| abcD   ("D")
 | 
|
57  | 
| abcE   ("E")
 | 
|
58  | 
| abcF   ("F")
 | 
|
59  | 
| abcG   ("G")
 | 
|
60  | 
| abcH   ("H")
 | 
|
61  | 
| abcI   ("I")
 | 
|
62  | 
| abcJ   ("J")
 | 
|
63  | 
| abcK   ("K")
 | 
|
64  | 
| abcL   ("L")
 | 
|
65  | 
| abcM   ("M")
 | 
|
66  | 
| abcN   ("N")
 | 
|
67  | 
| abcO   ("O")
 | 
|
68  | 
| abcP   ("P")
 | 
|
69  | 
| abcR   ("R")
 | 
|
70  | 
| abcS   ("S")
 | 
|
71  | 
| abcSv  ("Š")
 | 
|
72  | 
| abcT   ("T")
 | 
|
73  | 
| abcU   ("U")
 | 
|
74  | 
| abcV   ("V")
 | 
|
75  | 
| abcZ   ("Z")
 | 
|
76  | 
| abcvZ  ("Ž")
 | 
|
77  | 
| abcSpc  | 
|
78  | 
||
79  | 
thm abeceda.induct  | 
|
80  | 
||
81  | 
||
| 66392 | 82  | 
text \<open>Conversion from cyrillic to latin -- this conversion is valid in all cases.\<close>  | 
| 30179 | 83  | 
primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list"  | 
84  | 
where  | 
|
85  | 
"azb2abc_aux А = [A]"  | 
|
86  | 
| "azb2abc_aux Б = [B]"  | 
|
87  | 
| "azb2abc_aux В = [V]"  | 
|
88  | 
| "azb2abc_aux Г = [G]"  | 
|
89  | 
| "azb2abc_aux Д = [D]"  | 
|
90  | 
| "azb2abc_aux Ђ = [D, J]"  | 
|
91  | 
| "azb2abc_aux Е = [E]"  | 
|
92  | 
| "azb2abc_aux Ж = [Ž]"  | 
|
93  | 
| "azb2abc_aux З = [Z]"  | 
|
94  | 
| "azb2abc_aux И = [I]"  | 
|
95  | 
| "azb2abc_aux Ј = [J]"  | 
|
96  | 
| "azb2abc_aux К = [K]"  | 
|
97  | 
| "azb2abc_aux Л = [L]"  | 
|
98  | 
| "azb2abc_aux Љ = [L, J]"  | 
|
99  | 
| "azb2abc_aux М = [M]"  | 
|
100  | 
| "azb2abc_aux Н = [N]"  | 
|
101  | 
| "azb2abc_aux Њ = [N, J]"  | 
|
102  | 
| "azb2abc_aux О = [O]"  | 
|
103  | 
| "azb2abc_aux П = [P]"  | 
|
104  | 
| "azb2abc_aux Р = [R]"  | 
|
105  | 
| "azb2abc_aux С = [S]"  | 
|
106  | 
| "azb2abc_aux Т = [T]"  | 
|
107  | 
| "azb2abc_aux Ћ = [Ć]"  | 
|
108  | 
| "azb2abc_aux У = [U]"  | 
|
109  | 
| "azb2abc_aux Ф = [F]"  | 
|
110  | 
| "azb2abc_aux Х = [H]"  | 
|
111  | 
| "azb2abc_aux Ц = [C]"  | 
|
112  | 
| "azb2abc_aux Ч = [Č]"  | 
|
113  | 
| "azb2abc_aux Џ = [D, Ž]"  | 
|
114  | 
| "azb2abc_aux Ш = [Š]"  | 
|
115  | 
| "azb2abc_aux azbSpc = [abcSpc]"  | 
|
116  | 
||
117  | 
primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list"  | 
|
118  | 
where  | 
|
119  | 
"azb2abc [] = []"  | 
|
120  | 
| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs"  | 
|
121  | 
||
122  | 
value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]"  | 
|
123  | 
value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]"  | 
|
124  | 
||
| 66392 | 125  | 
text \<open>  | 
126  | 
The conversion from latin to cyrillic --  | 
|
127  | 
this conversion is valid in most cases but there are some exceptions.\<close>  | 
|
| 30179 | 128  | 
primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka"  | 
129  | 
where  | 
|
130  | 
"abc2azb_aux A = А"  | 
|
131  | 
| "abc2azb_aux B = Б"  | 
|
132  | 
| "abc2azb_aux C = Ц"  | 
|
133  | 
| "abc2azb_aux Č = Ч"  | 
|
134  | 
| "abc2azb_aux Ć = Ћ"  | 
|
135  | 
| "abc2azb_aux D = Д"  | 
|
136  | 
| "abc2azb_aux E = Е"  | 
|
137  | 
| "abc2azb_aux F = Ф"  | 
|
138  | 
| "abc2azb_aux G = Г"  | 
|
139  | 
| "abc2azb_aux H = Х"  | 
|
140  | 
| "abc2azb_aux I = И"  | 
|
141  | 
| "abc2azb_aux J = Ј"  | 
|
142  | 
| "abc2azb_aux K = К"  | 
|
143  | 
| "abc2azb_aux L = Л"  | 
|
144  | 
| "abc2azb_aux M = М"  | 
|
145  | 
| "abc2azb_aux N = Н"  | 
|
146  | 
| "abc2azb_aux O = О"  | 
|
147  | 
| "abc2azb_aux P = П"  | 
|
148  | 
| "abc2azb_aux R = Р"  | 
|
149  | 
| "abc2azb_aux S = С"  | 
|
150  | 
| "abc2azb_aux Š = Ш"  | 
|
151  | 
| "abc2azb_aux T = Т"  | 
|
152  | 
| "abc2azb_aux U = У"  | 
|
153  | 
| "abc2azb_aux V = В"  | 
|
154  | 
| "abc2azb_aux Z = З"  | 
|
155  | 
| "abc2azb_aux Ž = Ж"  | 
|
156  | 
| "abc2azb_aux abcSpc = azbSpc"  | 
|
157  | 
||
158  | 
fun abc2azb :: "abeceda list \<Rightarrow> azbuka list"  | 
|
159  | 
where  | 
|
160  | 
"abc2azb [] = []"  | 
|
161  | 
| "abc2azb [x] = [abc2azb_aux x]"  | 
|
162  | 
| "abc2azb (x1 # x2 # xs) =  | 
|
| 66392 | 163  | 
(if x1 = D \<and> x2 = J then  | 
164  | 
Ђ # abc2azb xs  | 
|
165  | 
else if x1 = L \<and> x2 = J then  | 
|
166  | 
Љ # abc2azb xs  | 
|
167  | 
else if x1 = N \<and> x2 = J then  | 
|
168  | 
Њ # abc2azb xs  | 
|
169  | 
else if x1 = D \<and> x2 = Ž then  | 
|
170  | 
Џ # abc2azb xs  | 
|
171  | 
else  | 
|
172  | 
abc2azb_aux x1 # abc2azb (x2 # xs))"  | 
|
| 30179 | 173  | 
|
174  | 
value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]"  | 
|
175  | 
value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]"  | 
|
176  | 
||
| 66392 | 177  | 
text \<open>Here are some invalid conversions.\<close>  | 
| 30179 | 178  | 
lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]"  | 
179  | 
by simp  | 
|
| 66392 | 180  | 
text \<open>but it should be: НАДЖИВЕТИ\<close>  | 
| 30179 | 181  | 
lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]"  | 
182  | 
by simp  | 
|
| 66392 | 183  | 
text \<open>but it should be: ИНЈЕКЦИЈА\<close>  | 
| 30179 | 184  | 
|
| 66392 | 185  | 
text \<open>The conversion fails for all cyrillic words that contain НЈ ЛЈ ДЈ ДЖ.\<close>  | 
| 30179 | 186  | 
|
187  | 
||
| 66392 | 188  | 
text \<open>Idempotency in one direction.\<close>  | 
| 30179 | 189  | 
lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]"  | 
190  | 
by (cases x) auto  | 
|
191  | 
||
192  | 
lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs"  | 
|
193  | 
by (cases xs) auto  | 
|
194  | 
||
195  | 
lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs"  | 
|
196  | 
by (cases xs) auto  | 
|
197  | 
||
198  | 
theorem "azb2abc (abc2azb x) = x"  | 
|
199  | 
proof (induct x)  | 
|
| 66392 | 200  | 
case Nil  | 
201  | 
then show ?case by simp  | 
|
202  | 
next  | 
|
| 30179 | 203  | 
case (Cons x1 xs)  | 
| 66392 | 204  | 
then show ?case  | 
| 30179 | 205  | 
proof (cases xs)  | 
| 66392 | 206  | 
case Nil  | 
207  | 
then show ?thesis by simp  | 
|
208  | 
next  | 
|
| 30179 | 209  | 
case (Cons x2 xss)  | 
| 66392 | 210  | 
with \<open>azb2abc (abc2azb xs) = xs\<close> show ?thesis  | 
| 30179 | 211  | 
by auto  | 
| 66392 | 212  | 
qed  | 
213  | 
qed  | 
|
| 30179 | 214  | 
|
| 66392 | 215  | 
text \<open>Idempotency in the other direction does not hold.\<close>  | 
| 30179 | 216  | 
lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]"  | 
217  | 
by simp  | 
|
| 66392 | 218  | 
text \<open>It fails for all cyrillic words that contain НЈ ЛЈ ДЈ ДЖ.\<close>  | 
| 30179 | 219  | 
|
220  | 
end  |