| author | wenzelm | 
| Sat, 05 Apr 2014 23:17:30 +0200 | |
| changeset 56429 | bc61161a5bd0 | 
| parent 40967 | 5eb59b62e7de | 
| child 58249 | 180f1b3508ed | 
| permissions | -rw-r--r-- | 
| 40967 
5eb59b62e7de
avoid explicit encoding -- acknowledge UTF-8 as global default and Isabelle/jEdit preference of UTF-8-Isabelle;
 wenzelm parents: 
30179diff
changeset | 1 | (* Author: Filip Maric | 
| 30179 | 2 | |
| 3 | Example theory involving Unicode characters (UTF-8 encoding) -- | |
| 4 | Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница) | |
| 5 | *) | |
| 6 | ||
| 7 | header {* A Serbian theory *}
 | |
| 8 | ||
| 9 | theory Serbian | |
| 10 | imports Main | |
| 11 | begin | |
| 12 | ||
| 13 | text{* Serbian cyrillic letters *}
 | |
| 14 | datatype azbuka = | |
| 15 |   azbA   ("А")
 | |
| 16 | | azbB   ("Б")
 | |
| 17 | | azbV   ("В")
 | |
| 18 | | azbG   ("Г")
 | |
| 19 | | azbD   ("Д")
 | |
| 20 | | azbDj  ("Ђ")
 | |
| 21 | | azbE   ("Е")
 | |
| 22 | | azbZv  ("Ж")
 | |
| 23 | | azbZ   ("З")
 | |
| 24 | | azbI   ("И")
 | |
| 25 | | azbJ   ("Ј")
 | |
| 26 | | azbK   ("К")
 | |
| 27 | | azbL   ("Л")
 | |
| 28 | | azbLj  ("Љ")
 | |
| 29 | | azbM   ("М")
 | |
| 30 | | azbN   ("Н")
 | |
| 31 | | azbNj  ("Њ")
 | |
| 32 | | azbO   ("О")
 | |
| 33 | | azbP   ("П")
 | |
| 34 | | azbR   ("Р")
 | |
| 35 | | azbS   ("С")
 | |
| 36 | | azbT   ("Т")
 | |
| 37 | | azbC'  ("Ћ")
 | |
| 38 | | azbU   ("У")
 | |
| 39 | | azbF   ("Ф")
 | |
| 40 | | azbH   ("Х")
 | |
| 41 | | azbC   ("Ц")
 | |
| 42 | | azbCv  ("Ч")
 | |
| 43 | | azbDzv ("Џ")
 | |
| 44 | | azbSv  ("Ш")
 | |
| 45 | | azbSpc | |
| 46 | ||
| 47 | thm azbuka.induct | |
| 48 | ||
| 49 | text{* Serbian latin letters *}
 | |
| 50 | datatype abeceda = | |
| 51 |   abcA   ("A")
 | |
| 52 | | abcB   ("B")
 | |
| 53 | | abcC   ("C")
 | |
| 54 | | abcCv  ("Č")
 | |
| 55 | | abcC'  ("Ć")
 | |
| 56 | | abcD   ("D")
 | |
| 57 | | abcE   ("E")
 | |
| 58 | | abcF   ("F")
 | |
| 59 | | abcG   ("G")
 | |
| 60 | | abcH   ("H")
 | |
| 61 | | abcI   ("I")
 | |
| 62 | | abcJ   ("J")
 | |
| 63 | | abcK   ("K")
 | |
| 64 | | abcL   ("L")
 | |
| 65 | | abcM   ("M")
 | |
| 66 | | abcN   ("N")
 | |
| 67 | | abcO   ("O")
 | |
| 68 | | abcP   ("P")
 | |
| 69 | | abcR   ("R")
 | |
| 70 | | abcS   ("S")
 | |
| 71 | | abcSv  ("Š")
 | |
| 72 | | abcT   ("T")
 | |
| 73 | | abcU   ("U")
 | |
| 74 | | abcV   ("V")
 | |
| 75 | | abcZ   ("Z")
 | |
| 76 | | abcvZ  ("Ž")
 | |
| 77 | | abcSpc | |
| 78 | ||
| 79 | thm abeceda.induct | |
| 80 | ||
| 81 | ||
| 82 | text{* Conversion from cyrillic to latin - 
 | |
| 83 | this conversion is valid in all cases *} | |
| 84 | primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list" | |
| 85 | where | |
| 86 | "azb2abc_aux А = [A]" | |
| 87 | | "azb2abc_aux Б = [B]" | |
| 88 | | "azb2abc_aux В = [V]" | |
| 89 | | "azb2abc_aux Г = [G]" | |
| 90 | | "azb2abc_aux Д = [D]" | |
| 91 | | "azb2abc_aux Ђ = [D, J]" | |
| 92 | | "azb2abc_aux Е = [E]" | |
| 93 | | "azb2abc_aux Ж = [Ž]" | |
| 94 | | "azb2abc_aux З = [Z]" | |
| 95 | | "azb2abc_aux И = [I]" | |
| 96 | | "azb2abc_aux Ј = [J]" | |
| 97 | | "azb2abc_aux К = [K]" | |
| 98 | | "azb2abc_aux Л = [L]" | |
| 99 | | "azb2abc_aux Љ = [L, J]" | |
| 100 | | "azb2abc_aux М = [M]" | |
| 101 | | "azb2abc_aux Н = [N]" | |
| 102 | | "azb2abc_aux Њ = [N, J]" | |
| 103 | | "azb2abc_aux О = [O]" | |
| 104 | | "azb2abc_aux П = [P]" | |
| 105 | | "azb2abc_aux Р = [R]" | |
| 106 | | "azb2abc_aux С = [S]" | |
| 107 | | "azb2abc_aux Т = [T]" | |
| 108 | | "azb2abc_aux Ћ = [Ć]" | |
| 109 | | "azb2abc_aux У = [U]" | |
| 110 | | "azb2abc_aux Ф = [F]" | |
| 111 | | "azb2abc_aux Х = [H]" | |
| 112 | | "azb2abc_aux Ц = [C]" | |
| 113 | | "azb2abc_aux Ч = [Č]" | |
| 114 | | "azb2abc_aux Џ = [D, Ž]" | |
| 115 | | "azb2abc_aux Ш = [Š]" | |
| 116 | | "azb2abc_aux azbSpc = [abcSpc]" | |
| 117 | ||
| 118 | primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list" | |
| 119 | where | |
| 120 | "azb2abc [] = []" | |
| 121 | | "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs" | |
| 122 | ||
| 123 | value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]" | |
| 124 | value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]" | |
| 125 | ||
| 126 | text{* The conversion from latin to cyrillic - 
 | |
| 127 | this conversion is valid in most cases but there are some exceptions *} | |
| 128 | primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka" | |
| 129 | where | |
| 130 | "abc2azb_aux A = А" | |
| 131 | | "abc2azb_aux B = Б" | |
| 132 | | "abc2azb_aux C = Ц" | |
| 133 | | "abc2azb_aux Č = Ч" | |
| 134 | | "abc2azb_aux Ć = Ћ" | |
| 135 | | "abc2azb_aux D = Д" | |
| 136 | | "abc2azb_aux E = Е" | |
| 137 | | "abc2azb_aux F = Ф" | |
| 138 | | "abc2azb_aux G = Г" | |
| 139 | | "abc2azb_aux H = Х" | |
| 140 | | "abc2azb_aux I = И" | |
| 141 | | "abc2azb_aux J = Ј" | |
| 142 | | "abc2azb_aux K = К" | |
| 143 | | "abc2azb_aux L = Л" | |
| 144 | | "abc2azb_aux M = М" | |
| 145 | | "abc2azb_aux N = Н" | |
| 146 | | "abc2azb_aux O = О" | |
| 147 | | "abc2azb_aux P = П" | |
| 148 | | "abc2azb_aux R = Р" | |
| 149 | | "abc2azb_aux S = С" | |
| 150 | | "abc2azb_aux Š = Ш" | |
| 151 | | "abc2azb_aux T = Т" | |
| 152 | | "abc2azb_aux U = У" | |
| 153 | | "abc2azb_aux V = В" | |
| 154 | | "abc2azb_aux Z = З" | |
| 155 | | "abc2azb_aux Ž = Ж" | |
| 156 | | "abc2azb_aux abcSpc = azbSpc" | |
| 157 | ||
| 158 | fun abc2azb :: "abeceda list \<Rightarrow> azbuka list" | |
| 159 | where | |
| 160 | "abc2azb [] = []" | |
| 161 | | "abc2azb [x] = [abc2azb_aux x]" | |
| 162 | | "abc2azb (x1 # x2 # xs) = | |
| 163 | (if x1 = D \<and> x2 = J then | |
| 164 | Ђ # abc2azb xs | |
| 165 | else if x1 = L \<and> x2 = J then | |
| 166 | Љ # abc2azb xs | |
| 167 | else if x1 = N \<and> x2 = J then | |
| 168 | Њ # abc2azb xs | |
| 169 | else if x1 = D \<and> x2 = Ž then | |
| 170 | Џ # abc2azb xs | |
| 171 | else | |
| 172 | abc2azb_aux x1 # abc2azb (x2 # xs) | |
| 173 | )" | |
| 174 | ||
| 175 | value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]" | |
| 176 | value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]" | |
| 177 | ||
| 178 | text{* Here are some invalid conversions *}
 | |
| 179 | lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]" | |
| 180 | by simp | |
| 181 | text{* but it should be: НАДЖИВЕТИ *}
 | |
| 182 | lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]" | |
| 183 | by simp | |
| 184 | text{* but it should be: ИНЈЕКЦИЈА *}
 | |
| 185 | ||
| 186 | text{* The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
 | |
| 187 | ||
| 188 | ||
| 189 | text{* Idempotency in one direction *}
 | |
| 190 | lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]" | |
| 191 | by (cases x) auto | |
| 192 | ||
| 193 | lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs" | |
| 194 | by (cases xs) auto | |
| 195 | ||
| 196 | lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs" | |
| 197 | by (cases xs) auto | |
| 198 | ||
| 199 | theorem "azb2abc (abc2azb x) = x" | |
| 200 | proof (induct x) | |
| 201 | case (Cons x1 xs) | |
| 202 | thus ?case | |
| 203 | proof (cases xs) | |
| 204 | case (Cons x2 xss) | |
| 205 | thus ?thesis | |
| 206 | using `azb2abc (abc2azb xs) = xs` | |
| 207 | by auto | |
| 208 | qed simp | |
| 209 | qed simp | |
| 210 | ||
| 211 | text{* Idempotency in the other direction does not hold *}
 | |
| 212 | lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]" | |
| 213 | by simp | |
| 214 | text{* It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
 | |
| 215 | ||
| 216 | end |