| 30179 |      1 | (* -*- coding: utf-8 -*- :encoding=utf-8:  
 | 
|  |      2 |     Author:     Filip Maric
 | 
|  |      3 | 
 | 
|  |      4 | Example theory involving Unicode characters (UTF-8 encoding) -- 
 | 
|  |      5 | Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница)
 | 
|  |      6 | *)
 | 
|  |      7 | 
 | 
|  |      8 | header {* A Serbian theory *}
 | 
|  |      9 | 
 | 
|  |     10 | theory Serbian
 | 
|  |     11 | imports Main
 | 
|  |     12 | begin
 | 
|  |     13 | 
 | 
|  |     14 | text{* Serbian cyrillic letters *}
 | 
|  |     15 | datatype azbuka =
 | 
|  |     16 |   azbA   ("А")
 | 
|  |     17 | | azbB   ("Б")
 | 
|  |     18 | | azbV   ("В")
 | 
|  |     19 | | azbG   ("Г")
 | 
|  |     20 | | azbD   ("Д")
 | 
|  |     21 | | azbDj  ("Ђ")
 | 
|  |     22 | | azbE   ("Е")
 | 
|  |     23 | | azbZv  ("Ж")
 | 
|  |     24 | | azbZ   ("З")
 | 
|  |     25 | | azbI   ("И")
 | 
|  |     26 | | azbJ   ("Ј")
 | 
|  |     27 | | azbK   ("К")
 | 
|  |     28 | | azbL   ("Л")
 | 
|  |     29 | | azbLj  ("Љ")
 | 
|  |     30 | | azbM   ("М")
 | 
|  |     31 | | azbN   ("Н")
 | 
|  |     32 | | azbNj  ("Њ")
 | 
|  |     33 | | azbO   ("О")
 | 
|  |     34 | | azbP   ("П")
 | 
|  |     35 | | azbR   ("Р")
 | 
|  |     36 | | azbS   ("С")
 | 
|  |     37 | | azbT   ("Т")
 | 
|  |     38 | | azbC'  ("Ћ")
 | 
|  |     39 | | azbU   ("У")
 | 
|  |     40 | | azbF   ("Ф")
 | 
|  |     41 | | azbH   ("Х")
 | 
|  |     42 | | azbC   ("Ц")
 | 
|  |     43 | | azbCv  ("Ч")
 | 
|  |     44 | | azbDzv ("Џ")
 | 
|  |     45 | | azbSv  ("Ш")
 | 
|  |     46 | | azbSpc
 | 
|  |     47 | 
 | 
|  |     48 | thm azbuka.induct
 | 
|  |     49 | 
 | 
|  |     50 | text{* Serbian latin letters *}
 | 
|  |     51 | datatype abeceda =
 | 
|  |     52 |   abcA   ("A")
 | 
|  |     53 | | abcB   ("B")
 | 
|  |     54 | | abcC   ("C")
 | 
|  |     55 | | abcCv  ("Č")
 | 
|  |     56 | | abcC'  ("Ć")
 | 
|  |     57 | | abcD   ("D")
 | 
|  |     58 | | abcE   ("E")
 | 
|  |     59 | | abcF   ("F")
 | 
|  |     60 | | abcG   ("G")
 | 
|  |     61 | | abcH   ("H")
 | 
|  |     62 | | abcI   ("I")
 | 
|  |     63 | | abcJ   ("J")
 | 
|  |     64 | | abcK   ("K")
 | 
|  |     65 | | abcL   ("L")
 | 
|  |     66 | | abcM   ("M")
 | 
|  |     67 | | abcN   ("N")
 | 
|  |     68 | | abcO   ("O")
 | 
|  |     69 | | abcP   ("P")
 | 
|  |     70 | | abcR   ("R")
 | 
|  |     71 | | abcS   ("S")
 | 
|  |     72 | | abcSv  ("Š")
 | 
|  |     73 | | abcT   ("T")
 | 
|  |     74 | | abcU   ("U")
 | 
|  |     75 | | abcV   ("V")
 | 
|  |     76 | | abcZ   ("Z")
 | 
|  |     77 | | abcvZ  ("Ž")
 | 
|  |     78 | | abcSpc
 | 
|  |     79 | 
 | 
|  |     80 | thm abeceda.induct
 | 
|  |     81 | 
 | 
|  |     82 | 
 | 
|  |     83 | text{* Conversion from cyrillic to latin - 
 | 
|  |     84 |        this conversion is valid in all cases *}
 | 
|  |     85 | primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list"
 | 
|  |     86 | where
 | 
|  |     87 |   "azb2abc_aux А = [A]"
 | 
|  |     88 | | "azb2abc_aux Б = [B]"
 | 
|  |     89 | | "azb2abc_aux В = [V]"
 | 
|  |     90 | | "azb2abc_aux Г = [G]"
 | 
|  |     91 | | "azb2abc_aux Д = [D]"
 | 
|  |     92 | | "azb2abc_aux Ђ = [D, J]"
 | 
|  |     93 | | "azb2abc_aux Е = [E]"
 | 
|  |     94 | | "azb2abc_aux Ж = [Ž]"
 | 
|  |     95 | | "azb2abc_aux З = [Z]"
 | 
|  |     96 | | "azb2abc_aux И = [I]"
 | 
|  |     97 | | "azb2abc_aux Ј = [J]"
 | 
|  |     98 | | "azb2abc_aux К = [K]"
 | 
|  |     99 | | "azb2abc_aux Л = [L]"
 | 
|  |    100 | | "azb2abc_aux Љ = [L, J]"
 | 
|  |    101 | | "azb2abc_aux М = [M]"
 | 
|  |    102 | | "azb2abc_aux Н = [N]"
 | 
|  |    103 | | "azb2abc_aux Њ = [N, J]"
 | 
|  |    104 | | "azb2abc_aux О = [O]"
 | 
|  |    105 | | "azb2abc_aux П = [P]"
 | 
|  |    106 | | "azb2abc_aux Р = [R]"
 | 
|  |    107 | | "azb2abc_aux С = [S]"
 | 
|  |    108 | | "azb2abc_aux Т = [T]"
 | 
|  |    109 | | "azb2abc_aux Ћ = [Ć]"
 | 
|  |    110 | | "azb2abc_aux У = [U]"
 | 
|  |    111 | | "azb2abc_aux Ф = [F]"
 | 
|  |    112 | | "azb2abc_aux Х = [H]"
 | 
|  |    113 | | "azb2abc_aux Ц = [C]"
 | 
|  |    114 | | "azb2abc_aux Ч = [Č]"
 | 
|  |    115 | | "azb2abc_aux Џ = [D, Ž]"
 | 
|  |    116 | | "azb2abc_aux Ш = [Š]"
 | 
|  |    117 | | "azb2abc_aux azbSpc = [abcSpc]"
 | 
|  |    118 | 
 | 
|  |    119 | primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list"
 | 
|  |    120 | where
 | 
|  |    121 |   "azb2abc [] = []"
 | 
|  |    122 | | "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs"
 | 
|  |    123 | 
 | 
|  |    124 | value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]"
 | 
|  |    125 | value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]"
 | 
|  |    126 | 
 | 
|  |    127 | text{* The conversion from latin to cyrillic - 
 | 
|  |    128 |        this conversion is valid in most cases but there are some exceptions *}
 | 
|  |    129 | primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka"
 | 
|  |    130 | where
 | 
|  |    131 |    "abc2azb_aux A = А"
 | 
|  |    132 | |  "abc2azb_aux B = Б"
 | 
|  |    133 | |  "abc2azb_aux C = Ц"
 | 
|  |    134 | |  "abc2azb_aux Č = Ч"
 | 
|  |    135 | |  "abc2azb_aux Ć = Ћ"
 | 
|  |    136 | |  "abc2azb_aux D = Д"
 | 
|  |    137 | |  "abc2azb_aux E = Е"
 | 
|  |    138 | |  "abc2azb_aux F = Ф"
 | 
|  |    139 | |  "abc2azb_aux G = Г"
 | 
|  |    140 | |  "abc2azb_aux H = Х"
 | 
|  |    141 | |  "abc2azb_aux I = И"
 | 
|  |    142 | |  "abc2azb_aux J = Ј"
 | 
|  |    143 | |  "abc2azb_aux K = К"
 | 
|  |    144 | |  "abc2azb_aux L = Л"
 | 
|  |    145 | |  "abc2azb_aux M = М"
 | 
|  |    146 | |  "abc2azb_aux N = Н"
 | 
|  |    147 | |  "abc2azb_aux O = О"
 | 
|  |    148 | |  "abc2azb_aux P = П"
 | 
|  |    149 | |  "abc2azb_aux R = Р"
 | 
|  |    150 | |  "abc2azb_aux S = С"
 | 
|  |    151 | |  "abc2azb_aux Š = Ш"
 | 
|  |    152 | |  "abc2azb_aux T = Т"
 | 
|  |    153 | |  "abc2azb_aux U = У"
 | 
|  |    154 | |  "abc2azb_aux V = В"
 | 
|  |    155 | |  "abc2azb_aux Z = З"
 | 
|  |    156 | |  "abc2azb_aux Ž = Ж"
 | 
|  |    157 | |  "abc2azb_aux abcSpc = azbSpc"
 | 
|  |    158 | 
 | 
|  |    159 | fun abc2azb :: "abeceda list \<Rightarrow> azbuka list"
 | 
|  |    160 | where
 | 
|  |    161 |   "abc2azb [] = []"
 | 
|  |    162 | | "abc2azb [x] = [abc2azb_aux x]"
 | 
|  |    163 | | "abc2azb (x1 # x2 # xs) = 
 | 
|  |    164 |        (if x1 = D \<and> x2 = J then
 | 
|  |    165 |            Ђ # abc2azb xs
 | 
|  |    166 |         else if x1 = L \<and> x2 = J then
 | 
|  |    167 |            Љ # abc2azb xs
 | 
|  |    168 |         else if x1 = N \<and> x2 = J then
 | 
|  |    169 |            Њ # abc2azb xs
 | 
|  |    170 |         else if x1 = D \<and> x2 = Ž then
 | 
|  |    171 |            Џ # abc2azb xs
 | 
|  |    172 |         else
 | 
|  |    173 |            abc2azb_aux x1 # abc2azb (x2 # xs)
 | 
|  |    174 |        )"
 | 
|  |    175 | 
 | 
|  |    176 | value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]"
 | 
|  |    177 | value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]"
 | 
|  |    178 | 
 | 
|  |    179 | text{* Here are some invalid conversions *}
 | 
|  |    180 | lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]"
 | 
|  |    181 |   by simp
 | 
|  |    182 | text{* but it should be: НАДЖИВЕТИ *}
 | 
|  |    183 | lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]"
 | 
|  |    184 |   by simp
 | 
|  |    185 | text{* but it should be: ИНЈЕКЦИЈА *}
 | 
|  |    186 | 
 | 
|  |    187 | text{* The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
 | 
|  |    188 | 
 | 
|  |    189 | 
 | 
|  |    190 | text{* Idempotency in one direction *}
 | 
|  |    191 | lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]"
 | 
|  |    192 |   by (cases x) auto
 | 
|  |    193 | 
 | 
|  |    194 | lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs"
 | 
|  |    195 |   by (cases xs) auto
 | 
|  |    196 | 
 | 
|  |    197 | lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs"
 | 
|  |    198 |   by (cases xs) auto
 | 
|  |    199 | 
 | 
|  |    200 | theorem "azb2abc (abc2azb x) = x"
 | 
|  |    201 | proof (induct x)
 | 
|  |    202 |   case (Cons x1 xs)
 | 
|  |    203 |   thus ?case
 | 
|  |    204 |   proof (cases xs)
 | 
|  |    205 |     case (Cons x2 xss)
 | 
|  |    206 |     thus ?thesis
 | 
|  |    207 |       using `azb2abc (abc2azb xs) = xs`
 | 
|  |    208 |       by auto
 | 
|  |    209 |   qed simp
 | 
|  |    210 | qed simp
 | 
|  |    211 | 
 | 
|  |    212 | text{* Idempotency in the other direction does not hold *}
 | 
|  |    213 | lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]"
 | 
|  |    214 |   by simp
 | 
|  |    215 | text{* It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
 | 
|  |    216 | 
 | 
|  |    217 | end
 |