src/HOL/ex/Serbian.thy
author haftmann
Fri Oct 10 19:55:32 2014 +0200 (2014-10-10)
changeset 58646 cd63a4b12a33
parent 58310 91ea607a34d8
child 58889 5b7a9633cfa8
permissions -rw-r--r--
specialized specification: avoid trivial instances
     1 (*  Author:     Filip Maric
     2 
     3 Example theory involving Unicode characters (UTF-8 encoding) -- 
     4 Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница)
     5 *)
     6 
     7 header {* A Serbian theory *}
     8 
     9 theory Serbian
    10 imports Main
    11 begin
    12 
    13 text{* Serbian cyrillic letters *}
    14 datatype azbuka =
    15   azbA   ("А")
    16 | azbB   ("Б")
    17 | azbV   ("В")
    18 | azbG   ("Г")
    19 | azbD   ("Д")
    20 | azbDj  ("Ђ")
    21 | azbE   ("Е")
    22 | azbZv  ("Ж")
    23 | azbZ   ("З")
    24 | azbI   ("И")
    25 | azbJ   ("Ј")
    26 | azbK   ("К")
    27 | azbL   ("Л")
    28 | azbLj  ("Љ")
    29 | azbM   ("М")
    30 | azbN   ("Н")
    31 | azbNj  ("Њ")
    32 | azbO   ("О")
    33 | azbP   ("П")
    34 | azbR   ("Р")
    35 | azbS   ("С")
    36 | azbT   ("Т")
    37 | azbC'  ("Ћ")
    38 | azbU   ("У")
    39 | azbF   ("Ф")
    40 | azbH   ("Х")
    41 | azbC   ("Ц")
    42 | azbCv  ("Ч")
    43 | azbDzv ("Џ")
    44 | azbSv  ("Ш")
    45 | azbSpc
    46 
    47 thm azbuka.induct
    48 
    49 text{* Serbian latin letters *}
    50 datatype abeceda =
    51   abcA   ("A")
    52 | abcB   ("B")
    53 | abcC   ("C")
    54 | abcCv  ("Č")
    55 | abcC'  ("Ć")
    56 | abcD   ("D")
    57 | abcE   ("E")
    58 | abcF   ("F")
    59 | abcG   ("G")
    60 | abcH   ("H")
    61 | abcI   ("I")
    62 | abcJ   ("J")
    63 | abcK   ("K")
    64 | abcL   ("L")
    65 | abcM   ("M")
    66 | abcN   ("N")
    67 | abcO   ("O")
    68 | abcP   ("P")
    69 | abcR   ("R")
    70 | abcS   ("S")
    71 | abcSv  ("Š")
    72 | abcT   ("T")
    73 | abcU   ("U")
    74 | abcV   ("V")
    75 | abcZ   ("Z")
    76 | abcvZ  ("Ž")
    77 | abcSpc
    78 
    79 thm abeceda.induct
    80 
    81 
    82 text{* Conversion from cyrillic to latin - 
    83        this conversion is valid in all cases *}
    84 primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list"
    85 where
    86   "azb2abc_aux А = [A]"
    87 | "azb2abc_aux Б = [B]"
    88 | "azb2abc_aux В = [V]"
    89 | "azb2abc_aux Г = [G]"
    90 | "azb2abc_aux Д = [D]"
    91 | "azb2abc_aux Ђ = [D, J]"
    92 | "azb2abc_aux Е = [E]"
    93 | "azb2abc_aux Ж = [Ž]"
    94 | "azb2abc_aux З = [Z]"
    95 | "azb2abc_aux И = [I]"
    96 | "azb2abc_aux Ј = [J]"
    97 | "azb2abc_aux К = [K]"
    98 | "azb2abc_aux Л = [L]"
    99 | "azb2abc_aux Љ = [L, J]"
   100 | "azb2abc_aux М = [M]"
   101 | "azb2abc_aux Н = [N]"
   102 | "azb2abc_aux Њ = [N, J]"
   103 | "azb2abc_aux О = [O]"
   104 | "azb2abc_aux П = [P]"
   105 | "azb2abc_aux Р = [R]"
   106 | "azb2abc_aux С = [S]"
   107 | "azb2abc_aux Т = [T]"
   108 | "azb2abc_aux Ћ = [Ć]"
   109 | "azb2abc_aux У = [U]"
   110 | "azb2abc_aux Ф = [F]"
   111 | "azb2abc_aux Х = [H]"
   112 | "azb2abc_aux Ц = [C]"
   113 | "azb2abc_aux Ч = [Č]"
   114 | "azb2abc_aux Џ = [D, Ž]"
   115 | "azb2abc_aux Ш = [Š]"
   116 | "azb2abc_aux azbSpc = [abcSpc]"
   117 
   118 primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list"
   119 where
   120   "azb2abc [] = []"
   121 | "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs"
   122 
   123 value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]"
   124 value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]"
   125 
   126 text{* The conversion from latin to cyrillic - 
   127        this conversion is valid in most cases but there are some exceptions *}
   128 primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka"
   129 where
   130    "abc2azb_aux A = А"
   131 |  "abc2azb_aux B = Б"
   132 |  "abc2azb_aux C = Ц"
   133 |  "abc2azb_aux Č = Ч"
   134 |  "abc2azb_aux Ć = Ћ"
   135 |  "abc2azb_aux D = Д"
   136 |  "abc2azb_aux E = Е"
   137 |  "abc2azb_aux F = Ф"
   138 |  "abc2azb_aux G = Г"
   139 |  "abc2azb_aux H = Х"
   140 |  "abc2azb_aux I = И"
   141 |  "abc2azb_aux J = Ј"
   142 |  "abc2azb_aux K = К"
   143 |  "abc2azb_aux L = Л"
   144 |  "abc2azb_aux M = М"
   145 |  "abc2azb_aux N = Н"
   146 |  "abc2azb_aux O = О"
   147 |  "abc2azb_aux P = П"
   148 |  "abc2azb_aux R = Р"
   149 |  "abc2azb_aux S = С"
   150 |  "abc2azb_aux Š = Ш"
   151 |  "abc2azb_aux T = Т"
   152 |  "abc2azb_aux U = У"
   153 |  "abc2azb_aux V = В"
   154 |  "abc2azb_aux Z = З"
   155 |  "abc2azb_aux Ž = Ж"
   156 |  "abc2azb_aux abcSpc = azbSpc"
   157 
   158 fun abc2azb :: "abeceda list \<Rightarrow> azbuka list"
   159 where
   160   "abc2azb [] = []"
   161 | "abc2azb [x] = [abc2azb_aux x]"
   162 | "abc2azb (x1 # x2 # xs) = 
   163        (if x1 = D \<and> x2 = J then
   164            Ђ # abc2azb xs
   165         else if x1 = L \<and> x2 = J then
   166            Љ # abc2azb xs
   167         else if x1 = N \<and> x2 = J then
   168            Њ # abc2azb xs
   169         else if x1 = D \<and> x2 = Ž then
   170            Џ # abc2azb xs
   171         else
   172            abc2azb_aux x1 # abc2azb (x2 # xs)
   173        )"
   174 
   175 value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]"
   176 value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]"
   177 
   178 text{* Here are some invalid conversions *}
   179 lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]"
   180   by simp
   181 text{* but it should be: НАДЖИВЕТИ *}
   182 lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]"
   183   by simp
   184 text{* but it should be: ИНЈЕКЦИЈА *}
   185 
   186 text{* The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
   187 
   188 
   189 text{* Idempotency in one direction *}
   190 lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]"
   191   by (cases x) auto
   192 
   193 lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs"
   194   by (cases xs) auto
   195 
   196 lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs"
   197   by (cases xs) auto
   198 
   199 theorem "azb2abc (abc2azb x) = x"
   200 proof (induct x)
   201   case (Cons x1 xs)
   202   thus ?case
   203   proof (cases xs)
   204     case (Cons x2 xss)
   205     thus ?thesis
   206       using `azb2abc (abc2azb xs) = xs`
   207       by auto
   208   qed simp
   209 qed simp
   210 
   211 text{* Idempotency in the other direction does not hold *}
   212 lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]"
   213   by simp
   214 text{* It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
   215 
   216 end