src/HOL/ex/Serbian.thy
author nipkow
Fri Mar 06 17:38:47 2009 +0100 (2009-03-06)
changeset 30313 b2441b0c8d38
parent 30179 c703c9368c12
child 40967 5eb59b62e7de
permissions -rw-r--r--
added lemmas
     1 (* -*- coding: utf-8 -*- :encoding=utf-8:  
     2     Author:     Filip Maric
     3 
     4 Example theory involving Unicode characters (UTF-8 encoding) -- 
     5 Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница)
     6 *)
     7 
     8 header {* A Serbian theory *}
     9 
    10 theory Serbian
    11 imports Main
    12 begin
    13 
    14 text{* Serbian cyrillic letters *}
    15 datatype azbuka =
    16   azbA   ("А")
    17 | azbB   ("Б")
    18 | azbV   ("В")
    19 | azbG   ("Г")
    20 | azbD   ("Д")
    21 | azbDj  ("Ђ")
    22 | azbE   ("Е")
    23 | azbZv  ("Ж")
    24 | azbZ   ("З")
    25 | azbI   ("И")
    26 | azbJ   ("Ј")
    27 | azbK   ("К")
    28 | azbL   ("Л")
    29 | azbLj  ("Љ")
    30 | azbM   ("М")
    31 | azbN   ("Н")
    32 | azbNj  ("Њ")
    33 | azbO   ("О")
    34 | azbP   ("П")
    35 | azbR   ("Р")
    36 | azbS   ("С")
    37 | azbT   ("Т")
    38 | azbC'  ("Ћ")
    39 | azbU   ("У")
    40 | azbF   ("Ф")
    41 | azbH   ("Х")
    42 | azbC   ("Ц")
    43 | azbCv  ("Ч")
    44 | azbDzv ("Џ")
    45 | azbSv  ("Ш")
    46 | azbSpc
    47 
    48 thm azbuka.induct
    49 
    50 text{* Serbian latin letters *}
    51 datatype abeceda =
    52   abcA   ("A")
    53 | abcB   ("B")
    54 | abcC   ("C")
    55 | abcCv  ("Č")
    56 | abcC'  ("Ć")
    57 | abcD   ("D")
    58 | abcE   ("E")
    59 | abcF   ("F")
    60 | abcG   ("G")
    61 | abcH   ("H")
    62 | abcI   ("I")
    63 | abcJ   ("J")
    64 | abcK   ("K")
    65 | abcL   ("L")
    66 | abcM   ("M")
    67 | abcN   ("N")
    68 | abcO   ("O")
    69 | abcP   ("P")
    70 | abcR   ("R")
    71 | abcS   ("S")
    72 | abcSv  ("Š")
    73 | abcT   ("T")
    74 | abcU   ("U")
    75 | abcV   ("V")
    76 | abcZ   ("Z")
    77 | abcvZ  ("Ž")
    78 | abcSpc
    79 
    80 thm abeceda.induct
    81 
    82 
    83 text{* Conversion from cyrillic to latin - 
    84        this conversion is valid in all cases *}
    85 primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list"
    86 where
    87   "azb2abc_aux А = [A]"
    88 | "azb2abc_aux Б = [B]"
    89 | "azb2abc_aux В = [V]"
    90 | "azb2abc_aux Г = [G]"
    91 | "azb2abc_aux Д = [D]"
    92 | "azb2abc_aux Ђ = [D, J]"
    93 | "azb2abc_aux Е = [E]"
    94 | "azb2abc_aux Ж = [Ž]"
    95 | "azb2abc_aux З = [Z]"
    96 | "azb2abc_aux И = [I]"
    97 | "azb2abc_aux Ј = [J]"
    98 | "azb2abc_aux К = [K]"
    99 | "azb2abc_aux Л = [L]"
   100 | "azb2abc_aux Љ = [L, J]"
   101 | "azb2abc_aux М = [M]"
   102 | "azb2abc_aux Н = [N]"
   103 | "azb2abc_aux Њ = [N, J]"
   104 | "azb2abc_aux О = [O]"
   105 | "azb2abc_aux П = [P]"
   106 | "azb2abc_aux Р = [R]"
   107 | "azb2abc_aux С = [S]"
   108 | "azb2abc_aux Т = [T]"
   109 | "azb2abc_aux Ћ = [Ć]"
   110 | "azb2abc_aux У = [U]"
   111 | "azb2abc_aux Ф = [F]"
   112 | "azb2abc_aux Х = [H]"
   113 | "azb2abc_aux Ц = [C]"
   114 | "azb2abc_aux Ч = [Č]"
   115 | "azb2abc_aux Џ = [D, Ž]"
   116 | "azb2abc_aux Ш = [Š]"
   117 | "azb2abc_aux azbSpc = [abcSpc]"
   118 
   119 primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list"
   120 where
   121   "azb2abc [] = []"
   122 | "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs"
   123 
   124 value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]"
   125 value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]"
   126 
   127 text{* The conversion from latin to cyrillic - 
   128        this conversion is valid in most cases but there are some exceptions *}
   129 primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka"
   130 where
   131    "abc2azb_aux A = А"
   132 |  "abc2azb_aux B = Б"
   133 |  "abc2azb_aux C = Ц"
   134 |  "abc2azb_aux Č = Ч"
   135 |  "abc2azb_aux Ć = Ћ"
   136 |  "abc2azb_aux D = Д"
   137 |  "abc2azb_aux E = Е"
   138 |  "abc2azb_aux F = Ф"
   139 |  "abc2azb_aux G = Г"
   140 |  "abc2azb_aux H = Х"
   141 |  "abc2azb_aux I = И"
   142 |  "abc2azb_aux J = Ј"
   143 |  "abc2azb_aux K = К"
   144 |  "abc2azb_aux L = Л"
   145 |  "abc2azb_aux M = М"
   146 |  "abc2azb_aux N = Н"
   147 |  "abc2azb_aux O = О"
   148 |  "abc2azb_aux P = П"
   149 |  "abc2azb_aux R = Р"
   150 |  "abc2azb_aux S = С"
   151 |  "abc2azb_aux Š = Ш"
   152 |  "abc2azb_aux T = Т"
   153 |  "abc2azb_aux U = У"
   154 |  "abc2azb_aux V = В"
   155 |  "abc2azb_aux Z = З"
   156 |  "abc2azb_aux Ž = Ж"
   157 |  "abc2azb_aux abcSpc = azbSpc"
   158 
   159 fun abc2azb :: "abeceda list \<Rightarrow> azbuka list"
   160 where
   161   "abc2azb [] = []"
   162 | "abc2azb [x] = [abc2azb_aux x]"
   163 | "abc2azb (x1 # x2 # xs) = 
   164        (if x1 = D \<and> x2 = J then
   165            Ђ # abc2azb xs
   166         else if x1 = L \<and> x2 = J then
   167            Љ # abc2azb xs
   168         else if x1 = N \<and> x2 = J then
   169            Њ # abc2azb xs
   170         else if x1 = D \<and> x2 = Ž then
   171            Џ # abc2azb xs
   172         else
   173            abc2azb_aux x1 # abc2azb (x2 # xs)
   174        )"
   175 
   176 value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]"
   177 value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]"
   178 
   179 text{* Here are some invalid conversions *}
   180 lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]"
   181   by simp
   182 text{* but it should be: НАДЖИВЕТИ *}
   183 lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]"
   184   by simp
   185 text{* but it should be: ИНЈЕКЦИЈА *}
   186 
   187 text{* The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
   188 
   189 
   190 text{* Idempotency in one direction *}
   191 lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]"
   192   by (cases x) auto
   193 
   194 lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs"
   195   by (cases xs) auto
   196 
   197 lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs"
   198   by (cases xs) auto
   199 
   200 theorem "azb2abc (abc2azb x) = x"
   201 proof (induct x)
   202   case (Cons x1 xs)
   203   thus ?case
   204   proof (cases xs)
   205     case (Cons x2 xss)
   206     thus ?thesis
   207       using `azb2abc (abc2azb xs) = xs`
   208       by auto
   209   qed simp
   210 qed simp
   211 
   212 text{* Idempotency in the other direction does not hold *}
   213 lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]"
   214   by simp
   215 text{* It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
   216 
   217 end