src/HOL/ex/Serbian.thy
author wenzelm
Wed Jun 22 10:09:20 2016 +0200 (2016-06-22)
changeset 63343 fb5d8a50c641
parent 61343 5b5656a63bd6
child 66392 c1a9bcbeeec2
permissions -rw-r--r--
bundle lifting_syntax;
wenzelm@40967
     1
(*  Author:     Filip Maric
wenzelm@30179
     2
wenzelm@30179
     3
Example theory involving Unicode characters (UTF-8 encoding) -- 
wenzelm@30179
     4
Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница)
wenzelm@30179
     5
*)
wenzelm@30179
     6
wenzelm@61343
     7
section \<open>A Serbian theory\<close>
wenzelm@30179
     8
wenzelm@30179
     9
theory Serbian
wenzelm@30179
    10
imports Main
wenzelm@30179
    11
begin
wenzelm@30179
    12
wenzelm@61343
    13
text\<open>Serbian cyrillic letters\<close>
blanchet@58310
    14
datatype azbuka =
wenzelm@30179
    15
  azbA   ("А")
wenzelm@30179
    16
| azbB   ("Б")
wenzelm@30179
    17
| azbV   ("В")
wenzelm@30179
    18
| azbG   ("Г")
wenzelm@30179
    19
| azbD   ("Д")
wenzelm@30179
    20
| azbDj  ("Ђ")
wenzelm@30179
    21
| azbE   ("Е")
wenzelm@30179
    22
| azbZv  ("Ж")
wenzelm@30179
    23
| azbZ   ("З")
wenzelm@30179
    24
| azbI   ("И")
wenzelm@30179
    25
| azbJ   ("Ј")
wenzelm@30179
    26
| azbK   ("К")
wenzelm@30179
    27
| azbL   ("Л")
wenzelm@30179
    28
| azbLj  ("Љ")
wenzelm@30179
    29
| azbM   ("М")
wenzelm@30179
    30
| azbN   ("Н")
wenzelm@30179
    31
| azbNj  ("Њ")
wenzelm@30179
    32
| azbO   ("О")
wenzelm@30179
    33
| azbP   ("П")
wenzelm@30179
    34
| azbR   ("Р")
wenzelm@30179
    35
| azbS   ("С")
wenzelm@30179
    36
| azbT   ("Т")
wenzelm@30179
    37
| azbC'  ("Ћ")
wenzelm@30179
    38
| azbU   ("У")
wenzelm@30179
    39
| azbF   ("Ф")
wenzelm@30179
    40
| azbH   ("Х")
wenzelm@30179
    41
| azbC   ("Ц")
wenzelm@30179
    42
| azbCv  ("Ч")
wenzelm@30179
    43
| azbDzv ("Џ")
wenzelm@30179
    44
| azbSv  ("Ш")
wenzelm@30179
    45
| azbSpc
wenzelm@30179
    46
wenzelm@30179
    47
thm azbuka.induct
wenzelm@30179
    48
wenzelm@61343
    49
text\<open>Serbian latin letters\<close>
blanchet@58310
    50
datatype abeceda =
wenzelm@30179
    51
  abcA   ("A")
wenzelm@30179
    52
| abcB   ("B")
wenzelm@30179
    53
| abcC   ("C")
wenzelm@30179
    54
| abcCv  ("Č")
wenzelm@30179
    55
| abcC'  ("Ć")
wenzelm@30179
    56
| abcD   ("D")
wenzelm@30179
    57
| abcE   ("E")
wenzelm@30179
    58
| abcF   ("F")
wenzelm@30179
    59
| abcG   ("G")
wenzelm@30179
    60
| abcH   ("H")
wenzelm@30179
    61
| abcI   ("I")
wenzelm@30179
    62
| abcJ   ("J")
wenzelm@30179
    63
| abcK   ("K")
wenzelm@30179
    64
| abcL   ("L")
wenzelm@30179
    65
| abcM   ("M")
wenzelm@30179
    66
| abcN   ("N")
wenzelm@30179
    67
| abcO   ("O")
wenzelm@30179
    68
| abcP   ("P")
wenzelm@30179
    69
| abcR   ("R")
wenzelm@30179
    70
| abcS   ("S")
wenzelm@30179
    71
| abcSv  ("Š")
wenzelm@30179
    72
| abcT   ("T")
wenzelm@30179
    73
| abcU   ("U")
wenzelm@30179
    74
| abcV   ("V")
wenzelm@30179
    75
| abcZ   ("Z")
wenzelm@30179
    76
| abcvZ  ("Ž")
wenzelm@30179
    77
| abcSpc
wenzelm@30179
    78
wenzelm@30179
    79
thm abeceda.induct
wenzelm@30179
    80
wenzelm@30179
    81
wenzelm@61343
    82
text\<open>Conversion from cyrillic to latin - 
wenzelm@61343
    83
       this conversion is valid in all cases\<close>
wenzelm@30179
    84
primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list"
wenzelm@30179
    85
where
wenzelm@30179
    86
  "azb2abc_aux А = [A]"
wenzelm@30179
    87
| "azb2abc_aux Б = [B]"
wenzelm@30179
    88
| "azb2abc_aux В = [V]"
wenzelm@30179
    89
| "azb2abc_aux Г = [G]"
wenzelm@30179
    90
| "azb2abc_aux Д = [D]"
wenzelm@30179
    91
| "azb2abc_aux Ђ = [D, J]"
wenzelm@30179
    92
| "azb2abc_aux Е = [E]"
wenzelm@30179
    93
| "azb2abc_aux Ж = [Ž]"
wenzelm@30179
    94
| "azb2abc_aux З = [Z]"
wenzelm@30179
    95
| "azb2abc_aux И = [I]"
wenzelm@30179
    96
| "azb2abc_aux Ј = [J]"
wenzelm@30179
    97
| "azb2abc_aux К = [K]"
wenzelm@30179
    98
| "azb2abc_aux Л = [L]"
wenzelm@30179
    99
| "azb2abc_aux Љ = [L, J]"
wenzelm@30179
   100
| "azb2abc_aux М = [M]"
wenzelm@30179
   101
| "azb2abc_aux Н = [N]"
wenzelm@30179
   102
| "azb2abc_aux Њ = [N, J]"
wenzelm@30179
   103
| "azb2abc_aux О = [O]"
wenzelm@30179
   104
| "azb2abc_aux П = [P]"
wenzelm@30179
   105
| "azb2abc_aux Р = [R]"
wenzelm@30179
   106
| "azb2abc_aux С = [S]"
wenzelm@30179
   107
| "azb2abc_aux Т = [T]"
wenzelm@30179
   108
| "azb2abc_aux Ћ = [Ć]"
wenzelm@30179
   109
| "azb2abc_aux У = [U]"
wenzelm@30179
   110
| "azb2abc_aux Ф = [F]"
wenzelm@30179
   111
| "azb2abc_aux Х = [H]"
wenzelm@30179
   112
| "azb2abc_aux Ц = [C]"
wenzelm@30179
   113
| "azb2abc_aux Ч = [Č]"
wenzelm@30179
   114
| "azb2abc_aux Џ = [D, Ž]"
wenzelm@30179
   115
| "azb2abc_aux Ш = [Š]"
wenzelm@30179
   116
| "azb2abc_aux azbSpc = [abcSpc]"
wenzelm@30179
   117
wenzelm@30179
   118
primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list"
wenzelm@30179
   119
where
wenzelm@30179
   120
  "azb2abc [] = []"
wenzelm@30179
   121
| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs"
wenzelm@30179
   122
wenzelm@30179
   123
value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]"
wenzelm@30179
   124
value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]"
wenzelm@30179
   125
wenzelm@61343
   126
text\<open>The conversion from latin to cyrillic - 
wenzelm@61343
   127
       this conversion is valid in most cases but there are some exceptions\<close>
wenzelm@30179
   128
primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka"
wenzelm@30179
   129
where
wenzelm@30179
   130
   "abc2azb_aux A = А"
wenzelm@30179
   131
|  "abc2azb_aux B = Б"
wenzelm@30179
   132
|  "abc2azb_aux C = Ц"
wenzelm@30179
   133
|  "abc2azb_aux Č = Ч"
wenzelm@30179
   134
|  "abc2azb_aux Ć = Ћ"
wenzelm@30179
   135
|  "abc2azb_aux D = Д"
wenzelm@30179
   136
|  "abc2azb_aux E = Е"
wenzelm@30179
   137
|  "abc2azb_aux F = Ф"
wenzelm@30179
   138
|  "abc2azb_aux G = Г"
wenzelm@30179
   139
|  "abc2azb_aux H = Х"
wenzelm@30179
   140
|  "abc2azb_aux I = И"
wenzelm@30179
   141
|  "abc2azb_aux J = Ј"
wenzelm@30179
   142
|  "abc2azb_aux K = К"
wenzelm@30179
   143
|  "abc2azb_aux L = Л"
wenzelm@30179
   144
|  "abc2azb_aux M = М"
wenzelm@30179
   145
|  "abc2azb_aux N = Н"
wenzelm@30179
   146
|  "abc2azb_aux O = О"
wenzelm@30179
   147
|  "abc2azb_aux P = П"
wenzelm@30179
   148
|  "abc2azb_aux R = Р"
wenzelm@30179
   149
|  "abc2azb_aux S = С"
wenzelm@30179
   150
|  "abc2azb_aux Š = Ш"
wenzelm@30179
   151
|  "abc2azb_aux T = Т"
wenzelm@30179
   152
|  "abc2azb_aux U = У"
wenzelm@30179
   153
|  "abc2azb_aux V = В"
wenzelm@30179
   154
|  "abc2azb_aux Z = З"
wenzelm@30179
   155
|  "abc2azb_aux Ž = Ж"
wenzelm@30179
   156
|  "abc2azb_aux abcSpc = azbSpc"
wenzelm@30179
   157
wenzelm@30179
   158
fun abc2azb :: "abeceda list \<Rightarrow> azbuka list"
wenzelm@30179
   159
where
wenzelm@30179
   160
  "abc2azb [] = []"
wenzelm@30179
   161
| "abc2azb [x] = [abc2azb_aux x]"
wenzelm@30179
   162
| "abc2azb (x1 # x2 # xs) = 
wenzelm@30179
   163
       (if x1 = D \<and> x2 = J then
wenzelm@30179
   164
           Ђ # abc2azb xs
wenzelm@30179
   165
        else if x1 = L \<and> x2 = J then
wenzelm@30179
   166
           Љ # abc2azb xs
wenzelm@30179
   167
        else if x1 = N \<and> x2 = J then
wenzelm@30179
   168
           Њ # abc2azb xs
wenzelm@30179
   169
        else if x1 = D \<and> x2 = Ž then
wenzelm@30179
   170
           Џ # abc2azb xs
wenzelm@30179
   171
        else
wenzelm@30179
   172
           abc2azb_aux x1 # abc2azb (x2 # xs)
wenzelm@30179
   173
       )"
wenzelm@30179
   174
wenzelm@30179
   175
value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]"
wenzelm@30179
   176
value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]"
wenzelm@30179
   177
wenzelm@61343
   178
text\<open>Here are some invalid conversions\<close>
wenzelm@30179
   179
lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]"
wenzelm@30179
   180
  by simp
wenzelm@61343
   181
text\<open>but it should be: НАДЖИВЕТИ\<close>
wenzelm@30179
   182
lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]"
wenzelm@30179
   183
  by simp
wenzelm@61343
   184
text\<open>but it should be: ИНЈЕКЦИЈА\<close>
wenzelm@30179
   185
wenzelm@61343
   186
text\<open>The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ\<close>
wenzelm@30179
   187
wenzelm@30179
   188
wenzelm@61343
   189
text\<open>Idempotency in one direction\<close>
wenzelm@30179
   190
lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]"
wenzelm@30179
   191
  by (cases x) auto
wenzelm@30179
   192
wenzelm@30179
   193
lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs"
wenzelm@30179
   194
  by (cases xs) auto
wenzelm@30179
   195
wenzelm@30179
   196
lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs"
wenzelm@30179
   197
  by (cases xs) auto
wenzelm@30179
   198
wenzelm@30179
   199
theorem "azb2abc (abc2azb x) = x"
wenzelm@30179
   200
proof (induct x)
wenzelm@30179
   201
  case (Cons x1 xs)
wenzelm@30179
   202
  thus ?case
wenzelm@30179
   203
  proof (cases xs)
wenzelm@30179
   204
    case (Cons x2 xss)
wenzelm@30179
   205
    thus ?thesis
wenzelm@61343
   206
      using \<open>azb2abc (abc2azb xs) = xs\<close>
wenzelm@30179
   207
      by auto
wenzelm@30179
   208
  qed simp
wenzelm@30179
   209
qed simp
wenzelm@30179
   210
wenzelm@61343
   211
text\<open>Idempotency in the other direction does not hold\<close>
wenzelm@30179
   212
lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]"
wenzelm@30179
   213
  by simp
wenzelm@61343
   214
text\<open>It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ\<close>
wenzelm@30179
   215
wenzelm@30179
   216
end