| author | wenzelm | 
| Wed, 25 Nov 2020 15:12:02 +0100 | |
| changeset 72707 | f1380c9f3806 | 
| parent 72661 | fca4d6abebda | 
| child 77888 | 3c837f8c8ed5 | 
| permissions | -rw-r--r-- | 
| 35628 | 1 | (* Title: Pure/General/sha1.ML | 
| 2 | Author: Makarius | |
| 62666 | 3 | Author: Sascha Boehme, TU Muenchen | 
| 35628 | 4 | |
| 62666 | 5 | Digesting strings according to SHA-1 (see RFC 3174). | 
| 35628 | 6 | *) | 
| 7 | ||
| 8 | signature SHA1 = | |
| 9 | sig | |
| 41954 | 10 | eqtype digest | 
| 11 | val rep: digest -> string | |
| 57638 
ed58e740a699
less authentic SHA1.digest: trust Scala side on blobs and avoid re-calculation via Foreign Language Interface, which might be a cause of problems;
 wenzelm parents: 
41954diff
changeset | 12 | val fake: string -> digest | 
| 62702 | 13 | val digest: string -> digest | 
| 62666 | 14 | val test_samples: unit -> unit | 
| 35628 | 15 | end; | 
| 16 | ||
| 17 | structure SHA1: SHA1 = | |
| 18 | struct | |
| 19 | ||
| 62666 | 20 | (** internal implementation in ML -- relatively slow **) | 
| 21 | ||
| 22 | local | |
| 23 | ||
| 35628 | 24 | (* 32bit words *) | 
| 25 | ||
| 26 | infix 4 << >>; | |
| 27 | infix 3 andb; | |
| 28 | infix 2 orb xorb; | |
| 29 | ||
| 30 | val op << = Word32.<<; | |
| 31 | val op >> = Word32.>>; | |
| 32 | val op andb = Word32.andb; | |
| 33 | val op orb = Word32.orb; | |
| 34 | val op xorb = Word32.xorb; | |
| 35 | val notb = Word32.notb; | |
| 36 | ||
| 37 | fun rotate k w = w << k orb w >> (0w32 - k); | |
| 38 | ||
| 39 | ||
| 40 | (* hexadecimal words *) | |
| 41 | ||
| 42 | fun hex_digit (text, w: Word32.word) = | |
| 43 | let | |
| 44 | val d = Word32.toInt (w andb 0wxf); | |
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
62819diff
changeset | 45 | val dig = if d < 10 then chr (Char.ord #"0" + d) else chr (Char.ord #"a" + d - 10); | 
| 35628 | 46 | in (dig ^ text, w >> 0w4) end; | 
| 47 | ||
| 48 | fun hex_word w = #1 (funpow 8 hex_digit ("", w));
 | |
| 49 | ||
| 50 | ||
| 51 | (* padding *) | |
| 52 | ||
| 62666 | 53 | fun pack_bytes 0 _ = "" | 
| 35628 | 54 | | pack_bytes k n = pack_bytes (k - 1) (n div 256) ^ chr (n mod 256); | 
| 55 | ||
| 56 | fun padded_text str = | |
| 57 | let | |
| 58 | val len = size str; | |
| 59 | val padding = chr 128 ^ replicate_string (~ (len + 9) mod 64) (chr 0) ^ pack_bytes 8 (len * 8); | |
| 60 | fun byte i = Char.ord (String.sub (if i < len then (str, i) else (padding, (i - len)))); | |
| 61 | fun word i = | |
| 62 | Word32.fromInt (byte (4 * i)) << 0w24 orb | |
| 63 | Word32.fromInt (byte (4 * i + 1)) << 0w16 orb | |
| 64 | Word32.fromInt (byte (4 * i + 2)) << 0w8 orb | |
| 65 | Word32.fromInt (byte (4 * i + 3)); | |
| 66 | in ((len + size padding) div 4, word) end; | |
| 67 | ||
| 68 | ||
| 62666 | 69 | (* digest_string_internal *) | 
| 35628 | 70 | |
| 71 | fun digest_word (i, w, {a, b, c, d, e}) =
 | |
| 72 | let | |
| 73 |     val {f, k} =
 | |
| 74 | if i < 20 then | |
| 75 |         {f = (b andb c) orb (notb b andb d),
 | |
| 76 | k = 0wx5A827999} | |
| 77 | else if i < 40 then | |
| 78 |         {f = b xorb c xorb d,
 | |
| 79 | k = 0wx6ED9EBA1} | |
| 80 | else if i < 60 then | |
| 81 |         {f = (b andb c) orb (b andb d) orb (c andb d),
 | |
| 82 | k = 0wx8F1BBCDC} | |
| 83 | else | |
| 84 |         {f = b xorb c xorb d,
 | |
| 85 | k = 0wxCA62C1D6}; | |
| 86 | val op + = Word32.+; | |
| 87 | in | |
| 88 |     {a = rotate 0w5 a + f + e + w + k,
 | |
| 89 | b = a, | |
| 90 | c = rotate 0w30 b, | |
| 91 | d = c, | |
| 92 | e = d} | |
| 93 | end; | |
| 94 | ||
| 62666 | 95 | in | 
| 96 | ||
| 97 | fun digest_string_internal str = | |
| 35628 | 98 | let | 
| 99 | val (text_len, text) = padded_text str; | |
| 100 | ||
| 101 | (*hash result -- 5 words*) | |
| 102 | val hash_array : Word32.word Array.array = | |
| 103 | Array.fromList [0wx67452301, 0wxEFCDAB89, 0wx98BADCFE, 0wx10325476, 0wxC3D2E1F0]; | |
| 104 | fun hash i = Array.sub (hash_array, i); | |
| 105 | fun add_hash x i = Array.update (hash_array, i, hash i + x); | |
| 106 | ||
| 107 | (*current chunk -- 80 words*) | |
| 108 | val chunk_array = Array.array (80, 0w0: Word32.word); | |
| 109 | fun chunk i = Array.sub (chunk_array, i); | |
| 110 | fun init_chunk pos = | |
| 111 | Array.modifyi (fn (i, _) => | |
| 112 | if i < 16 then text (pos + i) | |
| 113 | else rotate 0w1 (chunk (i - 3) xorb chunk (i - 8) xorb chunk (i - 14) xorb chunk (i - 16))) | |
| 114 | chunk_array; | |
| 115 | ||
| 116 | fun digest_chunks pos = | |
| 117 | if pos < text_len then | |
| 118 | let | |
| 119 | val _ = init_chunk pos; | |
| 120 |           val {a, b, c, d, e} = Array.foldli digest_word
 | |
| 121 |             {a = hash 0,
 | |
| 122 | b = hash 1, | |
| 123 | c = hash 2, | |
| 124 | d = hash 3, | |
| 125 | e = hash 4} | |
| 126 | chunk_array; | |
| 127 | val _ = add_hash a 0; | |
| 128 | val _ = add_hash b 1; | |
| 129 | val _ = add_hash c 2; | |
| 130 | val _ = add_hash d 3; | |
| 131 | val _ = add_hash e 4; | |
| 132 | in digest_chunks (pos + 16) end | |
| 133 | else (); | |
| 134 | val _ = digest_chunks 0; | |
| 135 | ||
| 136 | val hex = hex_word o hash; | |
| 137 | in hex 0 ^ hex 1 ^ hex 2 ^ hex 3 ^ hex 4 end; | |
| 138 | ||
| 62666 | 139 | end; | 
| 41954 | 140 | |
| 62666 | 141 | |
| 142 | (** external implementation in C **) | |
| 143 | ||
| 144 | local | |
| 145 | ||
| 64334 | 146 | (* C library and memory *) | 
| 147 | ||
| 148 | val library_path = | |
| 149 |   Path.explode ("$ML_HOME/" ^ (if ML_System.platform_is_windows then "sha1.dll" else "libsha1.so"));
 | |
| 150 | ||
| 72554 
81518b38b316
prefer static library_call (again): avoid memory-leak within C code area;
 wenzelm parents: 
72537diff
changeset | 151 | val library_call = | 
| 72534 
e0c6522d5d43
prefer static library_call, following recent changes to structure Foreign;
 wenzelm parents: 
68087diff
changeset | 152 | Foreign.buildCall3 | 
| 72661 
fca4d6abebda
more robust library_call (again): dynamic file name, static symbol;
 wenzelm parents: 
72554diff
changeset | 153 | (Foreign.getSymbol | 
| 
fca4d6abebda
more robust library_call (again): dynamic file name, static symbol;
 wenzelm parents: 
72554diff
changeset | 154 | (Foreign.loadLibraryIndirect (fn () => File.platform_path library_path)) "sha1_buffer", | 
| 72554 
81518b38b316
prefer static library_call (again): avoid memory-leak within C code area;
 wenzelm parents: 
72537diff
changeset | 155 | (Foreign.cByteArray, Foreign.cUlong, Foreign.cPointer), Foreign.cPointer); | 
| 72535 | 156 | |
| 64334 | 157 | fun with_memory n = | 
| 158 | Thread_Attributes.uninterruptible (fn restore_attributes => fn f => | |
| 159 | let | |
| 160 | val mem = Foreign.Memory.malloc (Word.fromInt n); | |
| 161 | val res = Exn.capture (restore_attributes f) mem; | |
| 162 | val _ = Foreign.Memory.free mem; | |
| 163 | in Exn.release res end); | |
| 164 | ||
| 165 | ||
| 62666 | 166 | (* digesting *) | 
| 167 | ||
| 64334 | 168 | fun hex_string byte = op ^ (apply2 hex_digit (Integer.div_mod (Word8.toInt byte) 16)); | 
| 62666 | 169 | |
| 170 | in | |
| 171 | ||
| 172 | fun digest_string_external str = | |
| 64334 | 173 | with_memory 20 (fn mem => | 
| 174 | let | |
| 175 | val bytes = Byte.stringToBytes str; | |
| 72534 
e0c6522d5d43
prefer static library_call, following recent changes to structure Foreign;
 wenzelm parents: 
68087diff
changeset | 176 | val _ = library_call (bytes, Word8Vector.length bytes, mem); | 
| 64334 | 177 | fun get i = hex_string (Foreign.Memory.get8 (mem, Word.fromInt i)); | 
| 178 | in implode (map get (0 upto 19)) end); | |
| 62666 | 179 | |
| 180 | end; | |
| 181 | ||
| 182 | ||
| 183 | ||
| 184 | (** type digest **) | |
| 41954 | 185 | |
| 186 | datatype digest = Digest of string; | |
| 187 | ||
| 188 | fun rep (Digest s) = s; | |
| 57638 
ed58e740a699
less authentic SHA1.digest: trust Scala side on blobs and avoid re-calculation via Foreign Language Interface, which might be a cause of problems;
 wenzelm parents: 
41954diff
changeset | 189 | val fake = Digest; | 
| 
ed58e740a699
less authentic SHA1.digest: trust Scala side on blobs and avoid re-calculation via Foreign Language Interface, which might be a cause of problems;
 wenzelm parents: 
41954diff
changeset | 190 | |
| 62819 
d3ff367a16a0
careful export of type-dependent functions, without losing their special status;
 wenzelm parents: 
62702diff
changeset | 191 | val _ = ML_system_pp (fn _ => fn _ => Pretty.to_polyml o Pretty.str o quote o rep); | 
| 62663 | 192 | |
| 62666 | 193 | fun digest_string str = digest_string_external str | 
| 67202 | 194 | handle Foreign.Foreign msg => | 
| 62666 | 195 | (warning (msg ^ "\nUsing slow ML implementation of SHA1.digest"); digest_string_internal str); | 
| 196 | ||
| 197 | val digest = Digest o digest_string; | |
| 198 | ||
| 199 | ||
| 200 | ||
| 201 | (** SHA1 samples found in the wild **) | |
| 202 | ||
| 203 | local | |
| 204 | ||
| 205 | fun check (msg, key) = | |
| 206 | let val key' = rep (digest msg) in | |
| 207 | if key = key' then () | |
| 208 | else | |
| 209 |       raise Fail ("SHA1 library integrity test failed on " ^ quote msg ^ ":\n" ^
 | |
| 210 | key ^ " expected, but\n" ^ key' ^ " was found") | |
| 211 | end; | |
| 212 | ||
| 213 | in | |
| 214 | ||
| 215 | fun test_samples () = | |
| 216 | List.app check | |
| 217 |    [("", "da39a3ee5e6b4b0d3255bfef95601890afd80709"),
 | |
| 218 |     ("a", "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8"),
 | |
| 219 |     ("abc", "a9993e364706816aba3e25717850c26c9cd0d89d"),
 | |
| 220 |     ("abcdefghijklmnopqrstuvwxyz", "32d10c7b8cf96570ca04ce37f2a19d84240d3a89"),
 | |
| 221 |     ("The quick brown fox jumps over the lazy dog", "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12"),
 | |
| 222 | (replicate_string 100 "\000", "ed4a77d1b56a118938788fc53037759b6c501e3d"), | |
| 223 |     ("a\000b", "4a3dec2d1f8245280855c42db0ee4239f917fdb8"),
 | |
| 224 |     ("\000\001", "3f29546453678b855931c174a97d6c0894b8f546")];
 | |
| 225 | ||
| 35628 | 226 | end; | 
| 62666 | 227 | |
| 228 | val _ = test_samples (); | |
| 229 | ||
| 230 | end; |