diff --git a/src/hello.c b/src/hello.c deleted file mode 100644 index 8d9ebf05..00000000 --- a/src/hello.c +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include -#include -#include -#include - -#pragma comment(lib, "Ws2_32.lib") - -int main() { - printf("winsock2"); - return 0; -} \ No newline at end of file diff --git a/src/sm3_avx.c b/src/sm3_avx.c deleted file mode 100644 index a1401dee..00000000 --- a/src/sm3_avx.c +++ /dev/null @@ -1,376 +0,0 @@ -/* - * Copyright 2014-2022 The GmSSL Project. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ - - -#include -#include -#include -#include - - -#ifdef SM3_SSE3 -# include -# include - -# define _mm_rotl_epi32(X,i) \ - _mm_xor_si128(_mm_slli_epi32((X),(i)), _mm_srli_epi32((X),32-(i))) -#endif - - -#define ROTL(x,n) (((x)<<(n)) | ((x)>>(32-(n)))) -#define P0(x) ((x) ^ ROL32((x), 9) ^ ROL32((x),17)) -#define P1(x) ((x) ^ ROL32((x),15) ^ ROL32((x),23)) - -#define FF00(x,y,z) ((x) ^ (y) ^ (z)) -#define FF16(x,y,z) (((x)&(y)) | ((x)&(z)) | ((y)&(z))) -#define GG00(x,y,z) ((x) ^ (y) ^ (z)) -#define GG16(x,y,z) ((((y)^(z)) & (x)) ^ (z)) - -#define R(A, B, C, D, E, F, G, H, xx) \ - SS1 = ROL32((ROL32(A, 12) + E + K[j]), 7); \ - SS2 = SS1 ^ ROL32(A, 12); \ - TT1 = FF##xx(A, B, C) + D + SS2 + (W[j] ^ W[j + 4]); \ - TT2 = GG##xx(E, F, G) + H + SS1 + W[j]; \ - B = ROL32(B, 9); \ - H = TT1; \ - F = ROL32(F, 19); \ - D = P0(TT2); \ - j++ - -#define R8(A, B, C, D, E, F, G, H, xx) \ - R(A, B, C, D, E, F, G, H, xx); \ - R(H, A, B, C, D, E, F, G, xx); \ - R(G, H, A, B, C, D, E, F, xx); \ - R(F, G, H, A, B, C, D, E, xx); \ - R(E, F, G, H, A, B, C, D, xx); \ - R(D, E, F, G, H, A, B, C, xx); \ - R(C, D, E, F, G, H, A, B, xx); \ - R(B, C, D, E, F, G, H, A, xx) - - - -#define T00 0x79cc4519U -#define T16 0x7a879d8aU - -#define K0 0x79cc4519U -#define K1 0xf3988a32U -#define K2 0xe7311465U -#define K3 0xce6228cbU -#define K4 0x9cc45197U -#define K5 0x3988a32fU -#define K6 0x7311465eU -#define K7 0xe6228cbcU -#define K8 0xcc451979U -#define K9 0x988a32f3U -#define K10 0x311465e7U -#define K11 0x6228cbceU -#define K12 0xc451979cU -#define K13 0x88a32f39U -#define K14 0x11465e73U -#define K15 0x228cbce6U -#define K16 0x9d8a7a87U -#define K17 0x3b14f50fU -#define K18 0x7629ea1eU -#define K19 0xec53d43cU -#define K20 0xd8a7a879U -#define K21 0xb14f50f3U -#define K22 0x629ea1e7U -#define K23 0xc53d43ceU -#define K24 0x8a7a879dU -#define K25 0x14f50f3bU -#define K26 0x29ea1e76U -#define K27 0x53d43cecU -#define K28 0xa7a879d8U -#define K29 0x4f50f3b1U -#define K30 0x9ea1e762U -#define K31 0x3d43cec5U -#define K32 0x7a879d8aU -#define K33 0xf50f3b14U -#define K34 0xea1e7629U -#define K35 0xd43cec53U -#define K36 0xa879d8a7U -#define K37 0x50f3b14fU -#define K38 0xa1e7629eU -#define K39 0x43cec53dU -#define K40 0x879d8a7aU -#define K41 0x0f3b14f5U -#define K42 0x1e7629eaU -#define K43 0x3cec53d4U -#define K44 0x79d8a7a8U -#define K45 0xf3b14f50U -#define K46 0xe7629ea1U -#define K47 0xcec53d43U -#define K48 0x9d8a7a87U -#define K49 0x3b14f50fU -#define K50 0x7629ea1eU -#define K51 0xec53d43cU -#define K52 0xd8a7a879U -#define K53 0xb14f50f3U -#define K54 0x629ea1e7U -#define K55 0xc53d43ceU -#define K56 0x8a7a879dU -#define K57 0x14f50f3bU -#define K58 0x29ea1e76U -#define K59 0x53d43cecU -#define K60 0xa7a879d8U -#define K61 0x4f50f3b1U -#define K62 0x9ea1e762U -#define K63 0x3d43cec5U - -static uint32_t K[64] = { - K0, K1, K2, K3, K4, K5, K6, K7, - K8, K9, K10, K11, K12, K13, K14, K15, - K16, K17, K18, K19, K20, K21, K22, K23, - K24, K25, K26, K27, K28, K29, K30, K31, - K32, K33, K34, K35, K36, K37, K38, K39, - K40, K41, K42, K43, K44, K45, K46, K47, - K48, K49, K50, K51, K52, K53, K54, K55, - K56, K57, K58, K59, K60, K61, K62, K63, - /* - 0x79cc4519U, 0xf3988a32U, 0xe7311465U, 0xce6228cbU, - 0x9cc45197U, 0x3988a32fU, 0x7311465eU, 0xe6228cbcU, - 0xcc451979U, 0x988a32f3U, 0x311465e7U, 0x6228cbceU, - 0xc451979cU, 0x88a32f39U, 0x11465e73U, 0x228cbce6U, - 0x9d8a7a87U, 0x3b14f50fU, 0x7629ea1eU, 0xec53d43cU, - 0xd8a7a879U, 0xb14f50f3U, 0x629ea1e7U, 0xc53d43ceU, - 0x8a7a879dU, 0x14f50f3bU, 0x29ea1e76U, 0x53d43cecU, - 0xa7a879d8U, 0x4f50f3b1U, 0x9ea1e762U, 0x3d43cec5U, - 0x7a879d8aU, 0xf50f3b14U, 0xea1e7629U, 0xd43cec53U, - 0xa879d8a7U, 0x50f3b14fU, 0xa1e7629eU, 0x43cec53dU, - 0x879d8a7aU, 0x0f3b14f5U, 0x1e7629eaU, 0x3cec53d4U, - 0x79d8a7a8U, 0xf3b14f50U, 0xe7629ea1U, 0xcec53d43U, - 0x9d8a7a87U, 0x3b14f50fU, 0x7629ea1eU, 0xec53d43cU, - 0xd8a7a879U, 0xb14f50f3U, 0x629ea1e7U, 0xc53d43ceU, - 0x8a7a879dU, 0x14f50f3bU, 0x29ea1e76U, 0x53d43cecU, - 0xa7a879d8U, 0x4f50f3b1U, 0x9ea1e762U, 0x3d43cec5U, - */ -}; - -void sm3_compress_blocks_(uint32_t digest[8], const uint8_t *data, size_t blocks) -{ - uint32_t A; - uint32_t B; - uint32_t C; - uint32_t D; - uint32_t E; - uint32_t F; - uint32_t G; - uint32_t H; - uint32_t W[68]; - uint32_t SS1, SS2, TT1, TT2; - int j; - -#ifdef SM3_SSE3 - __m128i X, T, R; - __m128i M = _mm_setr_epi32(0, 0, 0, 0xffffffff); - __m128i V = _mm_setr_epi8(3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12); -#endif - - while (blocks--) { - - A = digest[0]; - B = digest[1]; - C = digest[2]; - D = digest[3]; - E = digest[4]; - F = digest[5]; - G = digest[6]; - H = digest[7]; - - -#ifdef SM3_SSE3 - - for (j = 0; j < 16; j += 4) { - X = _mm_loadu_si128((__m128i *)(data + j * 4)); - X = _mm_shuffle_epi8(X, V); - _mm_storeu_si128((__m128i *)(W + j), X); - } - - for (j = 16; j < 68; j += 4) { - /* X = (W[j - 3], W[j - 2], W[j - 1], 0) */ - X = _mm_loadu_si128((__m128i *)(W + j - 3)); - X = _mm_andnot_si128(M, X); - - X = _mm_rotl_epi32(X, 15); - T = _mm_loadu_si128((__m128i *)(W + j - 9)); - X = _mm_xor_si128(X, T); - T = _mm_loadu_si128((__m128i *)(W + j - 16)); - X = _mm_xor_si128(X, T); - - /* P1() */ - T = _mm_rotl_epi32(X, (23 - 15)); - T = _mm_xor_si128(T, X); - T = _mm_rotl_epi32(T, 15); - X = _mm_xor_si128(X, T); - - T = _mm_loadu_si128((__m128i *)(W + j - 13)); - T = _mm_rotl_epi32(T, 7); - X = _mm_xor_si128(X, T); - T = _mm_loadu_si128((__m128i *)(W + j - 6)); - X = _mm_xor_si128(X, T); - - /* W[j + 3] ^= P1(ROL32(W[j + 1], 15)) */ - R = _mm_shuffle_epi32(X, 0); - R = _mm_and_si128(R, M); - T = _mm_rotl_epi32(R, 15); - T = _mm_xor_si128(T, R); - T = _mm_rotl_epi32(T, 9); - R = _mm_xor_si128(R, T); - R = _mm_rotl_epi32(R, 6); - X = _mm_xor_si128(X, R); - - _mm_storeu_si128((__m128i *)(W + j), X); - } -#else - for (j = 0; j < 16; j++) - W[j] = GETU32(data + j*4); - - for (; j < 68; j++) - W[j] = P1(W[j - 16] ^ W[j - 9] ^ ROL32(W[j - 3], 15)) - ^ ROL32(W[j - 13], 7) ^ W[j - 6]; -#endif - - - j = 0; - -#define FULL_UNROLL -#ifdef FULL_UNROLL - R8(A, B, C, D, E, F, G, H, 00); - R8(A, B, C, D, E, F, G, H, 00); - R8(A, B, C, D, E, F, G, H, 16); - R8(A, B, C, D, E, F, G, H, 16); - R8(A, B, C, D, E, F, G, H, 16); - R8(A, B, C, D, E, F, G, H, 16); - R8(A, B, C, D, E, F, G, H, 16); - R8(A, B, C, D, E, F, G, H, 16); -#else - for (; j < 16; j++) { - SS1 = ROL32((ROL32(A, 12) + E + K(j)), 7); - SS2 = SS1 ^ ROL32(A, 12); - TT1 = FF00(A, B, C) + D + SS2 + (W[j] ^ W[j + 4]); - TT2 = GG00(E, F, G) + H + SS1 + W[j]; - D = C; - C = ROL32(B, 9); - B = A; - A = TT1; - H = G; - G = ROL32(F, 19); - F = E; - E = P0(TT2); - } - - for (; j < 64; j++) { - SS1 = ROL32((ROL32(A, 12) + E + K(j)), 7); - SS2 = SS1 ^ ROL32(A, 12); - TT1 = FF16(A, B, C) + D + SS2 + (W[j] ^ W[j + 4]); - TT2 = GG16(E, F, G) + H + SS1 + W[j]; - D = C; - C = ROL32(B, 9); - B = A; - A = TT1; - H = G; - G = ROL32(F, 19); - F = E; - E = P0(TT2); - } -#endif - - digest[0] ^= A; - digest[1] ^= B; - digest[2] ^= C; - digest[3] ^= D; - digest[4] ^= E; - digest[5] ^= F; - digest[6] ^= G; - digest[7] ^= H; - - data += 64; - } -} - - -void sm3_init(SM3_CTX *ctx) -{ - memset(ctx, 0, sizeof(*ctx)); - ctx->digest[0] = 0x7380166F; - ctx->digest[1] = 0x4914B2B9; - ctx->digest[2] = 0x172442D7; - ctx->digest[3] = 0xDA8A0600; - ctx->digest[4] = 0xA96F30BC; - ctx->digest[5] = 0x163138AA; - ctx->digest[6] = 0xE38DEE4D; - ctx->digest[7] = 0xB0FB0E4E; -} - -void sm3_update(SM3_CTX *ctx, const uint8_t *data, size_t data_len) -{ - size_t blocks; - - ctx->num &= 0x3f; - if (ctx->num) { - unsigned int left = SM3_BLOCK_SIZE - ctx->num; - if (data_len < left) { - memcpy(ctx->block + ctx->num, data, data_len); - ctx->num += data_len; - return; - } else { - memcpy(ctx->block + ctx->num, data, left); - sm3_compress_blocks(ctx->digest, ctx->block, 1); - ctx->nblocks++; - data += left; - data_len -= left; - } - } - - blocks = data_len / SM3_BLOCK_SIZE; - // 这里如果blocks==0, sm3_compress_blocks是汇编实现,那么会导致汇编SegFault - if (blocks) { - sm3_compress_blocks(ctx->digest, data, (int)blocks); - ctx->nblocks += blocks; - data += SM3_BLOCK_SIZE * blocks; - data_len -= SM3_BLOCK_SIZE * blocks; - } - ctx->num = data_len; - if (data_len) { - memcpy(ctx->block, data, data_len); - } -} - -void sm3_finish(SM3_CTX *ctx, uint8_t *digest) -{ - int i; - - ctx->num &= 0x3f; - ctx->block[ctx->num] = 0x80; - - if (ctx->num <= SM3_BLOCK_SIZE - 9) { - memset(ctx->block + ctx->num + 1, 0, SM3_BLOCK_SIZE - ctx->num - 9); - } else { - memset(ctx->block + ctx->num + 1, 0, SM3_BLOCK_SIZE - ctx->num - 1); - sm3_compress_blocks(ctx->digest, ctx->block, 1); - memset(ctx->block, 0, SM3_BLOCK_SIZE - 8); - } - PUTU32(ctx->block + 56, ctx->nblocks >> 23); - PUTU32(ctx->block + 60, (ctx->nblocks << 9) + (ctx->num << 3)); - - sm3_compress_blocks(ctx->digest, ctx->block, 1); - for (i = 0; i < 8; i++) { - PUTU32(digest + i*4, ctx->digest[i]); - } - memset(ctx, 0, sizeof(SM3_CTX)); -} - -void sm3_digest(const uint8_t *msg, size_t msglen, - uint8_t dgst[SM3_DIGEST_SIZE]) -{ - SM3_CTX ctx; - sm3_init(&ctx); - sm3_update(&ctx, msg, msglen); - sm3_finish(&ctx, dgst); -} diff --git a/src/sm4_avx.c b/src/sm4_avx.c deleted file mode 100644 index 1204cdea..00000000 --- a/src/sm4_avx.c +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Copyright 2014-2022 The GmSSL Project. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ - - -#include -#include -#include -#include - -void sm4_cbc_encrypt(const SM4_KEY *key, const uint8_t iv[16], - const uint8_t *in, size_t nblocks, uint8_t *out) -{ - while (nblocks--) { - gmssl_memxor(out, in, iv, 16); - sm4_encrypt(key, out, out); - iv = out; - in += 16; - out += 16; - } -} - -void sm4_cbc_decrypt(const SM4_KEY *key, const uint8_t iv[16], - const uint8_t *in, size_t nblocks, uint8_t *out) -{ - while (nblocks--) { - sm4_encrypt(key, in, out); - memxor(out, iv, 16); - iv = in; - in += 16; - out += 16; - } -} - -int sm4_cbc_padding_encrypt(const SM4_KEY *key, const uint8_t iv[16], - const uint8_t *in, size_t inlen, - uint8_t *out, size_t *outlen) -{ - uint8_t block[16]; - size_t rem = inlen % 16; - int padding = 16 - inlen % 16; - - if (in) { - memcpy(block, in + inlen - rem, rem); - } - memset(block + rem, padding, padding); - if (inlen/16) { - sm4_cbc_encrypt(key, iv, in, inlen/16, out); - out += inlen - rem; - iv = out - 16; - } - sm4_cbc_encrypt(key, iv, block, 1, out); - *outlen = inlen - rem + 16; - return 1; -} - -int sm4_cbc_padding_decrypt(const SM4_KEY *key, const uint8_t iv[16], - const uint8_t *in, size_t inlen, - uint8_t *out, size_t *outlen) -{ - uint8_t block[16]; - size_t len = sizeof(block); - int padding; - - if (inlen == 0) { - error_puts("warning: input lenght = 0"); - return 0; - } - if (inlen%16 != 0 || inlen < 16) { - error_puts("invalid cbc ciphertext length"); - return -1; - } - if (inlen > 16) { - sm4_cbc_decrypt(key, iv, in, inlen/16 - 1, out); - iv = in + inlen - 32; - } - sm4_cbc_decrypt(key, iv, in + inlen - 16, 1, block); - - padding = block[15]; - if (padding < 1 || padding > 16) { - error_print(); - return -1; - } - len -= padding; - memcpy(out + inlen - 16, block, len); - *outlen = inlen - padding; - return 1; -} - -static void ctr_incr(uint8_t a[16]) -{ - int i; - for (i = 15; i >= 0; i--) { - a[i]++; - if (a[i]) break; - } -} - -// 这个函数支持任意长度的输入,如果输入的长度不是整数长度,那么调用会出现错误 -// 如果输出的长度正好可以凑够4个分组,那么我们就可以一次性的加密4个分组 -// 我们还是应该先准备一个底层的封装,就是CTR模式,给定一个ctr,然后我们输出的是4个分组,并且对ctr做一个变化 - -void sm4_encrypt4(const uint32_t rk[32], void *src, const void *dst); - -void sm4_ctr_encrypt(const SM4_KEY *key, uint8_t ctr[16], const uint8_t *in, size_t inlen, uint8_t *out) -{ - uint8_t blocks[64]; - - while (inlen >= 64) { - memcpy(blocks, ctr, 16); ctr_incr(ctr); - memcpy(blocks + 16, ctr, 16); ctr_incr(ctr); - memcpy(blocks + 32, ctr, 16); ctr_incr(ctr); - memcpy(blocks + 48, ctr, 16); ctr_incr(ctr); - sm4_encrypt4(key->rk, blocks, blocks); - gmssl_memxor(out, in, blocks, 64); - in += 64; - out += 64; - inlen -= 64; - } - - while (inlen) { - size_t len = inlen < 16 ? inlen : 16; - sm4_encrypt(key, ctr, blocks); - gmssl_memxor(out, in, blocks, len); - ctr_incr(ctr); - in += len; - out += len; - inlen -= len; - } -} - -/* -void sm4_ctr_encrypt(const SM4_KEY *key, uint8_t ctr[16], const uint8_t *in, size_t inlen, uint8_t *out) -{ - uint8_t block[16]; - size_t len; - - while (inlen) { - len = inlen < 16 ? inlen : 16; - sm4_encrypt(key, ctr, block); - gmssl_memxor(out, in, block, len); - ctr_incr(ctr); - in += len; - out += len; - inlen -= len; - } -} -*/ - -int sm4_gcm_encrypt(const SM4_KEY *key, const uint8_t *iv, size_t ivlen, - const uint8_t *aad, size_t aadlen, const uint8_t *in, size_t inlen, - uint8_t *out, size_t taglen, uint8_t *tag) -{ - const uint8_t *pin = in; - uint8_t *pout = out; - size_t left = inlen; - uint8_t H[16] = {0}; - uint8_t Y[16]; - uint8_t T[16]; - - if (taglen > SM4_GCM_MAX_TAG_SIZE) { - error_print(); - return -1; - } - - sm4_encrypt(key, H, H); - - if (ivlen == 12) { - memcpy(Y, iv, 12); - Y[12] = Y[13] = Y[14] = 0; - Y[15] = 1; - } else { - ghash(H, NULL, 0, iv, ivlen, Y); - } - - sm4_encrypt(key, Y, T); - - while (left) { - uint8_t block[16]; - size_t len = left < 16 ? left : 16; - ctr_incr(Y); - sm4_encrypt(key, Y, block); - gmssl_memxor(pout, pin, block, len); - pin += len; - pout += len; - left -= len; - } - - ghash(H, aad, aadlen, out, inlen, H); - gmssl_memxor(tag, T, H, taglen); - return 1; -} - -int sm4_gcm_decrypt(const SM4_KEY *key, const uint8_t *iv, size_t ivlen, - const uint8_t *aad, size_t aadlen, const uint8_t *in, size_t inlen, - const uint8_t *tag, size_t taglen, uint8_t *out) -{ - const uint8_t *pin = in; - uint8_t *pout = out; - size_t left = inlen; - uint8_t H[16] = {0}; - uint8_t Y[16]; - uint8_t T[16]; - - sm4_encrypt(key, H, H); - - if (ivlen == 12) { - memcpy(Y, iv, 12); - Y[12] = Y[13] = Y[14] = 0; - Y[15] = 1; - } else { - ghash(H, NULL, 0, iv, ivlen, Y); - } - - ghash(H, aad, aadlen, in, inlen, H); - sm4_encrypt(key, Y, T); - gmssl_memxor(T, T, H, taglen); - if (memcmp(T, tag, taglen) != 0) { - error_print(); - return -1; - } - - while (left) { - uint8_t block[16]; - size_t len = left < 16 ? left : 16; - ctr_incr(Y); - sm4_encrypt(key, Y, block); - gmssl_memxor(pout, pin, block, len); - pin += len; - pout += len; - left -= len; - } - return 1; -} - -int sm4_cbc_encrypt_init(SM4_CBC_CTX *ctx, - const uint8_t key[SM4_BLOCK_SIZE], const uint8_t iv[SM4_BLOCK_SIZE]) -{ - sm4_set_encrypt_key(&ctx->sm4_key, key); - memcpy(ctx->iv, iv, SM4_BLOCK_SIZE); - memset(ctx->block, 0, SM4_BLOCK_SIZE); - ctx->block_nbytes = 0; - return 1; -} - -int sm4_cbc_encrypt_update(SM4_CBC_CTX *ctx, - const uint8_t *in, size_t inlen, uint8_t *out, size_t *outlen) -{ - size_t left; - size_t nblocks; - size_t len; - - if (ctx->block_nbytes >= SM4_BLOCK_SIZE) { - error_print(); - return -1; - } - *outlen = 0; - if (ctx->block_nbytes) { - left = SM4_BLOCK_SIZE - ctx->block_nbytes; - if (inlen < left) { - memcpy(ctx->block + ctx->block_nbytes, in, inlen); - ctx->block_nbytes += inlen; - return 1; - } - memcpy(ctx->block + ctx->block_nbytes, in, left); - sm4_cbc_encrypt(&ctx->sm4_key, ctx->iv, ctx->block, 1, out); - memcpy(ctx->iv, out, SM4_BLOCK_SIZE); - in += left; - inlen -= left; - out += SM4_BLOCK_SIZE; - *outlen += SM4_BLOCK_SIZE; - } - if (inlen >= SM4_BLOCK_SIZE) { - nblocks = inlen / SM4_BLOCK_SIZE; - len = nblocks * SM4_BLOCK_SIZE; - sm4_cbc_encrypt(&ctx->sm4_key, ctx->iv, in, nblocks, out); - memcpy(ctx->iv, out + len - SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); - in += len; - inlen -= len; - out += len; - *outlen += len; - } - if (inlen) { - memcpy(ctx->block, in, inlen); - } - ctx->block_nbytes = inlen; - return 1; -} - -int sm4_cbc_encrypt_finish(SM4_CBC_CTX *ctx, uint8_t *out, size_t *outlen) -{ - size_t left; - size_t i; - - if (ctx->block_nbytes >= SM4_BLOCK_SIZE) { - error_print(); - return -1; - } - if (sm4_cbc_padding_encrypt(&ctx->sm4_key, ctx->iv, ctx->block, ctx->block_nbytes, out, outlen) != 1) { - error_print(); - return -1; - } - return 1; -} - -int sm4_cbc_decrypt_init(SM4_CBC_CTX *ctx, - const uint8_t key[SM4_BLOCK_SIZE], const uint8_t iv[SM4_BLOCK_SIZE]) -{ - sm4_set_decrypt_key(&ctx->sm4_key, key); - memcpy(ctx->iv, iv, SM4_BLOCK_SIZE); - memset(ctx->block, 0, SM4_BLOCK_SIZE); - ctx->block_nbytes = 0; - return 1; -} - -int sm4_cbc_decrypt_update(SM4_CBC_CTX *ctx, - const uint8_t *in, size_t inlen, uint8_t *out, size_t *outlen) -{ - size_t left, len, nblocks; - - if (ctx->block_nbytes > SM4_BLOCK_SIZE) { - error_print(); - return -1; - } - - *outlen = 0; - if (ctx->block_nbytes) { - left = SM4_BLOCK_SIZE - ctx->block_nbytes; - if (inlen <= left) { - memcpy(ctx->block + ctx->block_nbytes, in, inlen); - ctx->block_nbytes += inlen; - return 1; - } - memcpy(ctx->block + ctx->block_nbytes, in, left); - sm4_cbc_decrypt(&ctx->sm4_key, ctx->iv, ctx->block, 1, out); - memcpy(ctx->iv, ctx->block, SM4_BLOCK_SIZE); - in += left; - inlen -= left; - out += SM4_BLOCK_SIZE; - *outlen += SM4_BLOCK_SIZE; - } - if (inlen > SM4_BLOCK_SIZE) { - nblocks = (inlen-1) / SM4_BLOCK_SIZE; - len = nblocks * SM4_BLOCK_SIZE; - sm4_cbc_decrypt(&ctx->sm4_key, ctx->iv, in, nblocks, out); - memcpy(ctx->iv, in + len - SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); - in += len; - inlen -= len; - out += len; - *outlen += len; - } - memcpy(ctx->block, in, inlen); - ctx->block_nbytes = inlen; - return 1; -} - -int sm4_cbc_decrypt_finish(SM4_CBC_CTX *ctx, uint8_t *out, size_t *outlen) -{ - if (ctx->block_nbytes != SM4_BLOCK_SIZE) { - error_print(); - return -1; - } - if (sm4_cbc_padding_decrypt(&ctx->sm4_key, ctx->iv, ctx->block, SM4_BLOCK_SIZE, out, outlen) != 1) { - error_print(); - return -1; - } - return 1; -} - -int sm4_ctr_encrypt_init(SM4_CTR_CTX *ctx, - const uint8_t key[SM4_BLOCK_SIZE], const uint8_t ctr[SM4_BLOCK_SIZE]) -{ - sm4_set_encrypt_key(&ctx->sm4_key, key); - memcpy(ctx->ctr, ctr, SM4_BLOCK_SIZE); - memset(ctx->block, 0, SM4_BLOCK_SIZE); - ctx->block_nbytes = 0; - return 1; -} - -int sm4_ctr_encrypt_update(SM4_CTR_CTX *ctx, - const uint8_t *in, size_t inlen, uint8_t *out, size_t *outlen) -{ - size_t left; - size_t nblocks; - size_t len; - - if (ctx->block_nbytes >= SM4_BLOCK_SIZE) { - error_print(); - return -1; - } - *outlen = 0; - if (ctx->block_nbytes) { - left = SM4_BLOCK_SIZE - ctx->block_nbytes; - if (inlen < left) { - memcpy(ctx->block + ctx->block_nbytes, in, inlen); - ctx->block_nbytes += inlen; - return 1; - } - memcpy(ctx->block + ctx->block_nbytes, in, left); - sm4_ctr_encrypt(&ctx->sm4_key, ctx->ctr, ctx->block, SM4_BLOCK_SIZE, out); - in += left; - inlen -= left; - out += SM4_BLOCK_SIZE; - *outlen += SM4_BLOCK_SIZE; - } - if (inlen >= SM4_BLOCK_SIZE) { - nblocks = inlen / SM4_BLOCK_SIZE; - len = nblocks * SM4_BLOCK_SIZE; - sm4_ctr_encrypt(&ctx->sm4_key, ctx->ctr, in, len, out); - in += len; - inlen -= len; - out += len; - *outlen += len; - } - if (inlen) { - memcpy(ctx->block, in, inlen); - } - ctx->block_nbytes = inlen; - return 1; -} - -int sm4_ctr_encrypt_finish(SM4_CTR_CTX *ctx, uint8_t *out, size_t *outlen) -{ - size_t left; - if (ctx->block_nbytes >= SM4_BLOCK_SIZE) { - error_print(); - return -1; - } - sm4_ctr_encrypt(&ctx->sm4_key, ctx->ctr, ctx->block, ctx->block_nbytes, out); - *outlen = ctx->block_nbytes; - return 1; -} diff --git a/src/sm4ni.c b/src/sm4ni.c deleted file mode 100644 index 9e109a71..00000000 --- a/src/sm4ni.c +++ /dev/null @@ -1,120 +0,0 @@ -// sm4ni.c -// 2018-04-20 Markku-Juhani O. Saarinen - -// Vectorized implementation of SM4. Uses affine transformations and AES NI -// to implement the SM4 S-Box. - -//#include "sm4_ref.h" -#include - -// Encrypt 4 blocks (64 bytes) in ECB mode - -void sm4_encrypt4(const uint32_t rk[32], void *src, const void *dst) -{ - // nibble mask - const __m128i c0f __attribute__((aligned(0x10))) = - { 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F }; - - // flip all bytes in all 32-bit words - const __m128i flp __attribute__((aligned(0x10))) = - { 0x0405060700010203, 0x0C0D0E0F08090A0B }; - - // inverse shift rows - const __m128i shr __attribute__((aligned(0x10))) = - { 0x0B0E0104070A0D00, 0x0306090C0F020508 }; - - // Affine transform 1 (low and high hibbles) - const __m128i m1l __attribute__((aligned(0x10))) = - { 0x9197E2E474720701, 0xC7C1B4B222245157 }; - const __m128i m1h __attribute__((aligned(0x10))) = - { 0xE240AB09EB49A200, 0xF052B91BF95BB012 }; - - // Affine transform 2 (low and high hibbles) - const __m128i m2l __attribute__((aligned(0x10))) = - { 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 }; - const __m128i m2h __attribute__((aligned(0x10))) = - { 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF }; - - // left rotations of 32-bit words by 8-bit increments - const __m128i r08 __attribute__((aligned(0x10))) = - { 0x0605040702010003, 0x0E0D0C0F0A09080B }; - const __m128i r16 __attribute__((aligned(0x10))) = - { 0x0504070601000302, 0x0D0C0F0E09080B0A }; - const __m128i r24 __attribute__((aligned(0x10))) = - { 0x0407060500030201, 0x0C0F0E0D080B0A09 }; - - __m128i x, y, t0, t1, t2, t3; - - uint32_t k, *p32, v[4] __attribute__((aligned(0x10))); - int i; - - p32 = (uint32_t *) src; - t0 = _mm_set_epi32(p32[12], p32[ 8], p32[ 4], p32[ 0]); - t0 = _mm_shuffle_epi8(t0, flp); - t1 = _mm_set_epi32(p32[13], p32[ 9], p32[ 5], p32[ 1]); - t1 = _mm_shuffle_epi8(t1, flp); - t2 = _mm_set_epi32(p32[14], p32[10], p32[ 6], p32[ 2]); - t2 = _mm_shuffle_epi8(t2, flp); - t3 = _mm_set_epi32(p32[15], p32[11], p32[ 7], p32[ 3]); - t3 = _mm_shuffle_epi8(t3, flp); - - for (i = 0; i < 32; i++) { - - k = rk[i]; - x = t1 ^ t2 ^ t3 ^ _mm_set_epi32(k, k, k, k); - - y = _mm_and_si128(x, c0f); // inner affine - y = _mm_shuffle_epi8(m1l, y); - x = _mm_srli_epi64(x, 4); - x = _mm_and_si128(x, c0f); - x = _mm_shuffle_epi8(m1h, x) ^ y; - - x = _mm_shuffle_epi8(x, shr); // inverse MixColumns - x = _mm_aesenclast_si128(x, c0f); // AESNI instruction - - y = _mm_andnot_si128(x, c0f); // outer affine - y = _mm_shuffle_epi8(m2l, y); - x = _mm_srli_epi64(x, 4); - x = _mm_and_si128(x, c0f); - x = _mm_shuffle_epi8(m2h, x) ^ y; - - // 4 parallel L1 linear transforms - y = x ^ _mm_shuffle_epi8(x, r08) ^ _mm_shuffle_epi8(x, r16); - y = _mm_slli_epi32(y, 2) ^ _mm_srli_epi32(y, 30); - x = x ^ y ^ _mm_shuffle_epi8(x, r24); - - // rotate registers - x ^= t0; - t0 = t1; - t1 = t2; - t2 = t3; - t3 = x; - } - - p32 = (uint32_t *) dst; - - _mm_store_si128((__m128i *) v, _mm_shuffle_epi8(t3, flp)); - p32[ 0] = v[0]; - p32[ 4] = v[1]; - p32[ 8] = v[2]; - p32[12] = v[3]; - - _mm_store_si128((__m128i *) v, _mm_shuffle_epi8(t2, flp)); - p32[ 1] = v[0]; - p32[ 5] = v[1]; - p32[ 9] = v[2]; - p32[13] = v[3]; - - _mm_store_si128((__m128i *) v, _mm_shuffle_epi8(t1, flp)); - p32[ 2] = v[0]; - p32[ 6] = v[1]; - p32[10] = v[2]; - p32[14] = v[3]; - - _mm_store_si128((__m128i *) v, _mm_shuffle_epi8(t0, flp)); - p32[ 3] = v[0]; - p32[ 7] = v[1]; - p32[11] = v[2]; - p32[15] = v[3]; -} -