mirror of
https://github.com/guanzhi/GmSSL.git
synced 2026-05-07 00:46:17 +08:00
remove files
This commit is contained in:
12
src/hello.c
12
src/hello.c
@@ -1,12 +0,0 @@
|
||||
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <winsock2.h>
#include <ws2tcpip.h>

/* Ask the MSVC linker to pull in the Winsock 2 import library
 * (ignored with a warning by non-MSVC toolchains). */
#pragma comment(lib, "Ws2_32.lib")

/* Minimal smoke-test program: proves the Winsock 2 headers compile
 * and link in this build environment. */
int main(void)
{
	fputs("winsock2", stdout);
	return EXIT_SUCCESS;
}
|
||||
376
src/sm3_avx.c
376
src/sm3_avx.c
@@ -1,376 +0,0 @@
|
||||
/*
|
||||
* Copyright 2014-2022 The GmSSL Project. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include <gmssl/sm3.h>
|
||||
#include <gmssl/endian.h>
|
||||
#include <gmssl/error.h>
|
||||
|
||||
|
||||
/* SSE path: SSE has no 32-bit vector rotate, so build one from a left
 * shift, a right shift and an XOR.
 * NOTE(review): the name _mm_rotl_epi32 lives in the implementation's
 * reserved namespace (leading underscore) — works in practice on
 * GCC/Clang, but renaming would be cleaner. */
#ifdef SM3_SSE3
# include <x86intrin.h>
# include <immintrin.h>

# define _mm_rotl_epi32(X,i) \
	_mm_xor_si128(_mm_slli_epi32((X),(i)), _mm_srli_epi32((X),32-(i)))
#endif

/* Scalar 32-bit rotate-left.
 * NOTE(review): appears unused here — the code below uses ROL32,
 * presumably provided by <gmssl/endian.h>; confirm before removing. */
#define ROTL(x,n) (((x)<<(n)) | ((x)>>(32-(n))))

/* SM3 permutations: P0 is used in the compression function,
 * P1 in the message expansion. */
#define P0(x) ((x) ^ ROL32((x), 9) ^ ROL32((x),17))
#define P1(x) ((x) ^ ROL32((x),15) ^ ROL32((x),23))

/* Round boolean functions: XOR form for rounds 0-15 (FF00/GG00),
 * majority (FF16) and chooser (GG16, algebraically simplified) for
 * rounds 16-63. */
#define FF00(x,y,z) ((x) ^ (y) ^ (z))
#define FF16(x,y,z) (((x)&(y)) | ((x)&(z)) | ((y)&(z)))
#define GG00(x,y,z) ((((x) ^ (y) ^ (z)))
#define GG16(x,y,z) ((((y)^(z)) & (x)) ^ (z))

/* One SM3 round. Instead of shifting all eight state words, R writes
 * only the four words that actually change (B, D, F, H) and relies on
 * R8 rotating the *argument names*, so after eight rounds the
 * registers are back in A..H order. Increments `j` as a side effect;
 * requires SS1/SS2/TT1/TT2, W[] and K[] in scope. */
#define R(A, B, C, D, E, F, G, H, xx) \
	SS1 = ROL32((ROL32(A, 12) + E + K[j]), 7); \
	SS2 = SS1 ^ ROL32(A, 12); \
	TT1 = FF##xx(A, B, C) + D + SS2 + (W[j] ^ W[j + 4]); \
	TT2 = GG##xx(E, F, G) + H + SS1 + W[j]; \
	B = ROL32(B, 9); \
	H = TT1; \
	F = ROL32(F, 19); \
	D = P0(TT2); \
	j++

/* Eight rounds with the register names rotated one position per call. */
#define R8(A, B, C, D, E, F, G, H, xx) \
	R(A, B, C, D, E, F, G, H, xx); \
	R(H, A, B, C, D, E, F, G, xx); \
	R(G, H, A, B, C, D, E, F, xx); \
	R(F, G, H, A, B, C, D, E, xx); \
	R(E, F, G, H, A, B, C, D, xx); \
	R(D, E, F, G, H, A, B, C, xx); \
	R(C, D, E, F, G, H, A, B, xx); \
	R(B, C, D, E, F, G, H, A, xx)
|
||||
|
||||
|
||||
|
||||
/* SM3 round-constant bases: T_j = T00 for rounds 0-15, T16 for 16-63. */
#define T00 0x79cc4519U
#define T16 0x7a879d8aU

/* Pre-rotated round constants: Kj = T_j <<< (j mod 32).
 * (E.g. K1 = K0 <<< 1 = 0xf3988a32; K32 = T16 <<< 0.) */
#define K0 0x79cc4519U
#define K1 0xf3988a32U
#define K2 0xe7311465U
#define K3 0xce6228cbU
#define K4 0x9cc45197U
#define K5 0x3988a32fU
#define K6 0x7311465eU
#define K7 0xe6228cbcU
#define K8 0xcc451979U
#define K9 0x988a32f3U
#define K10 0x311465e7U
#define K11 0x6228cbceU
#define K12 0xc451979cU
#define K13 0x88a32f39U
#define K14 0x11465e73U
#define K15 0x228cbce6U
#define K16 0x9d8a7a87U
#define K17 0x3b14f50fU
#define K18 0x7629ea1eU
#define K19 0xec53d43cU
#define K20 0xd8a7a879U
#define K21 0xb14f50f3U
#define K22 0x629ea1e7U
#define K23 0xc53d43ceU
#define K24 0x8a7a879dU
#define K25 0x14f50f3bU
#define K26 0x29ea1e76U
#define K27 0x53d43cecU
#define K28 0xa7a879d8U
#define K29 0x4f50f3b1U
#define K30 0x9ea1e762U
#define K31 0x3d43cec5U
#define K32 0x7a879d8aU
#define K33 0xf50f3b14U
#define K34 0xea1e7629U
#define K35 0xd43cec53U
#define K36 0xa879d8a7U
#define K37 0x50f3b14fU
#define K38 0xa1e7629eU
#define K39 0x43cec53dU
#define K40 0x879d8a7aU
#define K41 0x0f3b14f5U
#define K42 0x1e7629eaU
#define K43 0x3cec53d4U
#define K44 0x79d8a7a8U
#define K45 0xf3b14f50U
#define K46 0xe7629ea1U
#define K47 0xcec53d43U
#define K48 0x9d8a7a87U
#define K49 0x3b14f50fU
#define K50 0x7629ea1eU
#define K51 0xec53d43cU
#define K52 0xd8a7a879U
#define K53 0xb14f50f3U
#define K54 0x629ea1e7U
#define K55 0xc53d43ceU
#define K56 0x8a7a879dU
#define K57 0x14f50f3bU
#define K58 0x29ea1e76U
#define K59 0x53d43cecU
#define K60 0xa7a879d8U
#define K61 0x4f50f3b1U
#define K62 0x9ea1e762U
#define K63 0x3d43cec5U

/*
 * Round-constant lookup table indexed by round number j.
 * Now const-qualified — it is only ever read (by the R macro / round
 * loops). The previous duplicate, commented-out list of the same 64
 * hex values has been dropped: it could silently drift from the
 * macros above.
 */
static const uint32_t K[64] = {
	K0, K1, K2, K3, K4, K5, K6, K7,
	K8, K9, K10, K11, K12, K13, K14, K15,
	K16, K17, K18, K19, K20, K21, K22, K23,
	K24, K25, K26, K27, K28, K29, K30, K31,
	K32, K33, K34, K35, K36, K37, K38, K39,
	K40, K41, K42, K43, K44, K45, K46, K47,
	K48, K49, K50, K51, K52, K53, K54, K55,
	K56, K57, K58, K59, K60, K61, K62, K63,
};
|
||||
|
||||
/*
 * Core SM3 compression: absorb `blocks` consecutive 64-byte message
 * blocks from `data` into the 8-word chaining state `digest`
 * (host-order words). C reference / SSE implementation; the trailing
 * underscore distinguishes it from the assembly sm3_compress_blocks.
 *
 * Fix: the non-FULL_UNROLL fallback indexed the constant table with
 * function-call syntax `K(j)` (twice); that path would not compile if
 * FULL_UNROLL were ever disabled. Changed to `K[j]`.
 */
void sm3_compress_blocks_(uint32_t digest[8], const uint8_t *data, size_t blocks)
{
	uint32_t A;
	uint32_t B;
	uint32_t C;
	uint32_t D;
	uint32_t E;
	uint32_t F;
	uint32_t G;
	uint32_t H;
	uint32_t W[68];			/* expanded message schedule */
	uint32_t SS1, SS2, TT1, TT2;	/* round temporaries used by R/R8 */
	int j;

#ifdef SM3_SSE3
	__m128i X, T, R;
	/* M selects the highest 32-bit lane; V byte-swaps each 32-bit word. */
	__m128i M = _mm_setr_epi32(0, 0, 0, 0xffffffff);
	__m128i V = _mm_setr_epi8(3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12);
#endif

	while (blocks--) {

		A = digest[0];
		B = digest[1];
		C = digest[2];
		D = digest[3];
		E = digest[4];
		F = digest[5];
		G = digest[6];
		H = digest[7];

#ifdef SM3_SSE3
		/* Load the 16 big-endian message words, 4 at a time. */
		for (j = 0; j < 16; j += 4) {
			X = _mm_loadu_si128((__m128i *)(data + j * 4));
			X = _mm_shuffle_epi8(X, V);
			_mm_storeu_si128((__m128i *)(W + j), X);
		}

		/* Vectorized message expansion, 4 words per iteration. */
		for (j = 16; j < 68; j += 4) {
			/* X = (W[j - 3], W[j - 2], W[j - 1], 0) */
			X = _mm_loadu_si128((__m128i *)(W + j - 3));
			X = _mm_andnot_si128(M, X);

			X = _mm_rotl_epi32(X, 15);
			T = _mm_loadu_si128((__m128i *)(W + j - 9));
			X = _mm_xor_si128(X, T);
			T = _mm_loadu_si128((__m128i *)(W + j - 16));
			X = _mm_xor_si128(X, T);

			/* P1(): x ^ (x <<< 15) ^ (x <<< 23), factored as two rotates. */
			T = _mm_rotl_epi32(X, (23 - 15));
			T = _mm_xor_si128(T, X);
			T = _mm_rotl_epi32(T, 15);
			X = _mm_xor_si128(X, T);

			T = _mm_loadu_si128((__m128i *)(W + j - 13));
			T = _mm_rotl_epi32(T, 7);
			X = _mm_xor_si128(X, T);
			T = _mm_loadu_si128((__m128i *)(W + j - 6));
			X = _mm_xor_si128(X, T);

			/* Fix up the highest lane, which depends on a word computed
			 * in this very iteration: W[j + 3] ^= P1(ROL32(W[j + 1], 15)) */
			R = _mm_shuffle_epi32(X, 0);
			R = _mm_and_si128(R, M);
			T = _mm_rotl_epi32(R, 15);
			T = _mm_xor_si128(T, R);
			T = _mm_rotl_epi32(T, 9);
			R = _mm_xor_si128(R, T);
			R = _mm_rotl_epi32(R, 6);
			X = _mm_xor_si128(X, R);

			_mm_storeu_si128((__m128i *)(W + j), X);
		}
#else
		for (j = 0; j < 16; j++)
			W[j] = GETU32(data + j*4);

		for (; j < 68; j++)
			W[j] = P1(W[j - 16] ^ W[j - 9] ^ ROL32(W[j - 3], 15))
				^ ROL32(W[j - 13], 7) ^ W[j - 6];
#endif

		j = 0;

#define FULL_UNROLL
#ifdef FULL_UNROLL
		/* 64 rounds fully unrolled; R8 leaves registers in A..H order. */
		R8(A, B, C, D, E, F, G, H, 00);
		R8(A, B, C, D, E, F, G, H, 00);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
#else
		/* Rounds 0-15 (was: K(j) — fixed to K[j]). */
		for (; j < 16; j++) {
			SS1 = ROL32((ROL32(A, 12) + E + K[j]), 7);
			SS2 = SS1 ^ ROL32(A, 12);
			TT1 = FF00(A, B, C) + D + SS2 + (W[j] ^ W[j + 4]);
			TT2 = GG00(E, F, G) + H + SS1 + W[j];
			D = C;
			C = ROL32(B, 9);
			B = A;
			A = TT1;
			H = G;
			G = ROL32(F, 19);
			F = E;
			E = P0(TT2);
		}

		/* Rounds 16-63 (was: K(j) — fixed to K[j]). */
		for (; j < 64; j++) {
			SS1 = ROL32((ROL32(A, 12) + E + K[j]), 7);
			SS2 = SS1 ^ ROL32(A, 12);
			TT1 = FF16(A, B, C) + D + SS2 + (W[j] ^ W[j + 4]);
			TT2 = GG16(E, F, G) + H + SS1 + W[j];
			D = C;
			C = ROL32(B, 9);
			B = A;
			A = TT1;
			H = G;
			G = ROL32(F, 19);
			F = E;
			E = P0(TT2);
		}
#endif

		/* Davies-Meyer style feed-forward by XOR. */
		digest[0] ^= A;
		digest[1] ^= B;
		digest[2] ^= C;
		digest[3] ^= D;
		digest[4] ^= E;
		digest[5] ^= F;
		digest[6] ^= G;
		digest[7] ^= H;

		data += 64;
	}
}
|
||||
|
||||
|
||||
/* Reset ctx to a fresh hash with the SM3 standard initial value. */
void sm3_init(SM3_CTX *ctx)
{
	static const uint32_t IV[8] = {
		0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600,
		0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E,
	};
	size_t i;

	memset(ctx, 0, sizeof(*ctx));
	for (i = 0; i < 8; i++) {
		ctx->digest[i] = IV[i];
	}
}
|
||||
|
||||
/*
 * Absorb data_len bytes into the hash state.
 * Whole 64-byte blocks are compressed immediately; a partial trailing
 * block is buffered in ctx->block until the next update/finish.
 */
void sm3_update(SM3_CTX *ctx, const uint8_t *data, size_t data_len)
{
	size_t nblocks;

	ctx->num &= 0x3f;	/* defensive: keep the buffered count in-range */

	/* First top up a previously buffered partial block. */
	if (ctx->num) {
		unsigned int left = SM3_BLOCK_SIZE - ctx->num;

		if (data_len < left) {
			memcpy(ctx->block + ctx->num, data, data_len);
			ctx->num += data_len;
			return;
		}
		memcpy(ctx->block + ctx->num, data, left);
		sm3_compress_blocks(ctx->digest, ctx->block, 1);
		ctx->nblocks++;
		data += left;
		data_len -= left;
	}

	/* Bulk-compress all remaining whole blocks straight from the input.
	 * Translated from the original Chinese comment: skip the call when
	 * the count is zero — the assembly implementation of
	 * sm3_compress_blocks segfaults on a zero block count. */
	nblocks = data_len / SM3_BLOCK_SIZE;
	if (nblocks) {
		sm3_compress_blocks(ctx->digest, data, (int)nblocks);
		ctx->nblocks += nblocks;
		data += SM3_BLOCK_SIZE * nblocks;
		data_len -= SM3_BLOCK_SIZE * nblocks;
	}

	/* Stash the tail (< one block) for later. */
	ctx->num = data_len;
	if (data_len) {
		memcpy(ctx->block, data, data_len);
	}
}
|
||||
|
||||
/*
 * Finalize the hash: append the 0x80 marker, zero-pad, append the
 * 64-bit message bit length, compress the final block(s) and write the
 * 32-byte digest. The context is wiped before returning.
 */
void sm3_finish(SM3_CTX *ctx, uint8_t *digest)
{
	int i;

	ctx->num &= 0x3f;
	ctx->block[ctx->num] = 0x80;	/* mandatory "1" padding bit */

	if (ctx->num <= SM3_BLOCK_SIZE - 9) {
		/* Marker and 8-byte length both fit in this block. */
		memset(ctx->block + ctx->num + 1, 0, SM3_BLOCK_SIZE - ctx->num - 9);
	} else {
		/* No room for the length: pad out, compress, start fresh. */
		memset(ctx->block + ctx->num + 1, 0, SM3_BLOCK_SIZE - ctx->num - 1);
		sm3_compress_blocks(ctx->digest, ctx->block, 1);
		memset(ctx->block, 0, SM3_BLOCK_SIZE - 8);
	}

	/* Bit length = nblocks*512 + num*8, stored big-endian as two words:
	 * high = nblocks >> 23, low = (nblocks << 9) + (num << 3). */
	PUTU32(ctx->block + 56, ctx->nblocks >> 23);
	PUTU32(ctx->block + 60, (ctx->nblocks << 9) + (ctx->num << 3));

	sm3_compress_blocks(ctx->digest, ctx->block, 1);
	for (i = 0; i < 8; i++) {
		PUTU32(digest + i * 4, ctx->digest[i]);
	}
	memset(ctx, 0, sizeof(SM3_CTX));	/* don't leave hash state behind */
}
|
||||
|
||||
void sm3_digest(const uint8_t *msg, size_t msglen,
|
||||
uint8_t dgst[SM3_DIGEST_SIZE])
|
||||
{
|
||||
SM3_CTX ctx;
|
||||
sm3_init(&ctx);
|
||||
sm3_update(&ctx, msg, msglen);
|
||||
sm3_finish(&ctx, dgst);
|
||||
}
|
||||
437
src/sm4_avx.c
437
src/sm4_avx.c
@@ -1,437 +0,0 @@
|
||||
/*
|
||||
* Copyright 2014-2022 The GmSSL Project. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*/
|
||||
|
||||
|
||||
#include <gmssl/sm4.h>
|
||||
#include <gmssl/mem.h>
|
||||
#include <gmssl/gcm.h>
|
||||
#include <gmssl/error.h>
|
||||
|
||||
/* CBC encryption of nblocks whole blocks: C_i = E_K(P_i ^ C_{i-1}),
 * with C_0 chained from iv. in/out may not overlap partially. */
void sm4_cbc_encrypt(const SM4_KEY *key, const uint8_t iv[16],
	const uint8_t *in, size_t nblocks, uint8_t *out)
{
	size_t i;

	for (i = 0; i < nblocks; i++) {
		gmssl_memxor(out, in, iv, 16);	/* plaintext ^ previous ciphertext */
		sm4_encrypt(key, out, out);	/* encrypt in place */
		iv = out;			/* this block chains into the next */
		in += 16;
		out += 16;
	}
}
|
||||
|
||||
/* CBC decryption of nblocks whole blocks: P_i = D_K(C_i) ^ C_{i-1}.
 * NOTE(review): sm4_encrypt() is reused here — this presumably relies
 * on `key` holding a reversed (decryption) round-key schedule; confirm
 * against sm4_set_decrypt_key(). */
void sm4_cbc_decrypt(const SM4_KEY *key, const uint8_t iv[16],
	const uint8_t *in, size_t nblocks, uint8_t *out)
{
	size_t i;

	for (i = 0; i < nblocks; i++) {
		sm4_encrypt(key, in, out);	/* out = D_K(C_i) */
		memxor(out, iv, 16);		/* xor previous ciphertext back out */
		iv = in;			/* current ciphertext chains forward */
		in += 16;
		out += 16;
	}
}
|
||||
|
||||
/*
 * CBC encryption with PKCS#7-style padding. Always emits at least one
 * block: *outlen = inlen rounded up to the next multiple of 16
 * (or inlen + 16 when already aligned). Returns 1.
 */
int sm4_cbc_padding_encrypt(const SM4_KEY *key, const uint8_t iv[16],
	const uint8_t *in, size_t inlen,
	uint8_t *out, size_t *outlen)
{
	uint8_t last[16];
	size_t rem = inlen % 16;
	int pad = (int)(16 - rem);	/* 1..16 filler bytes, each = pad */

	/* Build the final block: input tail followed by the pad filler. */
	if (in) {
		memcpy(last, in + inlen - rem, rem);
	}
	memset(last + rem, pad, (size_t)pad);

	/* Encrypt the whole input blocks, then chain into the padded one. */
	if (inlen / 16) {
		sm4_cbc_encrypt(key, iv, in, inlen / 16, out);
		out += inlen - rem;
		iv = out - 16;	/* last ciphertext block becomes the IV */
	}
	sm4_cbc_encrypt(key, iv, last, 1, out);

	*outlen = inlen - rem + 16;
	return 1;
}
|
||||
|
||||
/*
 * CBC-decrypt a padded ciphertext and strip the PKCS#7-style padding.
 * Returns 1 on success, 0 for empty input (warning only), -1 on a
 * malformed length or invalid padding byte.
 * Fix: corrected the typo "lenght" in the warning message.
 */
int sm4_cbc_padding_decrypt(const SM4_KEY *key, const uint8_t iv[16],
	const uint8_t *in, size_t inlen,
	uint8_t *out, size_t *outlen)
{
	uint8_t block[16];
	size_t len = sizeof(block);
	int padding;

	if (inlen == 0) {
		error_puts("warning: input length = 0");
		return 0;
	}
	if (inlen%16 != 0 || inlen < 16) {
		error_puts("invalid cbc ciphertext length");
		return -1;
	}
	/* Decrypt everything but the last block straight into `out`. */
	if (inlen > 16) {
		sm4_cbc_decrypt(key, iv, in, inlen/16 - 1, out);
		iv = in + inlen - 32;	/* second-to-last ciphertext block */
	}
	/* The last block goes through a scratch buffer so padding can be
	 * checked before anything is copied to the caller. */
	sm4_cbc_decrypt(key, iv, in + inlen - 16, 1, block);

	padding = block[15];
	if (padding < 1 || padding > 16) {
		/* NOTE(review): branching on padding validity leaks a padding
		 * oracle via timing/errors in online settings — acceptable for
		 * offline use, but worth confirming the threat model. */
		error_print();
		return -1;
	}
	len -= padding;
	memcpy(out + inlen - 16, block, len);
	*outlen = inlen - padding;
	return 1;
}
|
||||
|
||||
/* Increment a 16-byte big-endian counter by one (wraps at 2^128). */
static void ctr_incr(uint8_t a[16])
{
	size_t i = 16;

	while (i-- > 0) {
		if (++a[i] != 0) {
			break;	/* no carry out of this byte — done */
		}
	}
}
|
||||
|
||||
// 这个函数支持任意长度的输入,如果输入的长度不是整数长度,那么调用会出现错误
|
||||
// 如果输出的长度正好可以凑够4个分组,那么我们就可以一次性的加密4个分组
|
||||
// 我们还是应该先准备一个底层的封装,就是CTR模式,给定一个ctr,然后我们输出的是4个分组,并且对ctr做一个变化
|
||||
|
||||
void sm4_encrypt4(const uint32_t rk[32], void *src, const void *dst);
|
||||
|
||||
void sm4_ctr_encrypt(const SM4_KEY *key, uint8_t ctr[16], const uint8_t *in, size_t inlen, uint8_t *out)
|
||||
{
|
||||
uint8_t blocks[64];
|
||||
|
||||
while (inlen >= 64) {
|
||||
memcpy(blocks, ctr, 16); ctr_incr(ctr);
|
||||
memcpy(blocks + 16, ctr, 16); ctr_incr(ctr);
|
||||
memcpy(blocks + 32, ctr, 16); ctr_incr(ctr);
|
||||
memcpy(blocks + 48, ctr, 16); ctr_incr(ctr);
|
||||
sm4_encrypt4(key->rk, blocks, blocks);
|
||||
gmssl_memxor(out, in, blocks, 64);
|
||||
in += 64;
|
||||
out += 64;
|
||||
inlen -= 64;
|
||||
}
|
||||
|
||||
while (inlen) {
|
||||
size_t len = inlen < 16 ? inlen : 16;
|
||||
sm4_encrypt(key, ctr, blocks);
|
||||
gmssl_memxor(out, in, blocks, len);
|
||||
ctr_incr(ctr);
|
||||
in += len;
|
||||
out += len;
|
||||
inlen -= len;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void sm4_ctr_encrypt(const SM4_KEY *key, uint8_t ctr[16], const uint8_t *in, size_t inlen, uint8_t *out)
|
||||
{
|
||||
uint8_t block[16];
|
||||
size_t len;
|
||||
|
||||
while (inlen) {
|
||||
len = inlen < 16 ? inlen : 16;
|
||||
sm4_encrypt(key, ctr, block);
|
||||
gmssl_memxor(out, in, block, len);
|
||||
ctr_incr(ctr);
|
||||
in += len;
|
||||
out += len;
|
||||
inlen -= len;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
int sm4_gcm_encrypt(const SM4_KEY *key, const uint8_t *iv, size_t ivlen,
|
||||
const uint8_t *aad, size_t aadlen, const uint8_t *in, size_t inlen,
|
||||
uint8_t *out, size_t taglen, uint8_t *tag)
|
||||
{
|
||||
const uint8_t *pin = in;
|
||||
uint8_t *pout = out;
|
||||
size_t left = inlen;
|
||||
uint8_t H[16] = {0};
|
||||
uint8_t Y[16];
|
||||
uint8_t T[16];
|
||||
|
||||
if (taglen > SM4_GCM_MAX_TAG_SIZE) {
|
||||
error_print();
|
||||
return -1;
|
||||
}
|
||||
|
||||
sm4_encrypt(key, H, H);
|
||||
|
||||
if (ivlen == 12) {
|
||||
memcpy(Y, iv, 12);
|
||||
Y[12] = Y[13] = Y[14] = 0;
|
||||
Y[15] = 1;
|
||||
} else {
|
||||
ghash(H, NULL, 0, iv, ivlen, Y);
|
||||
}
|
||||
|
||||
sm4_encrypt(key, Y, T);
|
||||
|
||||
while (left) {
|
||||
uint8_t block[16];
|
||||
size_t len = left < 16 ? left : 16;
|
||||
ctr_incr(Y);
|
||||
sm4_encrypt(key, Y, block);
|
||||
gmssl_memxor(pout, pin, block, len);
|
||||
pin += len;
|
||||
pout += len;
|
||||
left -= len;
|
||||
}
|
||||
|
||||
ghash(H, aad, aadlen, out, inlen, H);
|
||||
gmssl_memxor(tag, T, H, taglen);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int sm4_gcm_decrypt(const SM4_KEY *key, const uint8_t *iv, size_t ivlen,
|
||||
const uint8_t *aad, size_t aadlen, const uint8_t *in, size_t inlen,
|
||||
const uint8_t *tag, size_t taglen, uint8_t *out)
|
||||
{
|
||||
const uint8_t *pin = in;
|
||||
uint8_t *pout = out;
|
||||
size_t left = inlen;
|
||||
uint8_t H[16] = {0};
|
||||
uint8_t Y[16];
|
||||
uint8_t T[16];
|
||||
|
||||
sm4_encrypt(key, H, H);
|
||||
|
||||
if (ivlen == 12) {
|
||||
memcpy(Y, iv, 12);
|
||||
Y[12] = Y[13] = Y[14] = 0;
|
||||
Y[15] = 1;
|
||||
} else {
|
||||
ghash(H, NULL, 0, iv, ivlen, Y);
|
||||
}
|
||||
|
||||
ghash(H, aad, aadlen, in, inlen, H);
|
||||
sm4_encrypt(key, Y, T);
|
||||
gmssl_memxor(T, T, H, taglen);
|
||||
if (memcmp(T, tag, taglen) != 0) {
|
||||
error_print();
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (left) {
|
||||
uint8_t block[16];
|
||||
size_t len = left < 16 ? left : 16;
|
||||
ctr_incr(Y);
|
||||
sm4_encrypt(key, Y, block);
|
||||
gmssl_memxor(pout, pin, block, len);
|
||||
pin += len;
|
||||
pout += len;
|
||||
left -= len;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Begin a streaming CBC encryption.
 * Expands `key` into an encryption schedule, records `iv`, and clears
 * the partial-block buffer. Always returns 1.
 */
int sm4_cbc_encrypt_init(SM4_CBC_CTX *ctx,
	const uint8_t key[SM4_BLOCK_SIZE], const uint8_t iv[SM4_BLOCK_SIZE])
{
	sm4_set_encrypt_key(&ctx->sm4_key, key);
	memcpy(ctx->iv, iv, SM4_BLOCK_SIZE);
	memset(ctx->block, 0, SM4_BLOCK_SIZE);
	ctx->block_nbytes = 0;
	return 1;
}
|
||||
|
||||
/*
 * Feed more plaintext into a streaming CBC encryption.
 * Whole blocks are encrypted immediately; a partial trailing block is
 * buffered in ctx->block until more input (or finish) arrives.
 * *outlen receives the number of ciphertext bytes produced this call.
 * Returns 1 on success, -1 on a corrupted context.
 */
int sm4_cbc_encrypt_update(SM4_CBC_CTX *ctx,
	const uint8_t *in, size_t inlen, uint8_t *out, size_t *outlen)
{
	size_t avail, whole, nbytes;

	if (ctx->block_nbytes >= SM4_BLOCK_SIZE) {
		error_print();
		return -1;
	}
	*outlen = 0;

	/* Complete a previously buffered partial block first. */
	if (ctx->block_nbytes) {
		avail = SM4_BLOCK_SIZE - ctx->block_nbytes;
		if (inlen < avail) {
			memcpy(ctx->block + ctx->block_nbytes, in, inlen);
			ctx->block_nbytes += inlen;
			return 1;
		}
		memcpy(ctx->block + ctx->block_nbytes, in, avail);
		sm4_cbc_encrypt(&ctx->sm4_key, ctx->iv, ctx->block, 1, out);
		memcpy(ctx->iv, out, SM4_BLOCK_SIZE);	/* chain forward */
		in += avail;
		inlen -= avail;
		out += SM4_BLOCK_SIZE;
		*outlen += SM4_BLOCK_SIZE;
	}

	/* Encrypt all whole blocks straight from the caller's buffer. */
	if (inlen >= SM4_BLOCK_SIZE) {
		whole = inlen / SM4_BLOCK_SIZE;
		nbytes = whole * SM4_BLOCK_SIZE;
		sm4_cbc_encrypt(&ctx->sm4_key, ctx->iv, in, whole, out);
		memcpy(ctx->iv, out + nbytes - SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
		in += nbytes;
		inlen -= nbytes;
		out += nbytes;
		*outlen += nbytes;
	}

	/* Buffer whatever is left (< one block). */
	if (inlen) {
		memcpy(ctx->block, in, inlen);
	}
	ctx->block_nbytes = inlen;
	return 1;
}
|
||||
|
||||
/*
 * Finish a streaming CBC encryption: pad the buffered tail
 * (PKCS#7-style) and emit the final ciphertext block via
 * sm4_cbc_padding_encrypt(). Returns 1 on success, -1 on error.
 * Fix: removed the unused locals `left` and `i` from the original.
 */
int sm4_cbc_encrypt_finish(SM4_CBC_CTX *ctx, uint8_t *out, size_t *outlen)
{
	if (ctx->block_nbytes >= SM4_BLOCK_SIZE) {
		error_print();
		return -1;
	}
	if (sm4_cbc_padding_encrypt(&ctx->sm4_key, ctx->iv, ctx->block, ctx->block_nbytes, out, outlen) != 1) {
		error_print();
		return -1;
	}
	return 1;
}
|
||||
|
||||
/*
 * Begin a streaming CBC decryption.
 * Expands `key` into a *decryption* schedule (reversed round keys),
 * records `iv`, and clears the partial-block buffer. Always returns 1.
 */
int sm4_cbc_decrypt_init(SM4_CBC_CTX *ctx,
	const uint8_t key[SM4_BLOCK_SIZE], const uint8_t iv[SM4_BLOCK_SIZE])
{
	sm4_set_decrypt_key(&ctx->sm4_key, key);
	memcpy(ctx->iv, iv, SM4_BLOCK_SIZE);
	memset(ctx->block, 0, SM4_BLOCK_SIZE);
	ctx->block_nbytes = 0;
	return 1;
}
|
||||
|
||||
/*
 * Feed ciphertext into a streaming CBC decryption.
 * Always retains at least one full ciphertext block in ctx->block so
 * that finish() can strip the padding from the true final block.
 * Returns 1 on success, -1 on a corrupted context.
 */
int sm4_cbc_decrypt_update(SM4_CBC_CTX *ctx,
	const uint8_t *in, size_t inlen, uint8_t *out, size_t *outlen)
{
	size_t avail, whole, nbytes;

	if (ctx->block_nbytes > SM4_BLOCK_SIZE) {
		error_print();
		return -1;
	}

	*outlen = 0;
	if (ctx->block_nbytes) {
		avail = SM4_BLOCK_SIZE - ctx->block_nbytes;
		/* `<=`: a just-completed block stays buffered, since it might
		 * be the stream's last (padded) block. */
		if (inlen <= avail) {
			memcpy(ctx->block + ctx->block_nbytes, in, inlen);
			ctx->block_nbytes += inlen;
			return 1;
		}
		memcpy(ctx->block + ctx->block_nbytes, in, avail);
		sm4_cbc_decrypt(&ctx->sm4_key, ctx->iv, ctx->block, 1, out);
		memcpy(ctx->iv, ctx->block, SM4_BLOCK_SIZE);	/* chain forward */
		in += avail;
		inlen -= avail;
		out += SM4_BLOCK_SIZE;
		*outlen += SM4_BLOCK_SIZE;
	}
	/* Decrypt whole blocks, but never the final one of the input:
	 * (inlen - 1) / block holds the last full block back for finish(). */
	if (inlen > SM4_BLOCK_SIZE) {
		whole = (inlen - 1) / SM4_BLOCK_SIZE;
		nbytes = whole * SM4_BLOCK_SIZE;
		sm4_cbc_decrypt(&ctx->sm4_key, ctx->iv, in, whole, out);
		memcpy(ctx->iv, in + nbytes - SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
		in += nbytes;
		inlen -= nbytes;
		out += nbytes;
		*outlen += nbytes;
	}
	/* Buffer the remaining 0..16 bytes. */
	memcpy(ctx->block, in, inlen);
	ctx->block_nbytes = inlen;
	return 1;
}
|
||||
|
||||
/*
 * Finish a streaming CBC decryption: decrypt the retained final block
 * and strip its padding. update() guarantees exactly one full block is
 * buffered here; anything else means the ciphertext length was not a
 * multiple of the block size. Returns 1 on success, -1 on error.
 */
int sm4_cbc_decrypt_finish(SM4_CBC_CTX *ctx, uint8_t *out, size_t *outlen)
{
	if (ctx->block_nbytes != SM4_BLOCK_SIZE) {
		error_print();
		return -1;
	}
	if (sm4_cbc_padding_decrypt(&ctx->sm4_key, ctx->iv, ctx->block, SM4_BLOCK_SIZE, out, outlen) != 1) {
		error_print();
		return -1;
	}
	return 1;
}
|
||||
|
||||
/*
 * Begin streaming CTR en/decryption (the two are identical in CTR).
 * Expands `key` into an encryption schedule, records the initial
 * counter, and clears the partial-block buffer. Always returns 1.
 */
int sm4_ctr_encrypt_init(SM4_CTR_CTX *ctx,
	const uint8_t key[SM4_BLOCK_SIZE], const uint8_t ctr[SM4_BLOCK_SIZE])
{
	sm4_set_encrypt_key(&ctx->sm4_key, key);
	memcpy(ctx->ctr, ctr, SM4_BLOCK_SIZE);
	memset(ctx->block, 0, SM4_BLOCK_SIZE);
	ctx->block_nbytes = 0;
	return 1;
}
|
||||
|
||||
/*
 * Feed more data into a streaming CTR encryption.
 * A partial trailing block is buffered so the counter only ever
 * advances on full blocks; whole blocks are processed in one call.
 * Returns 1 on success, -1 on a corrupted context.
 */
int sm4_ctr_encrypt_update(SM4_CTR_CTX *ctx,
	const uint8_t *in, size_t inlen, uint8_t *out, size_t *outlen)
{
	size_t avail, whole, nbytes;

	if (ctx->block_nbytes >= SM4_BLOCK_SIZE) {
		error_print();
		return -1;
	}
	*outlen = 0;

	/* Complete and flush a previously buffered partial block. */
	if (ctx->block_nbytes) {
		avail = SM4_BLOCK_SIZE - ctx->block_nbytes;
		if (inlen < avail) {
			memcpy(ctx->block + ctx->block_nbytes, in, inlen);
			ctx->block_nbytes += inlen;
			return 1;
		}
		memcpy(ctx->block + ctx->block_nbytes, in, avail);
		sm4_ctr_encrypt(&ctx->sm4_key, ctx->ctr, ctx->block, SM4_BLOCK_SIZE, out);
		in += avail;
		inlen -= avail;
		out += SM4_BLOCK_SIZE;
		*outlen += SM4_BLOCK_SIZE;
	}

	/* Process all whole blocks straight from the caller's buffer. */
	if (inlen >= SM4_BLOCK_SIZE) {
		whole = inlen / SM4_BLOCK_SIZE;
		nbytes = whole * SM4_BLOCK_SIZE;
		sm4_ctr_encrypt(&ctx->sm4_key, ctx->ctr, in, nbytes, out);
		in += nbytes;
		inlen -= nbytes;
		out += nbytes;
		*outlen += nbytes;
	}

	/* Buffer the remainder (< one block). */
	if (inlen) {
		memcpy(ctx->block, in, inlen);
	}
	ctx->block_nbytes = inlen;
	return 1;
}
|
||||
|
||||
/*
 * Finish streaming CTR encryption: emit the keystream-XOR of the
 * buffered partial block (0..15 bytes; CTR needs no padding).
 * Returns 1 on success, -1 on a corrupted context.
 * Fix: removed the unused local `left` from the original.
 */
int sm4_ctr_encrypt_finish(SM4_CTR_CTX *ctx, uint8_t *out, size_t *outlen)
{
	if (ctx->block_nbytes >= SM4_BLOCK_SIZE) {
		error_print();
		return -1;
	}
	sm4_ctr_encrypt(&ctx->sm4_key, ctx->ctr, ctx->block, ctx->block_nbytes, out);
	*outlen = ctx->block_nbytes;
	return 1;
}
|
||||
120
src/sm4ni.c
120
src/sm4ni.c
@@ -1,120 +0,0 @@
|
||||
// sm4ni.c
// 2018-04-20 Markku-Juhani O. Saarinen <mjos@iki.fi>

// Vectorized implementation of SM4. Uses affine transformations and AES NI
// to implement the SM4 S-Box.

//#include "sm4_ref.h"
#include <x86intrin.h>

// Encrypt 4 blocks (64 bytes) in ECB mode
//
// NOTE(review): the const qualifiers on the parameters look swapped —
// `src` is only read yet is non-const, while `dst` is written through a
// cast despite being declared const. Changing the signature would break
// existing callers (see the forward declaration in sm4_avx.c), so it is
// left as-is; confirm and fix both sites together.

void sm4_encrypt4(const uint32_t rk[32], void *src, const void *dst)
{
	// nibble mask
	const __m128i c0f __attribute__((aligned(0x10))) =
		{ 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F };

	// flip all bytes in all 32-bit words
	const __m128i flp __attribute__((aligned(0x10))) =
		{ 0x0405060700010203, 0x0C0D0E0F08090A0B };

	// inverse shift rows
	const __m128i shr __attribute__((aligned(0x10))) =
		{ 0x0B0E0104070A0D00, 0x0306090C0F020508 };

	// Affine transform 1 (low and high hibbles)
	const __m128i m1l __attribute__((aligned(0x10))) =
		{ 0x9197E2E474720701, 0xC7C1B4B222245157 };
	const __m128i m1h __attribute__((aligned(0x10))) =
		{ 0xE240AB09EB49A200, 0xF052B91BF95BB012 };

	// Affine transform 2 (low and high hibbles)
	const __m128i m2l __attribute__((aligned(0x10))) =
		{ 0x5B67F2CEA19D0834, 0xEDD14478172BBE82 };
	const __m128i m2h __attribute__((aligned(0x10))) =
		{ 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF };

	// left rotations of 32-bit words by 8-bit increments
	const __m128i r08 __attribute__((aligned(0x10))) =
		{ 0x0605040702010003, 0x0E0D0C0F0A09080B };
	const __m128i r16 __attribute__((aligned(0x10))) =
		{ 0x0504070601000302, 0x0D0C0F0E09080B0A };
	const __m128i r24 __attribute__((aligned(0x10))) =
		{ 0x0407060500030201, 0x0C0F0E0D080B0A09 };

	__m128i x, y, t0, t1, t2, t3;

	uint32_t k, *p32, v[4] __attribute__((aligned(0x10)));
	int i;

	// Gather the 4 input blocks column-wise: t0..t3 each hold one
	// 32-bit word position from all four blocks, byte-swapped to
	// host order via flp.
	p32 = (uint32_t *) src;
	t0 = _mm_set_epi32(p32[12], p32[ 8], p32[ 4], p32[ 0]);
	t0 = _mm_shuffle_epi8(t0, flp);
	t1 = _mm_set_epi32(p32[13], p32[ 9], p32[ 5], p32[ 1]);
	t1 = _mm_shuffle_epi8(t1, flp);
	t2 = _mm_set_epi32(p32[14], p32[10], p32[ 6], p32[ 2]);
	t2 = _mm_shuffle_epi8(t2, flp);
	t3 = _mm_set_epi32(p32[15], p32[11], p32[ 7], p32[ 3]);
	t3 = _mm_shuffle_epi8(t3, flp);

	// 32 SM4 rounds, four blocks in parallel.
	for (i = 0; i < 32; i++) {

		k = rk[i];
		x = t1 ^ t2 ^ t3 ^ _mm_set_epi32(k, k, k, k);

		y = _mm_and_si128(x, c0f); // inner affine
		y = _mm_shuffle_epi8(m1l, y);
		x = _mm_srli_epi64(x, 4);
		x = _mm_and_si128(x, c0f);
		x = _mm_shuffle_epi8(m1h, x) ^ y;

		x = _mm_shuffle_epi8(x, shr); // inverse MixColumns
		x = _mm_aesenclast_si128(x, c0f); // AESNI instruction

		y = _mm_andnot_si128(x, c0f); // outer affine
		y = _mm_shuffle_epi8(m2l, y);
		x = _mm_srli_epi64(x, 4);
		x = _mm_and_si128(x, c0f);
		x = _mm_shuffle_epi8(m2h, x) ^ y;

		// 4 parallel L1 linear transforms
		y = x ^ _mm_shuffle_epi8(x, r08) ^ _mm_shuffle_epi8(x, r16);
		y = _mm_slli_epi32(y, 2) ^ _mm_srli_epi32(y, 30);
		x = x ^ y ^ _mm_shuffle_epi8(x, r24);

		// rotate registers
		x ^= t0;
		t0 = t1;
		t1 = t2;
		t2 = t3;
		t3 = x;
	}

	// Scatter the results back out column-wise (word order reversed,
	// per the SM4 final reverse transform), byte-swapping each word.
	p32 = (uint32_t *) dst;

	_mm_store_si128((__m128i *) v, _mm_shuffle_epi8(t3, flp));
	p32[ 0] = v[0];
	p32[ 4] = v[1];
	p32[ 8] = v[2];
	p32[12] = v[3];

	_mm_store_si128((__m128i *) v, _mm_shuffle_epi8(t2, flp));
	p32[ 1] = v[0];
	p32[ 5] = v[1];
	p32[ 9] = v[2];
	p32[13] = v[3];

	_mm_store_si128((__m128i *) v, _mm_shuffle_epi8(t1, flp));
	p32[ 2] = v[0];
	p32[ 6] = v[1];
	p32[10] = v[2];
	p32[14] = v[3];

	_mm_store_si128((__m128i *) v, _mm_shuffle_epi8(t0, flp));
	p32[ 3] = v[0];
	p32[ 7] = v[1];
	p32[11] = v[2];
	p32[15] = v[3];
}
|
||||
|
||||
Reference in New Issue
Block a user