/*
 * Mirror of https://github.com/guanzhi/GmSSL.git
 * (synced 2026-05-12 03:16:25 +08:00; 357 lines, 9.1 KiB, C, executable file)
 */
/* ====================================================================
 * Copyright (c) 2014 - 2017 The GmSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the GmSSL Project.
 *    (http://gmssl.org/)"
 *
 * 4. The name "GmSSL Project" must not be used to endorse or promote
 *    products derived from this software without prior written
 *    permission. For written permission, please contact
 *    guanzhi1980@gmail.com.
 *
 * 5. Products derived from this software may not be called "GmSSL"
 *    nor may "GmSSL" appear in their names without prior written
 *    permission of the GmSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the GmSSL Project
 *    (http://gmssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE GmSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE GmSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <immintrin.h>
#include <openssl/sm3.h>
#include "internal/rotate.h"
#include "internal/byteorder.h"
#include "modes_lcl.h"

/*
 * Chaining state for eight SM3 computations kept side by side.
 *
 * Each member holds one of the eight SM3 state words (A..H) for all eight
 * computations: element [i] belongs to computation ("lane") i.  Every member
 * is 8 x 32 bits = 256 bits, i.e. exactly one AVX2 register's worth of data.
 */
typedef struct {
	uint32_t A[8];	/* state word A, one entry per lane */
	uint32_t B[8];	/* state word B, one entry per lane */
	uint32_t C[8];	/* state word C, one entry per lane */
	uint32_t D[8];	/* state word D, one entry per lane */
	uint32_t E[8];	/* state word E, one entry per lane */
	uint32_t F[8];	/* state word F, one entry per lane */
	uint32_t G[8];	/* state word G, one entry per lane */
	uint32_t H[8];	/* state word H, one entry per lane */
} SM3_AVX2_CTX;

/*
 * Reset a multi-buffer SM3 context.
 *
 * The whole context is zeroed first, then state words A..H of each of the
 * eight lanes are set to the SM3 initial value, so all lanes start a fresh
 * hash.
 *
 * NOTE(review): sm3_mb_ctx_t is not declared in this file; it presumably has
 * the same A..H[8] members as SM3_AVX2_CTX -- confirm against the header.
 */
void sm3_mb_init(sm3_mb_ctx_t *ctx)
{
	int lane;

	memset(ctx, 0, sizeof(*ctx));

	for (lane = 0; lane < 8; lane++) {
		ctx->A[lane] = 0x7380166F;
		ctx->B[lane] = 0x4914B2B9;
		ctx->C[lane] = 0x172442D7;
		ctx->D[lane] = 0xDA8A0600;
		ctx->E[lane] = 0xA96F30BC;
		ctx->F[lane] = 0x163138AA;
		ctx->G[lane] = 0xE38DEE4D;
		ctx->H[lane] = 0xB0FB0E4E;
	}
}

/*
 * Absorb data_len bytes of message into the hash state.
 *
 * ctx      - running SM3 context; ctx->block holds ctx->num buffered bytes
 *            that did not yet fill a block, ctx->nblocks counts compressed
 *            blocks.
 * data     - message bytes (may be NULL only when data_len is 0).
 * data_len - number of bytes at data.
 *
 * Order of work: top up and compress a previously buffered partial block,
 * compress all whole blocks directly from the input, buffer the remainder.
 */
void sm3_update(sm3_ctx_t *ctx, const unsigned char *data, size_t data_len)
{
	size_t blocks;

	/* Nothing to absorb; also avoids memcpy() with a possibly-NULL source. */
	if (data_len == 0) {
		return;
	}

	if (ctx->num) {
		unsigned int left = SM3_BLOCK_SIZE - ctx->num;

		if (data_len < left) {
			/* Still not a full block: just extend the buffer. */
			memcpy(ctx->block + ctx->num, data, data_len);
			ctx->num += data_len;
			return;
		}

		/* Complete the buffered block and compress it. */
		memcpy(ctx->block + ctx->num, data, left);
		sm3_compress(ctx->digest, ctx->block);
		ctx->nblocks++;
		data += left;
		data_len -= left;
	}

	/*
	 * Count whole blocks only now: the bytes used above to complete the
	 * buffered block are no longer available.  Counting from the original
	 * length would read past the end of the input and make data_len wrap.
	 */
	blocks = data_len / SM3_BLOCK_SIZE;

	sm3_compress_blocks(ctx->digest, data, blocks);
	ctx->nblocks += blocks;
	data += SM3_BLOCK_SIZE * blocks;
	data_len -= SM3_BLOCK_SIZE * blocks;

	/* Keep the tail (fewer than SM3_BLOCK_SIZE bytes) for the next call. */
	ctx->num = data_len;
	if (data_len) {
		memcpy(ctx->block, data, data_len);
	}
}

/*
 * Finish the hash and write the result to digest.
 *
 * Appends the 0x80 marker after the buffered bytes, zero-fills up to the
 * last 8 bytes of a block (spilling into one extra block when fewer than
 * 9 bytes are free), stores the message length in bits in those 8 bytes,
 * compresses, and writes the eight state words to digest (8 * 4 bytes).
 *
 * NOTE(review): byte order of the stored words is whatever PUTU32 does;
 * presumably big-endian -- confirm in internal/byteorder.h.
 */
void sm3_final(sm3_ctx_t *ctx, unsigned char *digest)
{
	int i;

	ctx->block[ctx->num] = 0x80;

	if (ctx->num + 9 > SM3_BLOCK_SIZE) {
		/*
		 * No room left for the length field: pad this block to the end,
		 * compress it, and continue with a zeroed block.
		 */
		memset(ctx->block + ctx->num + 1, 0, SM3_BLOCK_SIZE - ctx->num - 1);
		sm3_compress(ctx->digest, ctx->block);
		memset(ctx->block, 0, SM3_BLOCK_SIZE - 8);
	} else {
		/* Zero everything between the marker and the length field. */
		memset(ctx->block + ctx->num + 1, 0, SM3_BLOCK_SIZE - ctx->num - 9);
	}

	/* Bit length = nblocks * 512 + num * 8, split into high/low 32 bits. */
	PUTU32(ctx->block + 56, ctx->nblocks >> 23);
	PUTU32(ctx->block + 60, (ctx->nblocks << 9) + (ctx->num << 3));
	sm3_compress(ctx->digest, ctx->block);

	for (i = 0; i < 8; i++) {
		PUTU32(digest + i*4, ctx->digest[i]);
	}
}

/*
 * 8-way SM3 compression with AVX2: every 32-bit element of a __m256i belongs
 * to a different, independent message ("lane").
 *
 * NOTE(review): the code this replaces was an unfinished scalar-to-vector
 * conversion.  The data layout and the meaning of VPUT documented below are
 * the interpretation chosen for this implementation -- confirm with callers.
 */

/*
 * VPUT(addr, X): SM3 feed-forward.  XORs the eight lanes of X into the eight
 * 32-bit words stored at addr (no alignment requirement).
 */
#define VPUT(addr, X) \
	_mm256_storeu_si256((__m256i *)(addr), \
		_mm256_xor_si256( \
			_mm256_loadu_si256((const __m256i *)(addr)), (X)))

/* Rotate every 32-bit lane of X left by i bits (0 < i < 32). */
#define _mm256_rotl_epi32(X, i) \
	_mm256_xor_si256( \
		_mm256_slli_epi32((X), (i)), \
		_mm256_srli_epi32((X), 32 - (i)))

/*
 * P0(X) = X ^ (X <<< 9) ^ (X <<< 17)
 * X is expanded three times: pass a variable, not an expression.
 */
#define P0(X) \
	_mm256_xor_si256( \
		_mm256_xor_si256((X), \
			_mm256_rotl_epi32((X), 9)), \
		_mm256_rotl_epi32((X), 17))

/*
 * P1(X) = X ^ (X <<< 15) ^ (X <<< 23)
 * X is expanded three times: pass a variable, not an expression.
 */
#define P1(X) \
	_mm256_xor_si256( \
		_mm256_xor_si256((X), \
			_mm256_rotl_epi32((X), 15)), \
		_mm256_rotl_epi32((X), 23))

/* FF00(X, Y, Z) = X ^ Y ^ Z */
#define FF00(X,Y,Z) \
	_mm256_xor_si256((X), _mm256_xor_si256((Y), (Z)))

/* FF16(X, Y, Z) = (X and Y) or (X and Z) or (Y and Z) */
#define FF16(X,Y,Z) \
	_mm256_or_si256( \
		_mm256_and_si256((X), (Y)), \
		_mm256_or_si256( \
			_mm256_and_si256((X), (Z)), \
			_mm256_and_si256((Y), (Z))))

/* GG00(X, Y, Z) = X ^ Y ^ Z */
#define GG00(X,Y,Z) FF00(X,Y,Z)

/* GG16(X, Y, Z) = ((Y ^ Z) & X) ^ Z, i.e. (X and Y) or (not X and Z) */
#define GG16(X,Y,Z) \
	_mm256_xor_si256((Z), \
		_mm256_and_si256((X), \
			_mm256_xor_si256((Y), (Z))))

/*
 * One SM3 round on all eight lanes.
 *
 * Expects in scope: __m256i W[68] (expanded message), the table K[64],
 * an int round counter j, and __m256i temporaries SS1, SS2, TT1, TT2.
 * xx is 00 for rounds 0..15 and 16 for rounds 16..63 (selects FFxx/GGxx).
 *
 * Only B, D, F and H are written; the register renaming that the round
 * function requires is done by the caller rotating the argument list
 * (see R8).  j is incremented.
 */
#define R(A, B, C, D, E, F, G, H, xx) \
	do { \
		const __m256i W_j = W[j]; \
		const __m256i A12 = _mm256_rotl_epi32(A, 12); \
		SS1 = _mm256_add_epi32(_mm256_add_epi32(A12, E), \
			_mm256_set1_epi32((int)K[j])); \
		SS1 = _mm256_rotl_epi32(SS1, 7); \
		SS2 = _mm256_xor_si256(SS1, A12); \
		TT1 = _mm256_add_epi32( \
			_mm256_add_epi32(FF##xx(A, B, C), D), \
			_mm256_add_epi32(SS2, _mm256_xor_si256(W_j, W[j + 4]))); \
		TT2 = _mm256_add_epi32( \
			_mm256_add_epi32(GG##xx(E, F, G), H), \
			_mm256_add_epi32(SS1, W_j)); \
		B = _mm256_rotl_epi32(B, 9); \
		H = TT1; \
		F = _mm256_rotl_epi32(F, 19); \
		D = P0(TT2); \
		j++; \
	} while (0)

/* Eight rounds; after them the variables are back in A..H order. */
#define R8(A, B, C, D, E, F, G, H, xx) \
	do { \
		R(A, B, C, D, E, F, G, H, xx); \
		R(H, A, B, C, D, E, F, G, xx); \
		R(G, H, A, B, C, D, E, F, xx); \
		R(F, G, H, A, B, C, D, E, xx); \
		R(E, F, G, H, A, B, C, D, xx); \
		R(D, E, F, G, H, A, B, C, xx); \
		R(C, D, E, F, G, H, A, B, xx); \
		R(B, C, D, E, F, G, H, A, xx); \
	} while (0)

/* SM3 round constants T_j for rounds 0..15 and 16..63. */
#define T00 0x79cc4519U
#define T16 0x7a879d8aU

/* Kj = T_j <<< (j mod 32), precomputed for every round. */
#define K0  0x79cc4519U
#define K1  0xf3988a32U
#define K2  0xe7311465U
#define K3  0xce6228cbU
#define K4  0x9cc45197U
#define K5  0x3988a32fU
#define K6  0x7311465eU
#define K7  0xe6228cbcU
#define K8  0xcc451979U
#define K9  0x988a32f3U
#define K10 0x311465e7U
#define K11 0x6228cbceU
#define K12 0xc451979cU
#define K13 0x88a32f39U
#define K14 0x11465e73U
#define K15 0x228cbce6U
#define K16 0x9d8a7a87U
#define K17 0x3b14f50fU
#define K18 0x7629ea1eU
#define K19 0xec53d43cU
#define K20 0xd8a7a879U
#define K21 0xb14f50f3U
#define K22 0x629ea1e7U
#define K23 0xc53d43ceU
#define K24 0x8a7a879dU
#define K25 0x14f50f3bU
#define K26 0x29ea1e76U
#define K27 0x53d43cecU
#define K28 0xa7a879d8U
#define K29 0x4f50f3b1U
#define K30 0x9ea1e762U
#define K31 0x3d43cec5U
#define K32 0x7a879d8aU
#define K33 0xf50f3b14U
#define K34 0xea1e7629U
#define K35 0xd43cec53U
#define K36 0xa879d8a7U
#define K37 0x50f3b14fU
#define K38 0xa1e7629eU
#define K39 0x43cec53dU
#define K40 0x879d8a7aU
#define K41 0x0f3b14f5U
#define K42 0x1e7629eaU
#define K43 0x3cec53d4U
#define K44 0x79d8a7a8U
#define K45 0xf3b14f50U
#define K46 0xe7629ea1U
#define K47 0xcec53d43U
#define K48 0x9d8a7a87U
#define K49 0x3b14f50fU
#define K50 0x7629ea1eU
#define K51 0xec53d43cU
#define K52 0xd8a7a879U
#define K53 0xb14f50f3U
#define K54 0x629ea1e7U
#define K55 0xc53d43ceU
#define K56 0x8a7a879dU
#define K57 0x14f50f3bU
#define K58 0x29ea1e76U
#define K59 0x53d43cecU
#define K60 0xa7a879d8U
#define K61 0x4f50f3b1U
#define K62 0x9ea1e762U
#define K63 0x3d43cec5U

/* The same constants as a table, indexable by the run-time round number. */
static const uint32_t K[64] = {
	K0,  K1,  K2,  K3,  K4,  K5,  K6,  K7,
	K8,  K9,  K10, K11, K12, K13, K14, K15,
	K16, K17, K18, K19, K20, K21, K22, K23,
	K24, K25, K26, K27, K28, K29, K30, K31,
	K32, K33, K34, K35, K36, K37, K38, K39,
	K40, K41, K42, K43, K44, K45, K46, K47,
	K48, K49, K50, K51, K52, K53, K54, K55,
	K56, K57, K58, K59, K60, K61, K62, K63,
};

/*
 * Compress `blocks` 64-byte blocks of eight messages at once.
 *
 * digest - chaining state, updated in place.  digest[w][l] is state word w
 *          (0 = A ... 7 = H) of lane l, the same layout as SM3_AVX2_CTX.
 * data   - base pointer shared by all lanes.
 * offset - offset[l] is the byte offset from data to the first block of
 *          lane l; successive blocks of a lane are contiguous.  Not modified.
 * blocks - number of blocks to process in every lane.
 *
 * No alignment is required for digest or data.  The function carries its own
 * AVX2 target attribute on GCC/Clang, so the file compiles without -mavx2;
 * the caller must still make sure the CPU supports AVX2 before calling.
 */
#if defined(__GNUC__) || defined(__clang__)
__attribute__((target("avx2")))
#endif
static void sm3_avx2_compress_blocks(uint32_t digest[8][8],
	const unsigned char *data, size_t offset[8], size_t blocks)
{
	/*
	 * Byte reversal inside every 32-bit word.  _mm256_shuffle_epi8 works
	 * per 128-bit half, hence the same 16 indices twice.  Turns the
	 * little-endian words read from memory into SM3's big-endian words.
	 */
	const __m256i bswap32 = _mm256_setr_epi8(
		3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12,
		3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);

	__m256i A, B, C, D, E, F, G, H;
	__m256i SS1, SS2, TT1, TT2;
	__m256i W[68];
	uint32_t lanes[8];
	int j, l;

	while (blocks--) {

		A = _mm256_loadu_si256((const __m256i *)digest[0]);
		B = _mm256_loadu_si256((const __m256i *)digest[1]);
		C = _mm256_loadu_si256((const __m256i *)digest[2]);
		D = _mm256_loadu_si256((const __m256i *)digest[3]);
		E = _mm256_loadu_si256((const __m256i *)digest[4]);
		F = _mm256_loadu_si256((const __m256i *)digest[5]);
		G = _mm256_loadu_si256((const __m256i *)digest[6]);
		H = _mm256_loadu_si256((const __m256i *)digest[7]);

		/* W[0..15]: word j of each lane's block, transposed into a vector. */
		for (j = 0; j < 16; j++) {
			for (l = 0; l < 8; l++) {
				memcpy(&lanes[l], data + offset[l] + 4 * j,
					sizeof(lanes[l]));
			}
			W[j] = _mm256_shuffle_epi8(
				_mm256_loadu_si256((const __m256i *)lanes), bswap32);
		}

		/*
		 * Message expansion:
		 * W[j] = P1(W[j-16] ^ W[j-9] ^ (W[j-3] <<< 15))
		 *        ^ (W[j-13] <<< 7) ^ W[j-6]
		 */
		for (; j < 68; j++) {
			const __m256i t = _mm256_xor_si256(
				_mm256_xor_si256(W[j - 16], W[j - 9]),
				_mm256_rotl_epi32(W[j - 3], 15));

			W[j] = _mm256_xor_si256(
				_mm256_xor_si256(P1(t),
					_mm256_rotl_epi32(W[j - 13], 7)),
				W[j - 6]);
		}

		/* 64 rounds: 16 with FF00/GG00, then 48 with FF16/GG16. */
		j = 0;
		R8(A, B, C, D, E, F, G, H, 00);
		R8(A, B, C, D, E, F, G, H, 00);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);
		R8(A, B, C, D, E, F, G, H, 16);

		/* Feed-forward: new state = old state XOR round output. */
		VPUT(digest[0], A);
		VPUT(digest[1], B);
		VPUT(digest[2], C);
		VPUT(digest[3], D);
		VPUT(digest[4], E);
		VPUT(digest[5], F);
		VPUT(digest[6], G);
		VPUT(digest[7], H);

		data += 64;
	}
}

/*
 * Compress a single 64-byte block into digest (eight 32-bit state words,
 * updated in place) by delegating to sm3_compress_blocks() with a block
 * count of 1.
 *
 * NOTE(review): sm3_compress_blocks() is not defined in this part of the
 * file; only the static sm3_avx2_compress_blocks() is -- confirm which
 * implementation is meant to back this wrapper.
 */
void sm3_compress(uint32_t digest[8], const unsigned char block[64])
{
	/* Plain call: a void function must not `return` an expression. */
	sm3_compress_blocks(digest, block, 1);
}

void sm3(const unsigned char *msg, size_t msglen,
|
|
unsigned char dgst[SM3_DIGEST_LENGTH])
|
|
{
|
|
sm3_ctx_t ctx;
|
|
|
|
sm3_init(&ctx);
|
|
sm3_update(&ctx, msg, msglen);
|
|
sm3_final(&ctx, dgst);
|
|
|
|
memset(&ctx, 0, sizeof(sm3_ctx_t));
|
|
}
|
|
|