mirror of
https://github.com/guanzhi/GmSSL.git
synced 2026-06-19 11:23:38 +08:00
version 2.5.3
new sms4 api, go api and ciphersuites
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2014 - 2016 The GmSSL Project. All rights reserved.
|
||||
* Copyright (c) 2014 - 2019 The GmSSL Project. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@@ -47,32 +47,15 @@
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <openssl/sms4.h>
|
||||
#include "internal/rotate.h"
|
||||
#include "modes_lcl.h"
|
||||
#include "sms4_lcl.h"
|
||||
|
||||
static __m256i mask_ffff;
|
||||
static __m256i vindex_0s;
|
||||
static __m256i vindex_4i;
|
||||
static __m256i vindex_swap;
|
||||
static __m256i vindex_read;
|
||||
#ifdef SMS4_AVX2
|
||||
# include <immintrin.h>
|
||||
|
||||
|
||||
void sms4_avx2_encrypt_init(sms4_key_t *key)
|
||||
{
|
||||
mask_ffff = _mm256_set1_epi32(0xffff);
|
||||
vindex_0s = _mm256_set1_epi32(0);
|
||||
vindex_4i = _mm256_setr_epi32(0,4,8,12,16,20,24,28);
|
||||
vindex_read = _mm256_setr_epi32(0,8,16,24,1,9,17,25);
|
||||
vindex_swap = _mm256_setr_epi8(
|
||||
3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,
|
||||
3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
|
||||
);
|
||||
sms4_init_sbox32();
|
||||
}
|
||||
|
||||
#define GET_BLKS(x0, x1, x2, x3, in) \
|
||||
# define GET_BLKS(x0, x1, x2, x3, in) \
|
||||
t0 = _mm256_i32gather_epi32((int *)(in+4*0), vindex_4i, 4); \
|
||||
t1 = _mm256_i32gather_epi32((int *)(in+4*1), vindex_4i, 4); \
|
||||
t2 = _mm256_i32gather_epi32((int *)(in+4*2), vindex_4i, 4); \
|
||||
@@ -82,7 +65,7 @@ void sms4_avx2_encrypt_init(sms4_key_t *key)
|
||||
x2 = _mm256_shuffle_epi8(t2, vindex_swap); \
|
||||
x3 = _mm256_shuffle_epi8(t3, vindex_swap)
|
||||
|
||||
#define PUT_BLKS(out, x0, x1, x2, x3) \
|
||||
# define PUT_BLKS(out, x0, x1, x2, x3) \
|
||||
t0 = _mm256_shuffle_epi8(x0, vindex_swap); \
|
||||
t1 = _mm256_shuffle_epi8(x1, vindex_swap); \
|
||||
t2 = _mm256_shuffle_epi8(x2, vindex_swap); \
|
||||
@@ -91,59 +74,135 @@ void sms4_avx2_encrypt_init(sms4_key_t *key)
|
||||
_mm256_storeu_si256((__m256i *)(out+32*1), t1); \
|
||||
_mm256_storeu_si256((__m256i *)(out+32*2), t2); \
|
||||
_mm256_storeu_si256((__m256i *)(out+32*3), t3); \
|
||||
x0 = _mm256_i32gather_epi32((int *)(in+32*0), vindex_read, 4); \
|
||||
x1 = _mm256_i32gather_epi32((int *)(in+32*1), vindex_read, 4); \
|
||||
x2 = _mm256_i32gather_epi32((int *)(in+32*2), vindex_read, 4); \
|
||||
x3 = _mm256_i32gather_epi32((int *)(in+32*3), vindex_read, 4); \
|
||||
_mm256_storeu_si256((__m256i *)(out+2*0), x0); \
|
||||
_mm256_storeu_si256((__m256i *)(out+2*1), x1); \
|
||||
_mm256_storeu_si256((__m256i *)(out+2*2), x2); \
|
||||
_mm256_storeu_si256((__m256i *)(out+2*3), x3)
|
||||
x0 = _mm256_i32gather_epi32((int *)(out+8*0), vindex_read, 4); \
|
||||
x1 = _mm256_i32gather_epi32((int *)(out+8*1), vindex_read, 4); \
|
||||
x2 = _mm256_i32gather_epi32((int *)(out+8*2), vindex_read, 4); \
|
||||
x3 = _mm256_i32gather_epi32((int *)(out+8*3), vindex_read, 4); \
|
||||
_mm256_storeu_si256((__m256i *)(out+32*0), x0); \
|
||||
_mm256_storeu_si256((__m256i *)(out+32*1), x1); \
|
||||
_mm256_storeu_si256((__m256i *)(out+32*2), x2); \
|
||||
_mm256_storeu_si256((__m256i *)(out+32*3), x3)
|
||||
|
||||
#define S(x0, t0, t1, t2) \
|
||||
t0 = _mm256_and_si256(x0, mask_ffff); \
|
||||
t1 = _mm256_i32gather_epi32(SBOX32L, t0, 4); \
|
||||
t0 = _mm256_srli_epi32(x0, 16); \
|
||||
t2 = _mm256_i32gather_epi32(SBOX32H, t0, 4); \
|
||||
x0 = _mm256_xor_si256(t1, t2)
|
||||
# define _mm256_rotl_epi32(a, i) _mm256_xor_si256( \
|
||||
_mm256_slli_epi32(a, i), _mm256_srli_epi32(a, 32 - i))
|
||||
|
||||
#define ROT(r0, x0, i, t0, t1) \
|
||||
t0 = _mm256_slli_epi32(x0, i); \
|
||||
t1 = _mm256_srli_epi32(x0,32-i); \
|
||||
r0 = _mm256_xor_si256(t0, t1)
|
||||
# define INDEX_MASK_TBOX 0xff
|
||||
|
||||
#define L(x0, t0, t1, t2, t3, t4) \
|
||||
ROT(t0, x0, 2, t2, t3); \
|
||||
ROT(t1, x0, 10, t2, t3); \
|
||||
t4 = _mm256_xor_si256(t0, t1); \
|
||||
ROT(t0, x0, 18, t2, t3); \
|
||||
ROT(t1, x0, 24, t2, t3); \
|
||||
t3 = _mm256_xor_si256(t0, t1); \
|
||||
t2 = _mm256_xor_si256(x0, t3); \
|
||||
x0 = _mm256_xor_si256(t2, t4)
|
||||
# define ROUND_TBOX(x0, x1, x2, x3, x4, i) \
|
||||
t0 = _mm256_set1_epi32(*(rk + i)); \
|
||||
t1 = _mm256_xor_si256(x1, x2); \
|
||||
t2 = _mm256_xor_si256(x3, t0); \
|
||||
x4 = _mm256_xor_si256(t1, t2); \
|
||||
t0 = _mm256_and_si256(x4, vindex_mask); \
|
||||
t0 = _mm256_i32gather_epi32((int *)SMS4_T, t0, 4); \
|
||||
t0 = _mm256_rotl_epi32(t0, 8); \
|
||||
x4 = _mm256_srli_epi32(x4, 8); \
|
||||
x0 = _mm256_xor_si256(x0, t0); \
|
||||
t0 = _mm256_and_si256(x4, vindex_mask); \
|
||||
t0 = _mm256_i32gather_epi32((int *)SMS4_T, t0, 4); \
|
||||
t0 = _mm256_rotl_epi32(t0, 16); \
|
||||
x4 = _mm256_srli_epi32(x4, 8); \
|
||||
x0 = _mm256_xor_si256(x0, t0); \
|
||||
t0 = _mm256_and_si256(x4, vindex_mask); \
|
||||
t0 = _mm256_i32gather_epi32((int *)SMS4_T, t0, 4); \
|
||||
t0 = _mm256_rotl_epi32(t0, 24); \
|
||||
x4 = _mm256_srli_epi32(x4, 8); \
|
||||
x0 = _mm256_xor_si256(x0, t0); \
|
||||
t1 = _mm256_i32gather_epi32((int *)SMS4_T, x4, 4); \
|
||||
x4 = _mm256_xor_si256(x0, t1)
|
||||
|
||||
#define ROUND(x0, x1, x2, x3, x4, i) \
|
||||
t0 = _mm256_i32gather_epi32(rk+i, vindex_0s, 4); \
|
||||
t1 = _mm256_xor_si256(x1, x2); \
|
||||
t2 = _mm256_xor_si256(x3, t0); \
|
||||
t0 = _mm256_xor_si256(t1, t2); \
|
||||
S(t0, x4, t1, t2); \
|
||||
L(t0, x4, t1, t2, t3, t4); \
|
||||
x4 = _mm256_xor_si256(x0, t0);
|
||||
# define INDEX_MASK_DBOX 0xffff
|
||||
|
||||
# define ROUND_DBOX(x0, x1, x2, x3, x4, i) \
|
||||
t0 = _mm256_set1_epi32(*(rk + i)); \
|
||||
t1 = _mm256_xor_si256(x1, x2); \
|
||||
t2 = _mm256_xor_si256(x3, t0); \
|
||||
x4 = _mm256_xor_si256(t1, t2); \
|
||||
t0 = _mm256_srli_epi32(x4, 16); \
|
||||
t1 = _mm256_i32gather_epi32((int *)SMS4_D, t0, 4); \
|
||||
t2 = _mm256_and_si256(x4, vindex_mask); \
|
||||
t3 = _mm256_i32gather_epi32((int *)SMS4_D, t2, 4); \
|
||||
t0 = _mm256_rotl_epi32(t3, 16); \
|
||||
x4 = _mm256_xor_si256(x0, t1); \
|
||||
x4 = _mm256_xor_si256(x4, t0)
|
||||
|
||||
# define ROUND ROUND_TBOX
|
||||
# define INDEX_MASK INDEX_MASK_TBOX
|
||||
|
||||
|
||||
void sms4_avx2_encrypt_8blocks(const unsigned char *in, unsigned char *out, const sms4_key_t *key)
|
||||
void sms4_avx2_ecb_encrypt_blocks(const unsigned char *in, unsigned char *out,
|
||||
size_t blocks, const sms4_key_t *key)
|
||||
{
|
||||
const int *rk = (int *)key->rk;
|
||||
__m256i x0, x1, x2, x3, x4;
|
||||
__m256i t0, t1, t2, t3, t4;
|
||||
GET_BLKS(x0, x1, x2, x3, in);
|
||||
ROUNDS(x0, x1, x2, x3, x4);
|
||||
PUT_BLKS(out, x0, x4, x3, x2);
|
||||
__m256i t0, t1, t2, t3;
|
||||
__m256i vindex_4i = _mm256_setr_epi32(0,4,8,12,16,20,24,28);
|
||||
__m256i vindex_mask = _mm256_set1_epi32(INDEX_MASK);
|
||||
__m256i vindex_read = _mm256_setr_epi32(0,8,16,24,1,9,17,25);
|
||||
__m256i vindex_swap = _mm256_setr_epi8(
|
||||
3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,
|
||||
3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
|
||||
);
|
||||
|
||||
while (blocks >= 8) {
|
||||
GET_BLKS(x0, x1, x2, x3, in);
|
||||
ROUNDS(x0, x1, x2, x3, x4);
|
||||
PUT_BLKS(out, x0, x4, x3, x2);
|
||||
in += 128;
|
||||
out += 128;
|
||||
blocks -= 8;
|
||||
}
|
||||
|
||||
while (blocks--) {
|
||||
sms4_encrypt(in, out, key);
|
||||
in += 16;
|
||||
out += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void sms4_avx2_encrypt_16blocks(const unsigned char *in, unsigned char *out, const sms4_key_t *key)
|
||||
void sms4_avx2_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
|
||||
size_t blocks, const sms4_key_t *key, const unsigned char iv[16])
|
||||
{
|
||||
sms4_encrypt_8blocks(key, in, out);
|
||||
sms4_encrypt_8blocks(key, in + 16*8, out + 16*8);
|
||||
const int *rk = (int *)key->rk;
|
||||
__m256i x0, x1, x2, x3, x4;
|
||||
__m256i t0, t1, t2, t3;
|
||||
__m256i vindex_4i = _mm256_setr_epi32(0,4,8,12,16,20,24,28);
|
||||
__m256i vindex_mask = _mm256_set1_epi32(INDEX_MASK);
|
||||
__m256i vindex_read = _mm256_setr_epi32(0,8,16,24,1,9,17,25);
|
||||
__m256i vindex_swap = _mm256_setr_epi8(
|
||||
3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,
|
||||
3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
|
||||
);
|
||||
__m256i incr = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
int c0 = (int)GETU32(iv );
|
||||
int c1 = (int)GETU32(iv + 4);
|
||||
int c2 = (int)GETU32(iv + 8);
|
||||
int c3 = (int)GETU32(iv + 12);
|
||||
|
||||
while (blocks >= 8) {
|
||||
x0 = _mm256_set1_epi32(c0);
|
||||
x1 = _mm256_set1_epi32(c1);
|
||||
x2 = _mm256_set1_epi32(c2);
|
||||
x3 = _mm256_set1_epi32(c3);
|
||||
x3 = _mm256_add_epi32(x3, incr);
|
||||
ROUNDS(x0, x1, x2, x3, x4);
|
||||
GET_BLKS(t0, t1, t2, t3, in);
|
||||
x0 = _mm256_xor_si256(x0, t0);
|
||||
x4 = _mm256_xor_si256(x4, t1);
|
||||
x3 = _mm256_xor_si256(x3, t2);
|
||||
x2 = _mm256_xor_si256(x2, t3);
|
||||
PUT_BLKS(out, x0, x4, x3, x2);
|
||||
c3 += 8;
|
||||
in += 128;
|
||||
out += 128;
|
||||
blocks -= 8;
|
||||
}
|
||||
|
||||
if (blocks) {
|
||||
unsigned char ctr[16];
|
||||
memcpy(ctr, iv, 12);
|
||||
PUTU32(ctr + 12, c3);
|
||||
sms4_ctr32_encrypt_blocks(in, out, blocks, key, ctr);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user