diff --git a/.gitignore b/.gitignore
index 960e006f..c415c39f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -211,10 +211,14 @@ include/openssl/srp.h
 /python
 /build
 
+
 .gmssl
 CMakeFiles/
 CTestTestfile.cmake
 cmake_install.cmake
 install_manifest.txt
 CMakeCache.txt
-bin/
\ No newline at end of file
+bin/
+
+.vscode/*
+
diff --git a/src/gf128_avx.c b/src/gf128_avx.c
new file mode 100644
index 00000000..8207561c
--- /dev/null
+++ b/src/gf128_avx.c
@@ -0,0 +1,301 @@
+/*
+ *  Copyright 2014-2022 The GmSSL Project. All Rights Reserved.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the License); you may
+ *  not use this file except in compliance with the License.
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+
+/* GF(2^128) defined by f(x) = x^128 + x^7 + x^2 + x + 1
+ * A + B mod f(x) = a xor b
+ * A * 2 mod f(x)
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+
+/* Return the element decoded from sixteen zero bytes (the additive identity). */
+gf128_t gf128_zero(void)
+{
+	uint8_t zero[16] = {0};
+	return gf128_from_bytes(zero);
+}
+
+/*
+ * Decode a hex string into a field element.
+ *
+ * At most 32 hex digits (16 bytes) are handed to hex_to_bytes(), so a longer
+ * string can no longer overrun the 16-byte buffer, and the buffer starts
+ * zeroed so a short string leaves zero bytes instead of indeterminate ones.
+ * NOTE(review): the return convention of hex_to_bytes() is not visible in
+ * this file, so its result is still not checked here -- confirm and add it.
+ */
+gf128_t gf128_from_hex(const char *s)
+{
+	uint8_t bin[16] = {0};
+	size_t slen = strlen(s);
+	size_t len = 0;
+
+	if (slen > 2 * sizeof(bin)) {
+		slen = 2 * sizeof(bin);
+	}
+	hex_to_bytes(s, slen, bin, &len);
+	return gf128_from_bytes(bin);
+}
+
+/*
+ * Return 1 when `a` serializes to the 16 bytes written as hex in `s`, else 0.
+ * A string that is not exactly 32 characters long cannot encode 16 bytes, so
+ * it is rejected before decoding (this also keeps the decoder inside bin1).
+ */
+int gf128_equ_hex(gf128_t a, const char *s)
+{
+	uint8_t bin1[16] = {0};
+	uint8_t bin2[16];
+	size_t len = 0;
+
+	if (strlen(s) != 2 * sizeof(bin1)) {
+		return 0;
+	}
+	hex_to_bytes(s, 2 * sizeof(bin1), bin1, &len);
+	gf128_to_bytes(a, bin2);
+	return memcmp(bin1, bin2, sizeof(bin1)) == 0;
+}
+
+/* Print the 128 bits of `a` to stdout: a.hi then a.lo, each lowest bit first. */
+void gf128_print_bits(gf128_t a)
+{
+	int i;
+	for (i = 0; i < 64; i++) {
+		printf("%d", (int)(a.hi % 2));
+		a.hi >>= 1;
+	}
+	for (i = 0; i < 64; i++) {
+		printf("%d", (int)(a.lo % 2));
+		a.lo >>= 1;
+	}
+	printf("\n");
+}
+
+/*
+ * Write "<label>: <32 hex digits>\n" for `a` to `fp`.
+ *
+ * The output used to go to stdout no matter which stream was passed; it now
+ * goes to `fp`, falling back to stdout when `fp` is NULL. `fmt` and `ind`
+ * are accepted for interface compatibility but are not used. Returns 1.
+ */
+int gf128_print(FILE *fp, int fmt, int ind, const char *label, gf128_t a)
+{
+	uint8_t be[16];
+	int i;
+
+	(void)fmt;
+	(void)ind;
+	if (fp == NULL) {
+		fp = stdout;
+	}
+
+	fprintf(fp, "%s: ", label);
+	gf128_to_bytes(a, be);
+	for (i = 0; i < 16; i++) {
+		fprintf(fp, "%02x", be[i]);
+	}
+	fprintf(fp, "\n");
+	return 1;
+}
+
+
+/* Mirror the 64 bits of `a`: bit i of the input becomes bit 63 - i. */
+static uint64_t reverse_bits(uint64_t a)
+{
+	uint64_t r = 0;
+	int i;
+
+	for (i = 0; i < 63; i++) {
+		r |= a & 1;
+		r <<= 1;
+		a >>= 1;
+	}
+	r |= a & 1;
+	return r;
+}
+
+/*
+ * Load a field element from 16 bytes: the first 8 bytes become `lo` and the
+ * last 8 become `hi`, each read with GETU64 and then bit-reversed.
+ */
+gf128_t gf128_from_bytes(const uint8_t p[16])
+{
+	gf128_t r;
+
+	r.lo = GETU64(p);
+	r.hi = GETU64(p + 8);
+
+	r.lo = reverse_bits(r.lo);
+	r.hi = reverse_bits(r.hi);
+	return r;
+}
+
+/* Store `a` as 16 bytes: bit-reverse both halves, then PUTU64 lo followed by hi. */
+void gf128_to_bytes(gf128_t a, uint8_t p[16])
+{
+	a.lo = reverse_bits(a.lo);
+	a.hi = reverse_bits(a.hi);
+	PUTU64(p, a.lo);
+	PUTU64(p + 8, a.hi);
+}
+
+/*
+ * Field addition, i.e. XOR of the two elements.
+ *
+ * XOR acts on every bit independently, so it gives the same result on the
+ * hi/lo words as on the serialized bytes; the former round trip through
+ * gf128_to_bytes()/gf128_from_bytes() and SSE registers only added work.
+ * NOTE(review): this relies on PUTU64/GETU64 being plain byte stores/loads.
+ */
+gf128_t gf128_add(gf128_t ga, gf128_t gb)
+{
+	gf128_t r;
+
+	r.hi = ga.hi ^ gb.hi;
+	r.lo = ga.lo ^ gb.lo;
+	return r;
+}
+
+
+/*
+ * Field multiplication using carry-less multiply (PCLMULQDQ).
+ *
+ * Both operands are serialized, byte-reversed with MASK, multiplied as four
+ * 64x64 carry-less products, shifted left by one bit and folded back to 128
+ * bits; the result is byte-reversed again and decoded.
+ * NOTE(review): the sequence has the shape of the gfmul routine in Intel's
+ * carry-less multiplication white paper -- confirm against that reference.
+ */
+gf128_t gf128_mul(gf128_t ga, gf128_t gb)
+{
+	uint8_t r[16], a[16], b[16];
+
+	const __m128i MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+	gf128_to_bytes(ga, a);
+	gf128_to_bytes(gb, b);
+
+	__m128i a1 = _mm_loadu_si128((const __m128i*)a);
+	__m128i b1 = _mm_loadu_si128((const __m128i*)b);
+
+	a1 = _mm_shuffle_epi8(a1, MASK);
+	b1 = _mm_shuffle_epi8(b1, MASK);
+
+	__m128i T0, T1, T2, T3, T4, T5;
+
+	/* schoolbook product: T0 = lo*lo, T1/T2 = cross terms, T3 = hi*hi */
+	T0 = _mm_clmulepi64_si128(a1, b1, 0x00);
+	T1 = _mm_clmulepi64_si128(a1, b1, 0x01);
+	T2 = _mm_clmulepi64_si128(a1, b1, 0x10);
+	T3 = _mm_clmulepi64_si128(a1, b1, 0x11);
+
+	/* fold the cross terms in: T3:T0 is the 256-bit product */
+	T1 = _mm_xor_si128(T1, T2);
+	T2 = _mm_slli_si128(T1, 8);
+	T1 = _mm_srli_si128(T1, 8);
+	T0 = _mm_xor_si128(T0, T2);
+	T3 = _mm_xor_si128(T3, T1);
+
+	/* shift T3:T0 left by one bit, carrying between 32-bit lanes */
+	T4 = _mm_srli_epi32(T0, 31);
+	T0 = _mm_slli_epi32(T0, 1);
+
+	T5 = _mm_srli_epi32(T3, 31);
+	T3 = _mm_slli_epi32(T3, 1);
+
+	T2 = _mm_srli_si128(T4, 12);
+	T5 = _mm_slli_si128(T5, 4);
+	T4 = _mm_slli_si128(T4, 4);
+	T0 = _mm_or_si128(T0, T4);
+	T3 = _mm_or_si128(T3, T5);
+	T3 = _mm_or_si128(T3, T2);
+
+	/* reduction: fold the low half T0 into the high half T3 */
+	T4 = _mm_slli_epi32(T0, 31);
+	T5 = _mm_slli_epi32(T0, 30);
+	T2 = _mm_slli_epi32(T0, 25);
+
+	T4 = _mm_xor_si128(T4, T5);
+	T4 = _mm_xor_si128(T4, T2);
+	T5 = _mm_srli_si128(T4, 4);
+	T3 = _mm_xor_si128(T3, T5);
+	T4 = _mm_slli_si128(T4, 12);
+	T0 = _mm_xor_si128(T0, T4);
+	T3 = _mm_xor_si128(T3, T0);
+
+	T4 = _mm_srli_epi32(T0, 1);
+	T1 = _mm_srli_epi32(T0, 2);
+	T2 = _mm_srli_epi32(T0, 7);
+	T3 = _mm_xor_si128(T3, T1);
+	T3 = _mm_xor_si128(T3, T2);
+	T3 = _mm_xor_si128(T3, T4);
+
+	T3 = _mm_shuffle_epi8(T3, MASK);
+
+	_mm_storeu_si128((__m128i*)r, T3);
+
+	return gf128_from_bytes(r);
+}
+
+/*
+ * Multiply by x (the "A * 2" of the header comment).
+ *
+ * On the byte-reversed value the 128 bits are shifted right by one; when the
+ * bit shifted out is set, 0xe1 is XORed into the top byte.
+ */
+gf128_t gf128_mul2(gf128_t ga)
+{
+	uint8_t r[16], a[16];
+
+	const __m128i MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+	/* _mm_set_epi8 takes char: cast so 0xe1/0x80 are not out-of-range ints */
+	__m128i MASK1 = _mm_set_epi8((char)0xe1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+	__m128i MASK2 = _mm_set_epi8((char)0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+
+	__m128i T0, T1, T2, T3, T4, T5;
+
+	gf128_to_bytes(ga, a);
+
+	__m128i a1 = _mm_loadu_si128((const __m128i*)a);
+	a1 = _mm_shuffle_epi8(a1, MASK);
+
+	/* per-lane shift right by one ... */
+	T0 = _mm_srli_epi64(a1,1);
+
+	/* ... T1 keeps the bit each 64-bit lane dropped, moved to the lane's top */
+	T1 = _mm_slli_epi64(a1,63);
+	/* dropped bit of the high lane becomes the top bit of the low lane */
+	T2 = _mm_shuffle_epi32(T1,0x0C);
+
+	/* dropped bit of the low lane selects the 0xe1 reduction byte */
+	T3 = _mm_shuffle_epi32(T1,0x40);
+	T4 = _mm_cmpeq_epi8(T3,MASK2);
+	T3 = _mm_and_si128(T4,MASK1);
+
+	T5 = _mm_xor_si128(T0,T2);
+	T5 = _mm_xor_si128(T5,T3);
+
+	T5 = _mm_shuffle_epi8(T5, MASK);
+
+	_mm_storeu_si128((__m128i*)r, T5);
+
+	return gf128_from_bytes(r);
+}
diff --git a/tools/sm3speed.c b/tools/sm3speed.c
new file mode 100644
index 00000000..42f6d401
--- /dev/null
+++ b/tools/sm3speed.c
@@ -0,0 +1,155 @@
+/*
+ *  Copyright 2014-2022 The GmSSL Project. All Rights Reserved.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the License); you may
+ *  not use this file except in compliance with the License.
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef WIN32
+#include
+
+/* Run flag: set before each measurement, cleared when the timer expires. */
+static volatile LONG finish;
+
+/* One-shot timer armed by speed_timer_start(). */
+static HANDLE speed_timer;
+
+/*
+ * Timer callback, kept with its original name and signature.
+ * All arguments are ignored; it only clears the run flag.
+ */
+VOID CALLBACK TimerProc_sm3(HWND hwnd, UINT message, UINT iTimerID, DWORD dwTime)
+{
+	(void)hwnd;
+	(void)message;
+	(void)iTimerID;
+	(void)dwTime;
+	InterlockedExchange(&finish, 0);
+}
+
+/* Timer-queue callback: runs on a worker thread and forwards to TimerProc_sm3(). */
+static VOID CALLBACK speed_timer_fired(PVOID param, BOOLEAN fired)
+{
+	(void)param;
+	(void)fired;
+	TimerProc_sm3(NULL, 0, 0, 0);
+}
+
+/*
+ * Set the run flag and arm a one-shot timer that clears it after `seconds`.
+ *
+ * SetTimer() was used here before, but its callback is only delivered while
+ * the calling thread dispatches messages; the busy benchmark loop never does,
+ * so the flag was never cleared. A timer-queue timer needs no message loop.
+ * Returns 1 on success, -1 when the timer cannot be created.
+ */
+static int speed_timer_start(unsigned int seconds)
+{
+	finish = 1;
+	if (!CreateTimerQueueTimer(&speed_timer, NULL, speed_timer_fired, NULL,
+		seconds * 1000, 0, WT_EXECUTEONLYONCE)) {
+		speed_timer = NULL;
+		return -1;
+	}
+	return 1;
+}
+
+/* Delete the timer, waiting for a callback that may still be running. */
+static void speed_timer_stop(void)
+{
+	if (speed_timer != NULL) {
+		DeleteTimerQueueTimer(NULL, speed_timer, INVALID_HANDLE_VALUE);
+		speed_timer = NULL;
+	}
+}
+
+/*
+ * Fill `buf` with `len` random bytes; returns 1 on success, -1 on failure.
+ * The provider handle is acquired first: it used to be handed to
+ * CryptGenRandom() without ever being initialized.
+ */
+static int speed_rand_bytes(uint8_t *buf, size_t len)
+{
+	HCRYPTPROV prov = 0;
+	int ret = -1;
+
+	if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
+		printf("Can not acquire crypto provider\n");
+		return -1;
+	}
+	if (CryptGenRandom(prov, (DWORD)len, buf)) {
+		ret = 1;
+	} else {
+		printf("Can not generate random bytes\n");
+	}
+	CryptReleaseContext(prov, 0);
+	return ret;
+}
+#else
+#include
+#include
+
+/* Run flag: set before each measurement, cleared by the SIGALRM handler. */
+static volatile sig_atomic_t finish;
+
+/* SIGALRM handler: clears the run flag (the test used to be an assignment). */
+void sig_alm_handler_sm3(int sig_num)
+{
+	if (sig_num == SIGALRM)
+		finish = 0;
+}
+
+/*
+ * Set the run flag and arm a one-shot ITIMER_REAL for `seconds`.
+ * The handler is installed on every call because signal() may, on some
+ * systems, reset the disposition once the handler has run.
+ * Returns 1 on success, -1 when the handler or the timer cannot be set.
+ */
+static int speed_timer_start(unsigned int seconds)
+{
+	struct itimerval timeout;
+
+	timeout.it_value.tv_sec = seconds;
+	timeout.it_value.tv_usec = 0;
+	timeout.it_interval.tv_sec = 0;
+	timeout.it_interval.tv_usec = 0;
+
+	finish = 1;
+	if (signal(SIGALRM, sig_alm_handler_sm3) == SIG_ERR) {
+		return -1;
+	}
+	if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) {
+		return -1;
+	}
+	return 1;
+}
+
+/* Nothing to release: the interval timer is one-shot and has already fired. */
+static void speed_timer_stop(void)
+{
+}
+
+/* Fill `buf` with `len` bytes from /dev/urandom; returns 1 on success, -1 on failure. */
+static int speed_rand_bytes(uint8_t *buf, size_t len)
+{
+	FILE *fs_p = fopen("/dev/urandom", "rb");
+	int ret = -1;
+
+	if (NULL == fs_p) {
+		printf("Can not open /dev/urandom\n");
+		return -1;
+	}
+	if (fread(buf, 1, len, fs_p) == len) {
+		ret = 1;
+	} else {
+		printf("Can not read /dev/urandom\n");
+	}
+	fclose(fs_p);
+	return ret;
+}
+#endif
+
+/* Seconds spent measuring each block size. */
+#define SM3SPEED_SECONDS 3
+
+/*
+ * Benchmark sm3_digest() on random inputs of 16 .. 16384 bytes.
+ *
+ * For every size the digest is recomputed in a loop until the timer clears
+ * `finish`; the iteration counts are printed per size and then as a table of
+ * KiB hashed per second. Returns 1 on success and -1 when the inputs cannot
+ * be prepared or the timer cannot be started.
+ */
+int test_sm3(void)
+{
+	static const int sizebox[] = {16, 64, 256, 1024, 8192, 16384};
+	enum { NUM_SIZES = sizeof(sizebox) / sizeof(sizebox[0]) };
+	int countbox[NUM_SIZES] = {0};
+	uint8_t *testhex[NUM_SIZES] = {NULL};
+	uint8_t dgst[32];
+	int ret = -1;
+	int i;
+
+	for (i = 0; i < NUM_SIZES; i++) {
+		testhex[i] = malloc((size_t)sizebox[i]);
+		if (testhex[i] == NULL) {
+			printf("Can not allocate %d bytes\n", sizebox[i]);
+			goto end;
+		}
+		if (speed_rand_bytes(testhex[i], (size_t)sizebox[i]) != 1) {
+			goto end;
+		}
+	}
+
+	for (i = 0; i < NUM_SIZES; i++) {
+		int count = 0;
+
+		printf("Doing sm3 for %ds on %d size blocks: ", SM3SPEED_SECONDS, sizebox[i]);
+		/* the line has no newline yet: flush so it shows before the busy loop */
+		fflush(stdout);
+		if (speed_timer_start(SM3SPEED_SECONDS) != 1) {
+			printf("Can not start timer\n");
+			goto end;
+		}
+		while (finish) {
+			sm3_digest(testhex[i], sizebox[i], dgst);
+			count++;
+		}
+		speed_timer_stop();
+		countbox[i] = count;
+		printf("%d sm3's in %ds\n", count, SM3SPEED_SECONDS);
+	}
+
+	printf("type\t\t16 bytes\t64 bytes\t256 bytes\t1024 bytes\t8192 bytes\t16384 bytes\n");
+	printf("sm3\t");
+	for (i = 0; i < NUM_SIZES; i++) {
+		/* computed in double: count * size can exceed INT_MAX, and the old
+		 * integer division by 1024 dropped the fractional KiB */
+		printf("\t%.2fK", (double)countbox[i] * sizebox[i] / 1024 / SM3SPEED_SECONDS);
+	}
+	printf("\n");
+	ret = 1;
+
+end:
+	for (i = 0; i < NUM_SIZES; i++) {
+		free(testhex[i]);
+	}
+	return ret;
+}
+
+/* Tool entry point: returns 1 when the benchmark ran, -1 when it failed. */
+int sm3speed_main(void)
+{
+	if (test_sm3() != 1) {
+		return -1;
+	}
+	return 1;
+}
diff --git a/tools/sm4speed.c b/tools/sm4speed.c
new file mode 100644
index 00000000..cc90b455
--- /dev/null
+++ b/tools/sm4speed.c
@@ -0,0 +1,360 @@
+/*
+ *  Copyright 2014-2022 The GmSSL Project. All Rights Reserved.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the License); you may
+ *  not use this file except in compliance with the License.
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef WIN32
+#include
+
+/* Run flag: set before each measurement, cleared when the timer expires. */
+static volatile LONG finish;
+
+/* One-shot timer armed by speed_timer_start(). */
+static HANDLE speed_timer;
+
+/*
+ * Timer callback, kept with its original name and signature.
+ * All arguments are ignored; it only clears the run flag.
+ */
+VOID CALLBACK TimerProc_sm4(HWND hwnd, UINT message, UINT iTimerID, DWORD dwTime)
+{
+	(void)hwnd;
+	(void)message;
+	(void)iTimerID;
+	(void)dwTime;
+	InterlockedExchange(&finish, 0);
+}
+
+/* Timer-queue callback: runs on a worker thread and forwards to TimerProc_sm4(). */
+static VOID CALLBACK speed_timer_fired(PVOID param, BOOLEAN fired)
+{
+	(void)param;
+	(void)fired;
+	TimerProc_sm4(NULL, 0, 0, 0);
+}
+
+/*
+ * Set the run flag and arm a one-shot timer that clears it after `seconds`.
+ *
+ * SetTimer() was used here before (two of the three calls even named an
+ * undefined `TimerProc`), but its callback is only delivered while the
+ * calling thread dispatches messages; the busy benchmark loop never does.
+ * A timer-queue timer needs no message loop.
+ * Returns 1 on success, -1 when the timer cannot be created.
+ */
+static int speed_timer_start(unsigned int seconds)
+{
+	finish = 1;
+	if (!CreateTimerQueueTimer(&speed_timer, NULL, speed_timer_fired, NULL,
+		seconds * 1000, 0, WT_EXECUTEONLYONCE)) {
+		speed_timer = NULL;
+		return -1;
+	}
+	return 1;
+}
+
+/* Delete the timer, waiting for a callback that may still be running. */
+static void speed_timer_stop(void)
+{
+	if (speed_timer != NULL) {
+		DeleteTimerQueueTimer(NULL, speed_timer, INVALID_HANDLE_VALUE);
+		speed_timer = NULL;
+	}
+}
+
+/*
+ * Fill `buf` with `len` random bytes; returns 1 on success, -1 on failure.
+ * The provider handle is acquired first: it used to be handed to
+ * CryptGenRandom() without ever being initialized.
+ */
+static int speed_rand_bytes(uint8_t *buf, size_t len)
+{
+	HCRYPTPROV prov = 0;
+	int ret = -1;
+
+	if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
+		printf("Can not acquire crypto provider\n");
+		return -1;
+	}
+	if (CryptGenRandom(prov, (DWORD)len, buf)) {
+		ret = 1;
+	} else {
+		printf("Can not generate random bytes\n");
+	}
+	CryptReleaseContext(prov, 0);
+	return ret;
+}
+
+#else
+#include
+#include
+
+/* Run flag: set before each measurement, cleared by the SIGALRM handler. */
+static volatile sig_atomic_t finish;
+
+/* SIGALRM handler: clears the run flag (the test used to be an assignment). */
+void sig_alm_handler_sm4(int sig_num)
+{
+	if (sig_num == SIGALRM)
+		finish = 0;
+}
+
+/*
+ * Set the run flag and arm a one-shot ITIMER_REAL for `seconds`.
+ * The handler is installed on every call because signal() may, on some
+ * systems, reset the disposition once the handler has run.
+ * Returns 1 on success, -1 when the handler or the timer cannot be set.
+ */
+static int speed_timer_start(unsigned int seconds)
+{
+	struct itimerval timeout;
+
+	timeout.it_value.tv_sec = seconds;
+	timeout.it_value.tv_usec = 0;
+	timeout.it_interval.tv_sec = 0;
+	timeout.it_interval.tv_usec = 0;
+
+	finish = 1;
+	if (signal(SIGALRM, sig_alm_handler_sm4) == SIG_ERR) {
+		return -1;
+	}
+	if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) {
+		return -1;
+	}
+	return 1;
+}
+
+/* Nothing to release: the interval timer is one-shot and has already fired. */
+static void speed_timer_stop(void)
+{
+}
+
+/* Fill `buf` with `len` bytes from /dev/urandom; returns 1 on success, -1 on failure. */
+static int speed_rand_bytes(uint8_t *buf, size_t len)
+{
+	FILE *fs_p = fopen("/dev/urandom", "rb");
+	int ret = -1;
+
+	if (NULL == fs_p) {
+		printf("Can not open /dev/urandom\n");
+		return -1;
+	}
+	if (fread(buf, 1, len, fs_p) == len) {
+		ret = 1;
+	} else {
+		printf("Can not read /dev/urandom\n");
+	}
+	fclose(fs_p);
+	return ret;
+}
+
+#endif
+
+/* Seconds spent measuring each (mode, block size) pair. */
+#define SM4SPEED_SECONDS 3
+
+/*
+ * Benchmark SM4 in CBC, CTR and GCM mode on random inputs of 16 .. 16384 bytes.
+ *
+ * For every mode and size the encryption call is repeated until the timer
+ * clears `finish`; the iteration counts are printed per run and then as a
+ * table of KiB processed per second. Returns 1 on success and -1 when the
+ * inputs cannot be prepared or the timer cannot be started.
+ */
+int test_sm4(void)
+{
+	enum { MODE_CBC, MODE_CTR, MODE_GCM, NUM_MODES };
+	static const char *const mode_names[NUM_MODES] = {"sm4-cbc", "sm4-ctr", "sm4-gcm"};
+	static const int sizebox[] = {16, 64, 256, 1024, 8192, 16384};
+	enum { NUM_SIZES = sizeof(sizebox) / sizeof(sizebox[0]) };
+
+	uint8_t user_key[16] = {
+		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
+		0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
+	};
+	uint8_t iv[16] = {
+		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
+		0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
+	};
+	uint8_t aad[16] = {
+		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
+		0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
+	};
+	uint8_t ctr[16] = {0};
+	uint8_t mac[16] = {0};
+	/* sized for the largest entry of sizebox */
+	uint8_t out[16384] = {0};
+
+	SM4_KEY key;
+	/* counts are stored mode-major: countbox[mode * NUM_SIZES + size index] */
+	int countbox[NUM_MODES * NUM_SIZES] = {0};
+	uint8_t *testhex[NUM_SIZES] = {NULL};
+	int ret = -1;
+	int m, i;
+
+	for (i = 0; i < NUM_SIZES; i++) {
+		testhex[i] = malloc((size_t)sizebox[i]);
+		if (testhex[i] == NULL) {
+			printf("Can not allocate %d bytes\n", sizebox[i]);
+			goto end;
+		}
+		if (speed_rand_bytes(testhex[i], (size_t)sizebox[i]) != 1) {
+			goto end;
+		}
+	}
+
+	sm4_set_encrypt_key(&key, user_key);
+
+	for (m = 0; m < NUM_MODES; m++) {
+		for (i = 0; i < NUM_SIZES; i++) {
+			int count = 0;
+
+			printf("Doing %s for %ds on %d size blocks: ",
+				mode_names[m], SM4SPEED_SECONDS, sizebox[i]);
+			/* the line has no newline yet: flush so it shows before the busy loop */
+			fflush(stdout);
+			if (speed_timer_start(SM4SPEED_SECONDS) != 1) {
+				printf("Can not start timer\n");
+				goto end;
+			}
+			/* the mode is chosen outside the timed loop so that the
+			 * dispatch is not part of what is measured */
+			switch (m) {
+			case MODE_CBC:
+				while (finish) {
+					sm4_cbc_encrypt(&key, iv, testhex[i], sizebox[i] / 16, out);
+					count++;
+				}
+				break;
+			case MODE_CTR:
+				while (finish) {
+					sm4_ctr_encrypt(&key, ctr, testhex[i], sizebox[i], out);
+					count++;
+				}
+				break;
+			default:
+				while (finish) {
+					sm4_gcm_encrypt(&key, iv, 16, aad, 16, testhex[i], sizebox[i], out, 16, mac);
+					count++;
+				}
+				break;
+			}
+			speed_timer_stop();
+			countbox[m * NUM_SIZES + i] = count;
+			printf("%d %s's in %ds\n", count, mode_names[m], SM4SPEED_SECONDS);
+		}
+	}
+
+	printf("type\t\t16 bytes\t64 bytes\t256 bytes\t1024 bytes\t8192 bytes\t16384 bytes\n");
+	for (m = 0; m < NUM_MODES; m++) {
+		printf("%s\t", mode_names[m]);
+		for (i = 0; i < NUM_SIZES; i++) {
+			/* computed in double: count * size can exceed INT_MAX, and the
+			 * old integer division by 1024 dropped the fractional KiB */
+			printf("\t%.2fK", (double)countbox[m * NUM_SIZES + i] * sizebox[i]
+				/ 1024 / SM4SPEED_SECONDS);
+		}
+		printf("\n");
+	}
+	ret = 1;
+
+end:
+	for (i = 0; i < NUM_SIZES; i++) {
+		free(testhex[i]);
+	}
+	return ret;
+}
+
+/* Tool entry point: returns 1 when the benchmark ran, -1 when it failed. */
+int sm4speed_main(void)
+{
+	if (test_sm4() != 1) {
+		return -1;
+	}
+	return 1;
+}