mirror of
https://github.com/guanzhi/GmSSL.git
synced 2026-05-07 08:56:17 +08:00
Change sm4_cl_encrypt to sm4_cl_ctr32_encrypt
This commit is contained in:
@@ -28,7 +28,7 @@ extern "C" {
|
||||
|
||||
typedef struct {
|
||||
uint32_t rk[32];
|
||||
size_t workgroup_size;
|
||||
//size_t workgroup_size;
|
||||
cl_context context;
|
||||
cl_command_queue queue;
|
||||
cl_program program;
|
||||
@@ -40,7 +40,7 @@ typedef struct {
|
||||
|
||||
int sm4_cl_set_encrypt_key(SM4_CL_CTX *ctx, const uint8_t key[16]);
|
||||
int sm4_cl_set_decrypt_key(SM4_CL_CTX *ctx, const uint8_t key[16]);
|
||||
int sm4_cl_encrypt(SM4_CL_CTX *ctx, const uint8_t *in, size_t nblocks, uint8_t *out);
|
||||
int sm4_cl_ctr32_encrypt(SM4_CL_CTX *ctx, uint8_t iv[16], const uint8_t *in, size_t nblocks, uint8_t *out);
|
||||
void sm4_cl_cleanup(SM4_CL_CTX *ctx);
|
||||
|
||||
|
||||
|
||||
82
src/sm4_cl.c
82
src/sm4_cl.c
@@ -13,6 +13,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <gmssl/sm4_cl.h>
|
||||
#include <gmssl/endian.h>
|
||||
#include <gmssl/error.h>
|
||||
|
||||
|
||||
@@ -197,15 +198,18 @@ static int sm4_cl_set_key(SM4_CL_CTX *ctx, const uint8_t key[16], int enc)
|
||||
free(log);
|
||||
goto end;
|
||||
}
|
||||
if (!(ctx->kernel = clCreateKernel(ctx->program, "sm4_encrypt", &err))) {
|
||||
if (!(ctx->kernel = clCreateKernel(ctx->program, "sm4_ctr32_encrypt", &err))) {
|
||||
cl_error_print(err);
|
||||
goto end;
|
||||
}
|
||||
/*
|
||||
// Apple M2 the CL_KERNEL_WORK_GROUP_SIZE is 256, but the valid work_group_size is 32
|
||||
if ((err = clGetKernelWorkGroupInfo(ctx->kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||
sizeof(ctx->workgroup_size), &ctx->workgroup_size, NULL)) != CL_SUCCESS) {
|
||||
cl_error_print(err);
|
||||
goto end;
|
||||
}
|
||||
*/
|
||||
|
||||
if (enc) {
|
||||
sm4_set_encrypt_key((SM4_KEY *)ctx->rk, key);
|
||||
@@ -239,51 +243,71 @@ int sm4_cl_set_decrypt_key(SM4_CL_CTX *ctx, const uint8_t key[16])
|
||||
return sm4_cl_set_key(ctx, key, 0);
|
||||
}
|
||||
|
||||
int sm4_cl_encrypt(SM4_CL_CTX *ctx, const uint8_t *in, size_t nblocks, uint8_t *out)
|
||||
int sm4_cl_ctr32_encrypt(SM4_CL_CTX *ctx, uint8_t iv[16], const uint8_t *in, size_t nblocks, uint8_t *out)
|
||||
{
|
||||
int ret = -1;
|
||||
cl_mem mem;
|
||||
cl_int err;
|
||||
size_t len = 16 * nblocks;
|
||||
uint32_t ctr[4];
|
||||
cl_mem mem_ctr = NULL;
|
||||
size_t inlen = SM4_BLOCK_SIZE * nblocks;
|
||||
cl_mem mem_buf = NULL;
|
||||
cl_uint dim = 1;
|
||||
size_t global_work_size = nblocks;
|
||||
size_t local_work_size = 32; //ctx->workgroup_size;
|
||||
void *p;
|
||||
size_t local_work_size = 32;
|
||||
|
||||
if (out != in)
|
||||
memcpy(out, in, len);
|
||||
if (global_work_size % local_work_size) {
|
||||
error_print();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(mem = clCreateBuffer(ctx->context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, len, out, &err))) {
|
||||
ctr[0] = GETU32(iv);
|
||||
ctr[1] = GETU32(iv + 4);
|
||||
ctr[2] = GETU32(iv + 8);
|
||||
ctr[3] = GETU32(iv + 12);
|
||||
|
||||
if (!(mem_ctr = clCreateBuffer(ctx->context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, sizeof(ctr), ctr, &err))) {
|
||||
cl_error_print(err);
|
||||
return -1;
|
||||
}
|
||||
if ((err = clSetKernelArg(ctx->kernel, 1, sizeof(cl_mem), &mem)) != CL_SUCCESS) {
|
||||
if ((err = clSetKernelArg(ctx->kernel, 1, sizeof(cl_mem), &mem_ctr)) != CL_SUCCESS) {
|
||||
cl_error_print(err);
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (out != in) {
|
||||
memcpy(out, in, inlen);
|
||||
}
|
||||
if (!(mem_buf = clCreateBuffer(ctx->context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, inlen, out, &err))) {
|
||||
cl_error_print(err);
|
||||
return -1;
|
||||
}
|
||||
if ((err = clSetKernelArg(ctx->kernel, 2, sizeof(cl_mem), &mem_buf)) != CL_SUCCESS) {
|
||||
cl_error_print(err);
|
||||
goto end;
|
||||
}
|
||||
|
||||
// on Apple M2, CL_KERNEL_WORK_GROUP_SIZE = 256
|
||||
// but kernel will fail when local_work_size > 32.
|
||||
// local_work_size might be restricted by the resources the kernel used.
|
||||
if ((err = clEnqueueNDRangeKernel(ctx->queue, ctx->kernel,
|
||||
dim, NULL, &global_work_size, &local_work_size, 0, NULL, NULL)) != CL_SUCCESS) {
|
||||
cl_error_print(err);
|
||||
goto end;
|
||||
}
|
||||
if (!(p = clEnqueueMapBuffer(ctx->queue, mem, CL_TRUE, 0, 0, len, 0, NULL, NULL, &err))) {
|
||||
if (!clEnqueueMapBuffer(ctx->queue, mem_buf, CL_TRUE, 0, 0, inlen, 0, NULL, NULL, &err)) {
|
||||
cl_error_print(err);
|
||||
goto end;
|
||||
}
|
||||
if (p != out) {
|
||||
fprintf(stderr, "%s %d: shit\n", __FILE__, __LINE__);
|
||||
goto end;
|
||||
}
|
||||
|
||||
ctr[3] += (uint32_t)nblocks;
|
||||
PUTU32(iv + 12, ctr[3]);
|
||||
ret = 1;
|
||||
end:
|
||||
clReleaseMemObject(mem);
|
||||
if (mem_ctr) clReleaseMemObject(mem_ctr);
|
||||
if (mem_buf) clReleaseMemObject(mem_buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define KERNEL(...) #__VA_ARGS__
|
||||
static const char *sm4_cl_src = KERNEL(
|
||||
|
||||
@@ -306,16 +330,14 @@ __constant unsigned char SBOX[256] = {
|
||||
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
|
||||
};
|
||||
|
||||
__kernel void sm4_encrypt(__global const unsigned int *rkey, __global unsigned char *data)
|
||||
__kernel void sm4_ctr32_encrypt(__global const unsigned int *rkey, __global const unsigned int *ctr, __global unsigned char *data)
|
||||
{
|
||||
__local unsigned char S[256];
|
||||
__local unsigned int rk[32];
|
||||
|
||||
unsigned int x0, x1, x2, x3, x4, i, t;
|
||||
uint global_id = get_global_id(0);
|
||||
__global unsigned char *p = data + 16 * global_id;
|
||||
__global unsigned int *in = (__global unsigned int *)p;
|
||||
__global unsigned int *out = (__global unsigned int *)p;
|
||||
__global unsigned int *out = (__global unsigned int *)(data + 16 * global_id);
|
||||
|
||||
if (get_local_id(0) == 0) {
|
||||
for (i = 0; i < 256; i++) {
|
||||
@@ -326,10 +348,10 @@ __kernel void sm4_encrypt(__global const unsigned int *rkey, __global unsigned c
|
||||
}
|
||||
}
|
||||
|
||||
x0 = (in[0] >> 24) | ((in[0] >> 8) & 0xff00) | ((in[0] << 8) & 0xff0000) | (in[0] << 24);
|
||||
x1 = (in[1] >> 24) | ((in[1] >> 8) & 0xff00) | ((in[1] << 8) & 0xff0000) | (in[1] << 24);
|
||||
x2 = (in[2] >> 24) | ((in[2] >> 8) & 0xff00) | ((in[2] << 8) & 0xff0000) | (in[2] << 24);
|
||||
x3 = (in[3] >> 24) | ((in[3] >> 8) & 0xff00) | ((in[3] << 8) & 0xff0000) | (in[3] << 24);
|
||||
x0 = ctr[0];
|
||||
x1 = ctr[1];
|
||||
x2 = ctr[2];
|
||||
x3 = ctr[3] + global_id;
|
||||
|
||||
for (i = 0; i < 31; i++) {
|
||||
x4 = x1 ^ x2 ^ x3 ^ rk[i];
|
||||
@@ -357,10 +379,10 @@ __kernel void sm4_encrypt(__global const unsigned int *rkey, __global unsigned c
|
||||
((x4 << 18) | (x4 >> (32 - 18))) ^
|
||||
((x4 << 24) | (x4 >> (32 - 24))));
|
||||
|
||||
out[0] = (x4 >> 24) | ((x4 >> 8) & 0xff00) | ((x4 << 8) & 0xff0000) | (x4 << 24);
|
||||
out[1] = (x3 >> 24) | ((x3 >> 8) & 0xff00) | ((x3 << 8) & 0xff0000) | (x3 << 24);
|
||||
out[2] = (x2 >> 24) | ((x2 >> 8) & 0xff00) | ((x2 << 8) & 0xff0000) | (x2 << 24);
|
||||
out[3] = (x1 >> 24) | ((x1 >> 8) & 0xff00) | ((x1 << 8) & 0xff0000) | (x1 << 24);
|
||||
out[0] ^= (x4 >> 24) | ((x4 >> 8) & 0xff00) | ((x4 << 8) & 0xff0000) | (x4 << 24);
|
||||
out[1] ^= (x3 >> 24) | ((x3 >> 8) & 0xff00) | ((x3 << 8) & 0xff0000) | (x3 << 24);
|
||||
out[2] ^= (x2 >> 24) | ((x2 >> 8) & 0xff00) | ((x2 << 8) & 0xff0000) | (x2 << 24);
|
||||
out[3] ^= (x1 >> 24) | ((x1 >> 8) & 0xff00) | ((x1 << 8) & 0xff0000) | (x1 << 24);
|
||||
}
|
||||
|
||||
);
|
||||
|
||||
@@ -18,64 +18,64 @@
|
||||
#include <gmssl/error.h>
|
||||
|
||||
|
||||
|
||||
int test_sm4_cl(void)
|
||||
static int test_sm4_cl_ctr32_encrypt(void)
|
||||
{
|
||||
const uint8_t key[16] = {
|
||||
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
|
||||
0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
|
||||
};
|
||||
const uint8_t plaintext[16] = {
|
||||
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
|
||||
0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
|
||||
};
|
||||
const uint8_t ciphertext[16] = {
|
||||
0x68, 0x1e, 0xdf, 0x34, 0xd2, 0x06, 0x96, 0x5e,
|
||||
0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46,
|
||||
};
|
||||
const char *key_hex = "0123456789abcdeffedcba9876543210";
|
||||
const char *iv_hex = "0123456789abcdeffedcba9876543210";
|
||||
const char *plain_hex = "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbccccccccccccccccddddddddddddddddeeeeeeeeeeeeeeeeffffffffffffffffeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaa";
|
||||
const char *cipher_hex = "c2b4759e78ac3cf43d0852f4e8d5f9fd7256e8a5fcb65a350ee00630912e44492a0b17e1b85b060d0fba612d8a95831638b361fd5ffacd942f081485a83ca35d";
|
||||
|
||||
int ret = -1;
|
||||
SM4_CL_CTX ctx;
|
||||
size_t nblocks = 1024;
|
||||
uint8_t key[16];
|
||||
uint8_t iv[16];
|
||||
uint8_t ctr[16];
|
||||
size_t nblocks = 64;
|
||||
uint8_t *buf = NULL;
|
||||
uint8_t *ciphertext = NULL;
|
||||
size_t len;
|
||||
size_t i;
|
||||
|
||||
|
||||
if (!(buf = (uint8_t *)malloc(16 * nblocks))) {
|
||||
error_print();
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < nblocks; i++) {
|
||||
memcpy(buf + 16 * i, plaintext, 16);
|
||||
if (!(ciphertext = (uint8_t *)malloc(16 * nblocks))) {
|
||||
error_print();
|
||||
goto end;
|
||||
}
|
||||
|
||||
hex_to_bytes(key_hex, strlen(key_hex), key, &len);
|
||||
hex_to_bytes(iv_hex, strlen(iv_hex), iv, &len);
|
||||
hex_to_bytes(plain_hex, strlen(plain_hex), buf, &len);
|
||||
hex_to_bytes(cipher_hex, strlen(cipher_hex), ciphertext, &len);
|
||||
|
||||
if (sm4_cl_set_encrypt_key(&ctx, key) != 1) {
|
||||
error_print();
|
||||
goto end;
|
||||
}
|
||||
if (sm4_cl_encrypt(&ctx, buf, nblocks, buf) != 1) {
|
||||
|
||||
memcpy(ctr, iv, sizeof(iv));
|
||||
if (sm4_cl_ctr32_encrypt(&ctx, ctr, buf, nblocks, buf) != 1) {
|
||||
error_print();
|
||||
goto end;
|
||||
}
|
||||
|
||||
for (i = 0; i < nblocks; i++) {
|
||||
//fprintf(stderr, "%zu ", i);
|
||||
//format_bytes(stderr, 0, 0, "ciphertext", buf + 16*i, 16);
|
||||
if (memcmp(buf + 16 * i, ciphertext, 16) != 0) {
|
||||
error_print();
|
||||
goto end;
|
||||
}
|
||||
if (memcmp(buf, ciphertext, len) != 0) {
|
||||
error_print();
|
||||
goto end;
|
||||
}
|
||||
|
||||
printf("%s() ok\n", __FUNCTION__);
|
||||
ret = 1;
|
||||
end:
|
||||
if (buf) free(buf);
|
||||
sm4_cl_cleanup(&ctx);
|
||||
if (buf) free(buf);
|
||||
if (ciphertext) free(ciphertext);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int test_sm4_cl_speed(void)
|
||||
static int test_sm4_cl_ctr32_encrypt_speed(void)
|
||||
{
|
||||
const uint8_t key[16] = {
|
||||
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
|
||||
@@ -90,15 +90,16 @@ int test_sm4_cl_speed(void)
|
||||
0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46,
|
||||
};
|
||||
|
||||
|
||||
int ret = -1;
|
||||
SM4_CL_CTX ctx;
|
||||
uint8_t ctr[16];
|
||||
size_t nblocks = 1024*1024;
|
||||
uint8_t *buf = NULL;
|
||||
clock_t start, end;
|
||||
double seconds;
|
||||
size_t i;
|
||||
|
||||
|
||||
if (!(buf = (uint8_t *)malloc(16 * nblocks))) {
|
||||
error_print();
|
||||
return -1;
|
||||
@@ -113,19 +114,15 @@ int test_sm4_cl_speed(void)
|
||||
}
|
||||
|
||||
start = clock();
|
||||
if (sm4_cl_encrypt(&ctx, buf, nblocks, buf) != 1) {
|
||||
if (sm4_cl_ctr32_encrypt(&ctx, ctr, buf, nblocks, buf) != 1) {
|
||||
error_print();
|
||||
goto end;
|
||||
}
|
||||
end = clock();
|
||||
|
||||
seconds = (double)(end - start)/CLOCKS_PER_SEC;
|
||||
|
||||
fprintf(stderr, "sm4_cl_encrypt: %f-MiB per seconds\n", 16/seconds);
|
||||
|
||||
|
||||
|
||||
|
||||
ret = 1;
|
||||
end:
|
||||
if (buf) free(buf);
|
||||
@@ -135,8 +132,8 @@ end:
|
||||
|
||||
int main(void)
|
||||
{
|
||||
if (test_sm4_cl() != 1) goto err;
|
||||
if (test_sm4_cl_speed() != 1) goto err;
|
||||
if (test_sm4_cl_ctr32_encrypt() != 1) goto err;
|
||||
if (test_sm4_cl_ctr32_encrypt_speed() != 1) goto err;
|
||||
printf("%s all tests passed\n", __FILE__);
|
||||
return 0;
|
||||
err:
|
||||
|
||||
Reference in New Issue
Block a user