Add SM2 X86_64 asm

Not working yet
This commit is contained in:
Zhi Guan
2024-05-24 20:04:53 +08:00
parent 32819490c7
commit 1fd989303f
3 changed files with 62 additions and 34 deletions

View File

@@ -27,6 +27,9 @@ option(ENABLE_SM9_ARM64 "Enable SM9_Z256 ARMv8 assembly" OFF)
option(ENABLE_GMUL_ARM64 "Enable GF(2^128) Multiplication AArch64 assembly" OFF) option(ENABLE_GMUL_ARM64 "Enable GF(2^128) Multiplication AArch64 assembly" OFF)
option(ENABLE_SM2_AMD64 "Enable SM2_Z256 X86_64 assembly" OFF)
option(ENABLE_SM3_SSE "Enable SM3 SSE assembly implementation" OFF) option(ENABLE_SM3_SSE "Enable SM3 SSE assembly implementation" OFF)
option(ENABLE_SM4_CTR_AESNI_AVX "Enable SM4 CTR AESNI+AVX assembly implementation" OFF) option(ENABLE_SM4_CTR_AESNI_AVX "Enable SM4 CTR AESNI+AVX assembly implementation" OFF)
@@ -259,6 +262,13 @@ if (ENABLE_SM2_ARM64)
list(APPEND src src/sm2_z256_arm64.S) list(APPEND src src/sm2_z256_arm64.S)
endif() endif()
if (ENABLE_SM2_AMD64)
message(STATUS "ENABLE_SM2_AMD64 is ON")
add_definitions(-DENABLE_SM2_AMD64)
enable_language(ASM)
list(APPEND src src/sm2_z256_amd64.S)
endif()
if (ENABLE_SM2_NEON) if (ENABLE_SM2_NEON)
message(STATUS "ENABLE_SM2_NEON is ON") message(STATUS "ENABLE_SM2_NEON is ON")
add_definitions(-DENABLE_SM2_NEON) add_definitions(-DENABLE_SM2_NEON)

View File

@@ -400,7 +400,7 @@ const uint64_t SM2_Z256_NEG_P[4] = {
1, ((uint64_t)1 << 32) - 1, 0, ((uint64_t)1 << 32), 1, ((uint64_t)1 << 32) - 1, 0, ((uint64_t)1 << 32),
}; };
#ifndef ENABLE_SM2_ARM64 #if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_modp_add(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b) void sm2_z256_modp_add(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b)
{ {
uint64_t c; uint64_t c;
@@ -481,7 +481,7 @@ const uint64_t SM2_Z256_P_PRIME[4] = {
// mont(1) (mod p) = 2^256 mod p = 2^256 - p // mont(1) (mod p) = 2^256 mod p = 2^256 - p
const uint64_t *SM2_Z256_MODP_MONT_ONE = SM2_Z256_NEG_P; const uint64_t *SM2_Z256_MODP_MONT_ONE = SM2_Z256_NEG_P;
#if defined(ENABLE_SM2_ARM64) #if defined(ENABLE_SM2_ARM64) || defined(ENABLE_SM2_AMD64)
// src/sm2_z256_armv8.S // src/sm2_z256_armv8.S
#elif defined(ENABLE_SM2_Z256_NEON) #elif defined(ENABLE_SM2_Z256_NEON)
#include <arm_neon.h> #include <arm_neon.h>
@@ -812,7 +812,7 @@ const uint64_t SM2_Z256_NEG_N[4] = {
0xac440bf6c62abedd, 0x8dfc2094de39fad4, 0x0000000000000000, 0x0000000100000000, 0xac440bf6c62abedd, 0x8dfc2094de39fad4, 0x0000000000000000, 0x0000000100000000,
}; };
#ifndef ENABLE_SM2_ARM64 #if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_modn_add(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b) void sm2_z256_modn_add(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b)
{ {
uint64_t c; uint64_t c;
@@ -868,7 +868,7 @@ const uint64_t *sm2_z256_order_minus_one(void) {
const uint64_t *SM2_Z256_MODN_MONT_ONE = SM2_Z256_NEG_N; const uint64_t *SM2_Z256_MODN_MONT_ONE = SM2_Z256_NEG_N;
#ifndef ENABLE_SM2_ARM64 #if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_modn_mont_mul(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b) void sm2_z256_modn_mont_mul(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b)
{ {
sm2_z512_t z; sm2_z512_t z;
@@ -917,7 +917,7 @@ void sm2_z256_modn_mul(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b)
sm2_z256_modn_from_mont(r, r); sm2_z256_modn_from_mont(r, r);
} }
#ifndef ENABLE_SM2_ARM64 #if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_modn_mont_sqr(sm2_z256_t r, const sm2_z256_t a) void sm2_z256_modn_mont_sqr(sm2_z256_t r, const sm2_z256_t a)
{ {
sm2_z256_modn_mont_mul(r, a, a); sm2_z256_modn_mont_mul(r, a, a);
@@ -1020,7 +1020,7 @@ void sm2_z256_modn_inv(sm2_z256_t r, const sm2_z256_t a)
} }
#ifndef ENABLE_SM2_ARM64 #if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
// mont(mont(a), 1) = aR * 1 * R^-1 (mod n) = a (mod p) // mont(mont(a), 1) = aR * 1 * R^-1 (mod n) = a (mod p)
void sm2_z256_modn_from_mont(sm2_z256_t r, const sm2_z256_t a) void sm2_z256_modn_from_mont(sm2_z256_t r, const sm2_z256_t a)
@@ -1149,7 +1149,7 @@ int sm2_z256_point_get_xy(const SM2_Z256_POINT *P, uint64_t x[4], uint64_t y[4])
return 1; return 1;
} }
#ifndef ENABLE_SM2_ARM64 #if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_point_dbl(SM2_Z256_POINT *R, const SM2_Z256_POINT *A) void sm2_z256_point_dbl(SM2_Z256_POINT *R, const SM2_Z256_POINT *A)
{ {
const uint64_t *X1 = A->X; const uint64_t *X1 = A->X;
@@ -1475,7 +1475,7 @@ void sm2_z256_point_copy_affine(SM2_Z256_POINT *R, const SM2_Z256_AFFINE_POINT *
sm2_z256_copy(R->Z, SM2_Z256_MODP_MONT_ONE); sm2_z256_copy(R->Z, SM2_Z256_MODP_MONT_ONE);
} }
#ifndef ENABLE_SM2_ARM64 #if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_point_add_affine(SM2_Z256_POINT *r, const SM2_Z256_POINT *a, const SM2_Z256_AFFINE_POINT *b) void sm2_z256_point_add_affine(SM2_Z256_POINT *r, const SM2_Z256_POINT *a, const SM2_Z256_AFFINE_POINT *b)
{ {
sm2_z256_t U2, S2; sm2_z256_t U2, S2;

View File

@@ -31,10 +31,10 @@ L$Three:
L$ONE_mont: L$ONE_mont:
.quad 0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000 .quad 0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000
.globl _ecp_sm2z256_mul_by_2 .globl func(sm2_z256_modp_dbl)
.p2align 6 .p2align 6
_ecp_sm2z256_mul_by_2: func(sm2_z256_modp_dbl):
pushq %r12 pushq %r12
pushq %r13 pushq %r13
@@ -76,10 +76,10 @@ _ecp_sm2z256_mul_by_2:
.globl _ecp_sm2z256_div_by_2 .globl func(sm2_z256_modp_haf)
.p2align 5 .p2align 5
_ecp_sm2z256_div_by_2: func(sm2_z256_modp_haf):
pushq %r12 pushq %r12
pushq %r13 pushq %r13
@@ -136,10 +136,10 @@ _ecp_sm2z256_div_by_2:
.globl _ecp_sm2z256_mul_by_3 .globl func(sm2_z256_modp_tri)
.p2align 5 .p2align 5
_ecp_sm2z256_mul_by_3: func(sm2_z256_modp_tri):
pushq %r12 pushq %r12
pushq %r13 pushq %r13
@@ -202,10 +202,10 @@ _ecp_sm2z256_mul_by_3:
.globl _ecp_sm2z256_add .globl func(sm2_z256_modp_add)
.p2align 5 .p2align 5
_ecp_sm2z256_add: func(sm2_z256_modp_add):
pushq %r12 pushq %r12
pushq %r13 pushq %r13
@@ -248,10 +248,10 @@ _ecp_sm2z256_add:
.globl _ecp_sm2z256_sub .globl func(sm2_z256_modp_sub)
.p2align 5 .p2align 5
_ecp_sm2z256_sub: func(sm2_z256_modp_sub):
pushq %r12 pushq %r12
pushq %r13 pushq %r13
@@ -294,10 +294,10 @@ _ecp_sm2z256_sub:
.globl _ecp_sm2z256_neg .globl func(sm2_z256_modp_neg)
.p2align 5 .p2align 5
_ecp_sm2z256_neg: func(sm2_z256_modp_neg):
pushq %r12 pushq %r12
pushq %r13 pushq %r13
@@ -341,10 +341,11 @@ _ecp_sm2z256_neg:
.globl _ecp_sm2z256_to_mont .globl func(sm2_z256_modp_to_mont)
.p2align 5 .p2align 5
_ecp_sm2z256_to_mont: func(sm2_z256_modp_to_mont):
// FIXME: swap arg1 arg2
leaq L$RR(%rip),%rdx leaq L$RR(%rip),%rdx
jmp L$mul_mont jmp L$mul_mont
@@ -355,10 +356,10 @@ _ecp_sm2z256_to_mont:
.globl _ecp_sm2z256_mul_mont .globl func(sm2_z256_modp_mont_mul)
.p2align 5 .p2align 5
_ecp_sm2z256_mul_mont: func(sm2_z256_modp_mont_mul):
L$mul_mont: L$mul_mont:
pushq %rbp pushq %rbp
pushq %rbx pushq %rbx
@@ -633,10 +634,10 @@ __ecp_sm2z256_mul_montq:
.globl _ecp_sm2z256_sqr_mont .globl func(sm2_z256_modp_mont_sqr)
.p2align 5 .p2align 5
_ecp_sm2z256_sqr_mont: func(sm2_z256_modp_mont_sqr):
pushq %rbp pushq %rbp
pushq %rbx pushq %rbx
pushq %r12 pushq %r12
@@ -842,10 +843,10 @@ __ecp_sm2z256_sqr_montq:
.globl _ecp_sm2z256_from_mont .globl func(sm2_z256_modp_from_mont)
.p2align 5 .p2align 5
_ecp_sm2z256_from_mont: func(sm2_z256_modp_from_mont):
pushq %r12 pushq %r12
pushq %r13 pushq %r13
@@ -1218,10 +1219,16 @@ __ecp_sm2z256_mul_by_2q:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.globl _ecp_sm2z256_point_double
.globl func(sm2_z256_point_dbl)
.p2align 5 .p2align 5
_ecp_sm2z256_point_double: func(sm2_z256_point_dbl):
pushq %rbp pushq %rbp
pushq %rbx pushq %rbx
pushq %r12 pushq %r12
@@ -1420,10 +1427,17 @@ L$point_double_shortcutq:
popq %rbp popq %rbp
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.globl _ecp_sm2z256_point_add
.globl func(sm2_z256_point_add)
.p2align 5 .p2align 5
_ecp_sm2z256_point_add: func(sm2_z256_point_add):
pushq %rbp pushq %rbp
pushq %rbx pushq %rbx
pushq %r12 pushq %r12
@@ -1816,10 +1830,14 @@ L$add_doneq:
popq %rbp popq %rbp
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.globl _ecp_sm2z256_point_add_affine
.globl func(sm2_z256_point_add_affine)
.p2align 5 .p2align 5
_ecp_sm2z256_point_add_affine: func(sm2_z256_point_add_affine):
pushq %rbp pushq %rbp
pushq %rbx pushq %rbx
pushq %r12 pushq %r12