Files
GmSSL/src/gf128_arm64.S
hrimfaxi e72f183c39 asm: add missing .note.GNU-stack markers
Add .note.GNU-stack section declarations to hand-written assembly
sources so ELF builds do not produce executables or libraries with
an executable stack.

These assembly implementations do not require an executable stack.
Adding the marker restores normal non-executable stack/NX hardening.

Signed-off-by: hrimfaxi <outmatch@gmail.com>
2026-06-18 10:15:13 +08:00

105 lines
2.2 KiB
ArmAsm

/*
* Copyright 2014-2024 The GmSSL Project. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
#include <gmssl/asm.h>
/* GF(2^128) defined by f(x) = x^128 + x^7 + x^2 + x + 1
f0 = x^128 = x^7 + x^2 + x + 1
ext([a0,a1],[b0,b1],8) => [a1,b0]
a * b
= (a0 + a1 * x^64) * (b0 + b1 * x^64)
= a0 * b0 + (a0 * b1 + a1 * b0) * x^64 + a1 * b1 * x^128
= a0 * b0 + ((a0 + a1)*(b0 + b1) - a0*b0 - a1*b1) * x^64 + a1 * b1 * x^128
= c + e * x^64 + d' * x^128
= c + e0 * x^64 + e1 * x^128 + d' * x^128
= c + e0 * x^64 + (d' + e1) * f0
= c + e0 * x^64 + d * f0
= c + e0 * x^64 + (d0 + d1 * x^64) * f0
= c + e0 * x^64 + d0 * f0 + (d1 * f0) * x^64 -- w = d1 * f0
= c + e0 * x^64 + d0 * f0 + (w0 + w1 * x^64) * x^64
= c + e0 * x^64 + d0 * f0 + w0 * x^64 + w1 * x^128
= c + e0 * x^64 + w0 * x^64 + d0 * f0 + w1 * f0
= c + (e0 + w0) * x^64 + (d0 + w1) * f0
*/
.text
.globl func(gf128_mul)
.p2align 4

/*
 * gf128_mul: r = a * b in GF(2^128), reduced with x^128 = x^7 + x^2 + x + 1.
 *
 * One Karatsuba carry-less multiply (3 x pmull) followed by a two-step
 * fold of the upper 128 bits (2 x pmull); see the derivation comment at
 * the top of this file for the algebra and the names c, d, e, w used below.
 *
 * In:    x0 = address that receives the 16-byte product
 *        x1 = address of a, loaded as two 64-bit lanes (a0, a1)
 *        x2 = address of b, loaded as two 64-bit lanes (b0, b1)
 * Out:   16 bytes stored at [x0]
 * Clobb: v0-v7 only (caller-saved under AAPCS64); no general-purpose
 *        register is written and the stack is not touched.
 *
 * Both operands are fully loaded before the single store, so x0 may
 * alias x1 and/or x2.
 *
 * NOTE(review): the C prototype is not visible in this file; presumably
 * void gf128_mul(r, a, b) with three 16-byte buffers -- confirm against
 * the declaring header.
 *
 * Register roles:
 *   v0 = all-zero (padding for ext)     v4 = d' = a1*b1, then d, then fold
 *   v1 = a = (a0, a1)                   v5 = e (Karatsuba middle term)
 *   v2 = b = (b0, b1)                   v6 = scratch: b0+b1, e1, w
 *   v3 = c = a0*b0, accumulates result  v7 = f0 = 0x87 in each 64-bit lane
 */
func(gf128_mul):
	// load (a0, a1)
	ld1	{v1.2d}, [x1]
	// load (b0, b1)
	ld1	{v2.2d}, [x2]

	// prepare zero; movi does not read v0, so (unlike eor v0, v0, v0)
	// it adds no dependency on the caller's stale v0
	movi	v0.2d, #0
	// set f0 = x^7 + x^2 + x + 1 (0x87) in both 64-bit lanes:
	// fill every byte with 0x87, then keep only the top byte of each lane
	movi	v7.16b, #0x87
	ushr	v7.2d, v7.2d, #56

	// Multiply: 3*mul + 2*ext + 4*eor
	// c = a0 * b0
	pmull	v3.1q, v1.1d, v2.1d
	// a0 + a1 (swap halves, then xor; low lane holds the sum)
	ext	v5.16b, v1.16b, v1.16b, #8
	eor	v5.16b, v5.16b, v1.16b
	// d' = a1 * b1
	pmull2	v4.1q, v1.2d, v2.2d
	// b0 + b1
	ext	v6.16b, v2.16b, v2.16b, #8
	eor	v6.16b, v6.16b, v2.16b
	// e = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1
	pmull	v5.1q, v5.1d, v6.1d
	eor	v5.16b, v5.16b, v3.16b
	eor	v5.16b, v5.16b, v4.16b

	// Reduce: 2*mul + 3*ext + 5*eor
	// d = d' + e1            (v6 = (e1, 0))
	ext	v6.16b, v5.16b, v0.16b, #8
	eor	v4.16b, v4.16b, v6.16b
	// w = d1 * f0
	pmull2	v6.1q, v4.2d, v7.2d
	// (e0 + w0) * x^64       (v5 = (0, e0 + w0))
	eor	v5.16b, v5.16b, v6.16b
	ext	v5.16b, v0.16b, v5.16b, #8
	// c = c + (e0 + w0) * x^64
	eor	v3.16b, v3.16b, v5.16b
	// (d0 + w1) * f0         (swap w so that w1 lines up with d0)
	ext	v6.16b, v6.16b, v6.16b, #8
	eor	v4.16b, v4.16b, v6.16b
	pmull	v4.1q, v4.1d, v7.1d
	// c += (d0 + w1) * f0
	eor	v3.16b, v3.16b, v4.16b

	// Output
	st1	{v3.2d}, [x0]
	ret
/*
 * ELF only: emit an empty .note.GNU-stack section so that objects built
 * from this file are not marked as requiring an executable stack.
 * The code in this file does not need one.
 */
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif