mirror of
https://github.com/guanzhi/GmSSL.git
synced 2026-06-29 09:13:38 +08:00
Add .note.GNU-stack section declarations to hand-written assembly sources so ELF builds do not produce executables or libraries with an executable stack. These assembly implementations do not require an executable stack. Adding the marker restores normal non-executable stack/NX hardening. Signed-off-by: hrimfaxi <outmatch@gmail.com>
105 lines
2.2 KiB
ArmAsm
105 lines
2.2 KiB
ArmAsm
/*
|
|
* Copyright 2014-2024 The GmSSL Project. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
* not use this file except in compliance with the License.
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*/
|
|
|
|
|
|
#include <gmssl/asm.h>
|
|
|
|
|
|
/* GF(2^128) defined by f(x) = x^128 + x^7 + x^2 + x + 1
|
|
|
|
f0 = x^128 = x^7 + x^2 + x + 1
|
|
ext([a0,a1],[b0,b1],8) => [a1,b0]
|
|
|
|
a * b
|
|
= (a0 + a1 * x^64) * (b0 + b1 * x^64)
|
|
= a0 * b0 + (a0 * b1 + a1 * b0) * x^64 + a1 * b1 * x^128
|
|
= a0 * b0 + ((a0 + a1)*(b0 + b1) - a0*b0 - a1*b1) * x^64 + a1 * b1 * x^128
|
|
= c + e * x^64 + d' * x^128
|
|
= c + e0 * x^64 + e1 * x^128 + d' * x^128
|
|
= c + e0 * x^64 + (d' + e1) * f0
|
|
= c + e0 * x^64 + d * f0
|
|
= c + e0 * x^64 + (d0 + d1 * x^64) * f0
|
|
= c + e0 * x^64 + d0 * f0 + (d1 * f0) * x^64 -- w = d1 * f0
|
|
= c + e0 * x^64 + d0 * f0 + (w0 + w1 * x^64) * x^64
|
|
= c + e0 * x^64 + d0 * f0 + w0 * x^64 + w1 * x^128
|
|
= c + e0 * x^64 + w0 * x^64 + d0 * f0 + w1 * f0
|
|
= c + (e0 + w0) * x^64 + (d0 + w1) * f0
|
|
*/
|
|
.text
|
|
|
|
.globl func(gf128_mul)
|
|
.align 4
|
|
|
|
func(gf128_mul):
|
|
// load (a0, a1)
|
|
ld1 {v1.2d},[x1]
|
|
// load (b0, b1)
|
|
ld1 {v2.2d},[x2]
|
|
|
|
// prepare zero
|
|
eor v0.16b, v0.16b, v0.16b
|
|
|
|
// set f(x) = x^7 + x^2 + x + 1 (0x87)
|
|
movi v7.16b, #0x87
|
|
ushr v7.2d, v7.2d, #56
|
|
|
|
// Multiply: 3*mul + 2*ext + 4*eor
|
|
|
|
// c = a0 * b0
|
|
pmull v3.1q, v1.1d, v2.1d
|
|
|
|
// a0 + a1
|
|
ext v5.16b, v1.16b, v1.16b, #8
|
|
eor v5.16b, v5.16b, v1.16b
|
|
|
|
// d' = a1 * b1
|
|
pmull2 v4.1q, v1.2d, v2.2d
|
|
|
|
// b0 + b1
|
|
ext v6.16b, v2.16b, v2.16b, #8
|
|
eor v6.16b, v6.16b, v2.16b
|
|
|
|
// e = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1
|
|
pmull v5.1q, v5.1d, v6.1d
|
|
eor v5.16b, v5.16b, v3.16b
|
|
eor v5.16b, v5.16b, v4.16b
|
|
|
|
// Reduce: 2*mul + 3*ext + 5*eor
|
|
|
|
// d = d' + e1
|
|
ext v6.16b, v5.16b, v0.16b, #8
|
|
eor v4.16b, v4.16b, v6.16b
|
|
|
|
// w = d1 * f0
|
|
pmull2 v6.1q, v4.2d, v7.2d
|
|
|
|
// (e0 + w0) * x^64
|
|
eor v5.16b, v5.16b, v6.16b
|
|
ext v5.16b, v0.16b, v5.16b, #8
|
|
|
|
// c = c + (e0 + w0) * x^64
|
|
eor v3.16b, v3.16b, v5.16b
|
|
|
|
// (d0 + w1) * f0
|
|
ext v6.16b, v6.16b, v6.16b, #8
|
|
eor v4.16b, v4.16b, v6.16b
|
|
pmull v4.1q, v4.1d, v7.1d
|
|
|
|
// c += (d0 + w1) * f0
|
|
eor v3.16b, v3.16b, v4.16b
|
|
|
|
// Output
|
|
st1 {v3.2d}, [x0]
|
|
|
|
ret
|
|
|
|
/*
 * ELF targets only: emit an empty .note.GNU-stack section so that the
 * object is marked as not requiring an executable stack.
 */
#if defined(__ELF__)
	.section	.note.GNU-stack, "", @progbits
#endif
|