mirror of
https://github.com/guanzhi/GmSSL.git
synced 2026-07-01 18:23:38 +08:00
quantum init
This commit is contained in:
@@ -1,11 +1,4 @@
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#!/usr/bin/env perl
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -29,11 +22,10 @@
|
||||
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
|
||||
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
|
||||
# pressure with notable relative improvement, achieving 1.0 cycle per
|
||||
# byte processed with 128-bit key on Haswell processor, 0.74 - on
|
||||
# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
|
||||
# measurements for favourable packet size, one divisible by 96.
|
||||
# Applications using the EVP interface will observe a few percent
|
||||
# worse performance.]
|
||||
# byte processed with 128-bit key on Haswell processor, and 0.74 -
|
||||
# on Broadwell. [Mentioned results are raw profiled measurements for
|
||||
# favourable packet size, one divisible by 96. Applications using the
|
||||
# EVP interface will observe a few percent worse performance.]
|
||||
#
|
||||
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
|
||||
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
|
||||
@@ -68,7 +60,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
|
||||
$avx = ($2>=3.0) + ($2>3.0);
|
||||
}
|
||||
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
if ($avx>1) {{{
|
||||
@@ -116,23 +108,6 @@ _aesni_ctr32_ghash_6x:
|
||||
vpxor $rndkey,$inout3,$inout3
|
||||
vmovups 0x10-0x80($key),$T2 # borrow $T2 for $rndkey
|
||||
vpclmulqdq \$0x01,$Hkey,$Z3,$Z2
|
||||
|
||||
# At this point, the current block of 96 (0x60) bytes has already been
|
||||
# loaded into registers. Concurrently with processing it, we want to
|
||||
# load the next 96 bytes of input for the next round. Obviously, we can
|
||||
# only do this if there are at least 96 more bytes of input beyond the
|
||||
# input we're currently processing, or else we'd read past the end of
|
||||
# the input buffer. Here, we set |%r12| to 96 if there are at least 96
|
||||
# bytes of input beyond the 96 bytes we're already processing, and we
|
||||
# set |%r12| to 0 otherwise. In the case where we set |%r12| to 96,
|
||||
# we'll read in the next block so that it is in registers for the next
|
||||
# loop iteration. In the case where we set |%r12| to 0, we'll re-read
|
||||
# the current block and then ignore what we re-read.
|
||||
#
|
||||
# At this point, |$in0| points to the current (already read into
|
||||
# registers) block, and |$end0| points to 2*96 bytes before the end of
|
||||
# the input. Thus, |$in0| > |$end0| means that we do not have the next
|
||||
# 96-byte block to read in, and |$in0| <= |$end0| means we do.
|
||||
xor %r12,%r12
|
||||
cmp $in0,$end0
|
||||
|
||||
@@ -425,9 +400,6 @@ $code.=<<___;
|
||||
.align 32
|
||||
aesni_gcm_decrypt:
|
||||
xor $ret,$ret
|
||||
|
||||
# We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60)
|
||||
# bytes of input.
|
||||
cmp \$0x60,$len # minimal accepted length
|
||||
jb .Lgcm_dec_abort
|
||||
|
||||
@@ -482,15 +454,7 @@ $code.=<<___;
|
||||
vmovdqu 0x50($inp),$Z3 # I[5]
|
||||
lea ($inp),$in0
|
||||
vmovdqu 0x40($inp),$Z0
|
||||
|
||||
# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
|
||||
# bytes before the end of the input. Note, in particular, that this is
|
||||
# correct even if |$len| is not an even multiple of 96 or 16. XXX: This
|
||||
# seems to require that |$inp| + |$len| >= 2*96 (0xc0); i.e. |$inp| must
|
||||
# not be near the very beginning of the address space when |$len| < 2*96
|
||||
# (0xc0).
|
||||
lea -0xc0($inp,$len),$end0
|
||||
|
||||
vmovdqu 0x30($inp),$Z1
|
||||
shr \$4,$len
|
||||
xor $ret,$ret
|
||||
@@ -646,10 +610,6 @@ _aesni_ctr32_6x:
|
||||
.align 32
|
||||
aesni_gcm_encrypt:
|
||||
xor $ret,$ret
|
||||
|
||||
# We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of
|
||||
# input. Then we call |_aesni_ctr32_ghash_6x|, which requires at
|
||||
# least 96 more bytes of input.
|
||||
cmp \$0x60*3,$len # minimal accepted length
|
||||
jb .Lgcm_enc_abort
|
||||
|
||||
@@ -699,16 +659,7 @@ $code.=<<___;
|
||||
.Lenc_no_key_aliasing:
|
||||
|
||||
lea ($out),$in0
|
||||
|
||||
# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
|
||||
# bytes before the end of the input. Note, in particular, that this is
|
||||
# correct even if |$len| is not an even multiple of 96 or 16. Unlike in
|
||||
# the decryption case, there's no caveat that |$out| must not be near
|
||||
# the very beginning of the address space, because we know that
|
||||
# |$len| >= 3*96 from the check above, and so we know
|
||||
# |$out| + |$len| >= 2*96 (0xc0).
|
||||
lea -0xc0($out,$len),$end0
|
||||
|
||||
shr \$4,$len
|
||||
|
||||
call _aesni_ctr32_6x
|
||||
|
||||
Reference in New Issue
Block a user