quantum init

2026-07-01 18:23:38 +08:00 · 2018-01-04 13:38:57 +08:00
parent d11f845fde
commit 53af3b51ae
2361 changed files with 387455 additions and 144458 deletions
--- a/crypto/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/modes/asm/aesni-gcm-x86_64.pl
@@ -1,11 +1,4 @@
-#! /usr/bin/env perl
-# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License").  You may not use
-# this file except in compliance with the License.  You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
+#!/usr/bin/env perl
 #
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -29,11 +22,10 @@
 # [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
 # Locktyukhin of Intel Corp. who verified that it reduces shuffles
 # pressure with notable relative improvement, achieving 1.0 cycle per
-# byte processed with 128-bit key on Haswell processor, 0.74 - on
-# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
-# measurements for favourable packet size, one divisible by 96.
-# Applications using the EVP interface will observe a few percent
-# worse performance.]
+# byte processed with 128-bit key on Haswell processor, and 0.74 -
+# on Broadwell. [Mentioned results are raw profiled measurements for
+# favourable packet size, one divisible by 96. Applications using the
+# EVP interface will observe a few percent worse performance.]
 #
 # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
 # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
@@ -68,7 +60,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
 	$avx = ($2>=3.0) + ($2>3.0);
 }

-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;

 if ($avx>1) {{{
@@ -116,23 +108,6 @@ _aesni_ctr32_ghash_6x:
 	  vpxor		$rndkey,$inout3,$inout3
 	  vmovups	0x10-0x80($key),$T2	# borrow $T2 for $rndkey
 	vpclmulqdq	\$0x01,$Hkey,$Z3,$Z2
-
-	# At this point, the current block of 96 (0x60) bytes has already been
-	# loaded into registers. Concurrently with processing it, we want to
-	# load the next 96 bytes of input for the next round. Obviously, we can
-	# only do this if there are at least 96 more bytes of input beyond the
-	# input we're currently processing, or else we'd read past the end of
-	# the input buffer. Here, we set |%r12| to 96 if there are at least 96
-	# bytes of input beyond the 96 bytes we're already processing, and we
-	# set |%r12| to 0 otherwise. In the case where we set |%r12| to 96,
-	# we'll read in the next block so that it is in registers for the next
-	# loop iteration. In the case where we set |%r12| to 0, we'll re-read
-	# the current block and then ignore what we re-read.
-	#
-	# At this point, |$in0| points to the current (already read into
-	# registers) block, and |$end0| points to 2*96 bytes before the end of
-	# the input. Thus, |$in0| > |$end0| means that we do not have the next
-	# 96-byte block to read in, and |$in0| <= |$end0| means we do.
 	xor		%r12,%r12
 	cmp		$in0,$end0

@@ -425,9 +400,6 @@ $code.=<<___;
 .align	32
 aesni_gcm_decrypt:
 	xor	$ret,$ret
-
-	# We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60)
-	# bytes of input.
 	cmp	\$0x60,$len			# minimal accepted length
 	jb	.Lgcm_dec_abort

@@ -482,15 +454,7 @@ $code.=<<___;
 	vmovdqu		0x50($inp),$Z3		# I[5]
 	lea		($inp),$in0
 	vmovdqu		0x40($inp),$Z0
-
-	# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
-	# bytes before the end of the input. Note, in particular, that this is
-	# correct even if |$len| is not an even multiple of 96 or 16. XXX: This
-	# seems to require that |$inp| + |$len| >= 2*96 (0xc0); i.e. |$inp| must
-	# not be near the very beginning of the address space when |$len| < 2*96
-	# (0xc0).
 	lea		-0xc0($inp,$len),$end0
-
 	vmovdqu		0x30($inp),$Z1
 	shr		\$4,$len
 	xor		$ret,$ret
@@ -646,10 +610,6 @@ _aesni_ctr32_6x:
 .align	32
 aesni_gcm_encrypt:
 	xor	$ret,$ret
-
-	# We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of
-	# input. Then we call |_aesni_ctr32_ghash_6x|, which requires at
-	# least 96 more bytes of input.
 	cmp	\$0x60*3,$len			# minimal accepted length
 	jb	.Lgcm_enc_abort

@@ -699,16 +659,7 @@ $code.=<<___;
 .Lenc_no_key_aliasing:

 	lea		($out),$in0
-
-	# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
-	# bytes before the end of the input. Note, in particular, that this is
-	# correct even if |$len| is not an even multiple of 96 or 16. Unlike in
-	# the decryption case, there's no caveat that |$out| must not be near
-	# the very beginning of the address space, because we know that
-	# |$len| >= 3*96 from the check above, and so we know
-	# |$out| + |$len| >= 2*96 (0xc0).
 	lea		-0xc0($out,$len),$end0
-
 	shr		\$4,$len

 	call		_aesni_ctr32_6x