mirror of
https://github.com/guanzhi/GmSSL.git
synced 2026-06-29 09:13:38 +08:00
Merge remote-tracking branch 'origin/master'
# Conflicts: # README.md
This commit is contained in:
@@ -1,160 +0,0 @@
|
||||
#
|
||||
# OpenSSL/crypto/modes/Makefile
|
||||
#
|
||||
|
||||
DIR= modes
|
||||
TOP= ../..
|
||||
CC= cc
|
||||
INCLUDES= -I.. -I$(TOP) -I../../include
|
||||
CFLAG=-g
|
||||
MAKEFILE= Makefile
|
||||
AR= ar r
|
||||
|
||||
MODES_ASM_OBJ=
|
||||
|
||||
CFLAGS= $(INCLUDES) $(CFLAG)
|
||||
ASFLAGS= $(INCLUDES) $(ASFLAG)
|
||||
AFLAGS= $(ASFLAGS)
|
||||
|
||||
GENERAL=Makefile
|
||||
TEST=
|
||||
APPS=
|
||||
|
||||
LIB=$(TOP)/libcrypto.a
|
||||
LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
|
||||
ccm128.c xts128.c wrap128.c
|
||||
LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \
|
||||
ccm128.o xts128.o wrap128.o $(MODES_ASM_OBJ)
|
||||
|
||||
SRC= $(LIBSRC)
|
||||
|
||||
#EXHEADER= store.h str_compat.h
|
||||
EXHEADER= modes.h
|
||||
HEADER= modes_lcl.h $(EXHEADER)
|
||||
|
||||
ALL= $(GENERAL) $(SRC) $(HEADER)
|
||||
|
||||
top:
|
||||
(cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
|
||||
|
||||
all: lib
|
||||
|
||||
lib: $(LIBOBJ)
|
||||
$(AR) $(LIB) $(LIBOBJ)
|
||||
$(RANLIB) $(LIB) || echo Never mind.
|
||||
@touch lib
|
||||
|
||||
ghash-ia64.s: asm/ghash-ia64.pl
|
||||
$(PERL) asm/ghash-ia64.pl $@ $(CFLAGS)
|
||||
ghash-x86.s: asm/ghash-x86.pl
|
||||
$(PERL) asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
|
||||
ghash-x86_64.s: asm/ghash-x86_64.pl
|
||||
$(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@
|
||||
aesni-gcm-x86_64.s: asm/aesni-gcm-x86_64.pl
|
||||
$(PERL) asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME) > $@
|
||||
ghash-sparcv9.s: asm/ghash-sparcv9.pl
|
||||
$(PERL) asm/ghash-sparcv9.pl $@ $(CFLAGS)
|
||||
ghash-alpha.s: asm/ghash-alpha.pl
|
||||
(preproc=$$$$.$@.S; trap "rm $$preproc" INT; \
|
||||
$(PERL) asm/ghash-alpha.pl > $$preproc && \
|
||||
$(CC) -E -P $$preproc > $@ && rm $$preproc)
|
||||
ghash-parisc.s: asm/ghash-parisc.pl
|
||||
$(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
|
||||
ghashv8-armx.S: asm/ghashv8-armx.pl
|
||||
$(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@
|
||||
ghashp8-ppc.s: asm/ghashp8-ppc.pl
|
||||
$(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@
|
||||
|
||||
# GNU make "catch all"
|
||||
ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
|
||||
|
||||
ghash-armv4.o: ghash-armv4.S
|
||||
ghashv8-armx.o: ghashv8-armx.S
|
||||
|
||||
files:
|
||||
$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
|
||||
|
||||
links:
|
||||
@$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
|
||||
@$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
|
||||
@$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
|
||||
|
||||
install:
|
||||
@[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
|
||||
@headerlist="$(EXHEADER)"; for i in $$headerlist; \
|
||||
do \
|
||||
(cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
|
||||
chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
|
||||
done;
|
||||
|
||||
tags:
|
||||
ctags $(SRC)
|
||||
|
||||
tests:
|
||||
|
||||
lint:
|
||||
lint -DLINT $(INCLUDES) $(SRC)>fluff
|
||||
|
||||
update: depend
|
||||
|
||||
depend:
|
||||
@[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
|
||||
$(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
|
||||
|
||||
dclean:
|
||||
$(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
|
||||
mv -f Makefile.new $(MAKEFILE)
|
||||
|
||||
clean:
|
||||
rm -f *.s *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
|
||||
|
||||
# DO NOT DELETE THIS LINE -- make depend depends on it.
|
||||
|
||||
cbc128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
cbc128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
cbc128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
cbc128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
cbc128.o: ../../include/openssl/symhacks.h cbc128.c modes_lcl.h
|
||||
ccm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
ccm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
ccm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
ccm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
ccm128.o: ../../include/openssl/symhacks.h ccm128.c modes_lcl.h
|
||||
cfb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
cfb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
cfb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
cfb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
cfb128.o: ../../include/openssl/symhacks.h cfb128.c modes_lcl.h
|
||||
ctr128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
ctr128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
ctr128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
ctr128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
ctr128.o: ../../include/openssl/symhacks.h ctr128.c modes_lcl.h
|
||||
cts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
cts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
cts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
cts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
cts128.o: ../../include/openssl/symhacks.h cts128.c modes_lcl.h
|
||||
gcm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
gcm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
gcm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
gcm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
gcm128.o: ../../include/openssl/symhacks.h gcm128.c modes_lcl.h
|
||||
ofb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
ofb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
ofb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
ofb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
ofb128.o: ../../include/openssl/symhacks.h modes_lcl.h ofb128.c
|
||||
wrap128.o: ../../e_os.h ../../include/openssl/bio.h
|
||||
wrap128.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
|
||||
wrap128.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
|
||||
wrap128.o: ../../include/openssl/lhash.h ../../include/openssl/modes.h
|
||||
wrap128.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
|
||||
wrap128.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
|
||||
wrap128.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
|
||||
wrap128.o: ../cryptlib.h wrap128.c
|
||||
xts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
xts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
xts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
xts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
xts128.o: ../../include/openssl/symhacks.h modes_lcl.h xts128.c
|
||||
@@ -1,160 +0,0 @@
|
||||
#
|
||||
# OpenSSL/crypto/modes/Makefile
|
||||
#
|
||||
|
||||
DIR= modes
|
||||
TOP= ../..
|
||||
CC= cc
|
||||
INCLUDES= -I.. -I$(TOP) -I../../include
|
||||
CFLAG=-g
|
||||
MAKEFILE= Makefile
|
||||
AR= ar r
|
||||
|
||||
MODES_ASM_OBJ=
|
||||
|
||||
CFLAGS= $(INCLUDES) $(CFLAG)
|
||||
ASFLAGS= $(INCLUDES) $(ASFLAG)
|
||||
AFLAGS= $(ASFLAGS)
|
||||
|
||||
GENERAL=Makefile
|
||||
TEST=
|
||||
APPS=
|
||||
|
||||
LIB=$(TOP)/libcrypto.a
|
||||
LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
|
||||
ccm128.c xts128.c wrap128.c
|
||||
LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \
|
||||
ccm128.o xts128.o wrap128.o $(MODES_ASM_OBJ)
|
||||
|
||||
SRC= $(LIBSRC)
|
||||
|
||||
#EXHEADER= store.h str_compat.h
|
||||
EXHEADER= modes.h
|
||||
HEADER= modes_lcl.h $(EXHEADER)
|
||||
|
||||
ALL= $(GENERAL) $(SRC) $(HEADER)
|
||||
|
||||
top:
|
||||
(cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
|
||||
|
||||
all: lib
|
||||
|
||||
lib: $(LIBOBJ)
|
||||
$(AR) $(LIB) $(LIBOBJ)
|
||||
$(RANLIB) $(LIB) || echo Never mind.
|
||||
@touch lib
|
||||
|
||||
ghash-ia64.s: asm/ghash-ia64.pl
|
||||
$(PERL) asm/ghash-ia64.pl $@ $(CFLAGS)
|
||||
ghash-x86.s: asm/ghash-x86.pl
|
||||
$(PERL) asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
|
||||
ghash-x86_64.s: asm/ghash-x86_64.pl
|
||||
$(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@
|
||||
aesni-gcm-x86_64.s: asm/aesni-gcm-x86_64.pl
|
||||
$(PERL) asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME) > $@
|
||||
ghash-sparcv9.s: asm/ghash-sparcv9.pl
|
||||
$(PERL) asm/ghash-sparcv9.pl $@ $(CFLAGS)
|
||||
ghash-alpha.s: asm/ghash-alpha.pl
|
||||
(preproc=$$$$.$@.S; trap "rm $$preproc" INT; \
|
||||
$(PERL) asm/ghash-alpha.pl > $$preproc && \
|
||||
$(CC) -E -P $$preproc > $@ && rm $$preproc)
|
||||
ghash-parisc.s: asm/ghash-parisc.pl
|
||||
$(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
|
||||
ghashv8-armx.S: asm/ghashv8-armx.pl
|
||||
$(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@
|
||||
ghashp8-ppc.s: asm/ghashp8-ppc.pl
|
||||
$(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@
|
||||
|
||||
# GNU make "catch all"
|
||||
ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
|
||||
|
||||
ghash-armv4.o: ghash-armv4.S
|
||||
ghashv8-armx.o: ghashv8-armx.S
|
||||
|
||||
files:
|
||||
$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
|
||||
|
||||
links:
|
||||
@$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
|
||||
@$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
|
||||
@$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
|
||||
|
||||
install:
|
||||
@[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
|
||||
@headerlist="$(EXHEADER)"; for i in $$headerlist; \
|
||||
do \
|
||||
(cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
|
||||
chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
|
||||
done;
|
||||
|
||||
tags:
|
||||
ctags $(SRC)
|
||||
|
||||
tests:
|
||||
|
||||
lint:
|
||||
lint -DLINT $(INCLUDES) $(SRC)>fluff
|
||||
|
||||
update: depend
|
||||
|
||||
depend:
|
||||
@[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
|
||||
$(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
|
||||
|
||||
dclean:
|
||||
$(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
|
||||
mv -f Makefile.new $(MAKEFILE)
|
||||
|
||||
clean:
|
||||
rm -f *.s *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
|
||||
|
||||
# DO NOT DELETE THIS LINE -- make depend depends on it.
|
||||
|
||||
cbc128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
cbc128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
cbc128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
cbc128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
cbc128.o: ../../include/openssl/symhacks.h cbc128.c modes_lcl.h
|
||||
ccm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
ccm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
ccm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
ccm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
ccm128.o: ../../include/openssl/symhacks.h ccm128.c modes_lcl.h
|
||||
cfb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
cfb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
cfb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
cfb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
cfb128.o: ../../include/openssl/symhacks.h cfb128.c modes_lcl.h
|
||||
ctr128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
ctr128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
ctr128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
ctr128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
ctr128.o: ../../include/openssl/symhacks.h ctr128.c modes_lcl.h
|
||||
cts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
cts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
cts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
cts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
cts128.o: ../../include/openssl/symhacks.h cts128.c modes_lcl.h
|
||||
gcm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
gcm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
gcm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
gcm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
gcm128.o: ../../include/openssl/symhacks.h gcm128.c modes_lcl.h
|
||||
ofb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
ofb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
ofb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
ofb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
ofb128.o: ../../include/openssl/symhacks.h modes_lcl.h ofb128.c
|
||||
wrap128.o: ../../e_os.h ../../include/openssl/bio.h
|
||||
wrap128.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
|
||||
wrap128.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
|
||||
wrap128.o: ../../include/openssl/lhash.h ../../include/openssl/modes.h
|
||||
wrap128.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
|
||||
wrap128.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
|
||||
wrap128.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
|
||||
wrap128.o: ../cryptlib.h wrap128.c
|
||||
xts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
|
||||
xts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
|
||||
xts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
|
||||
xts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
|
||||
xts128.o: ../../include/openssl/symhacks.h modes_lcl.h xts128.c
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -22,10 +29,11 @@
|
||||
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
|
||||
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
|
||||
# pressure with notable relative improvement, achieving 1.0 cycle per
|
||||
# byte processed with 128-bit key on Haswell processor, and 0.74 -
|
||||
# on Broadwell. [Mentioned results are raw profiled measurements for
|
||||
# favourable packet size, one divisible by 96. Applications using the
|
||||
# EVP interface will observe a few percent worse performance.]
|
||||
# byte processed with 128-bit key on Haswell processor, 0.74 - on
|
||||
# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
|
||||
# measurements for favourable packet size, one divisible by 96.
|
||||
# Applications using the EVP interface will observe a few percent
|
||||
# worse performance.]
|
||||
#
|
||||
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
|
||||
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
|
||||
@@ -43,7 +51,7 @@ die "can't locate x86_64-xlate.pl";
|
||||
|
||||
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
|
||||
=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
|
||||
$avx = ($1>=2.19) + ($1>=2.22);
|
||||
$avx = ($1>=2.20) + ($1>=2.22);
|
||||
}
|
||||
|
||||
if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
|
||||
@@ -56,11 +64,11 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
|
||||
$avx = ($1>=10) + ($1>=11);
|
||||
}
|
||||
|
||||
if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
|
||||
if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
|
||||
$avx = ($2>=3.0) + ($2>3.0);
|
||||
}
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
if ($avx>1) {{{
|
||||
@@ -108,6 +116,23 @@ _aesni_ctr32_ghash_6x:
|
||||
vpxor $rndkey,$inout3,$inout3
|
||||
vmovups 0x10-0x80($key),$T2 # borrow $T2 for $rndkey
|
||||
vpclmulqdq \$0x01,$Hkey,$Z3,$Z2
|
||||
|
||||
# At this point, the current block of 96 (0x60) bytes has already been
|
||||
# loaded into registers. Concurrently with processing it, we want to
|
||||
# load the next 96 bytes of input for the next round. Obviously, we can
|
||||
# only do this if there are at least 96 more bytes of input beyond the
|
||||
# input we're currently processing, or else we'd read past the end of
|
||||
# the input buffer. Here, we set |%r12| to 96 if there are at least 96
|
||||
# bytes of input beyond the 96 bytes we're already processing, and we
|
||||
# set |%r12| to 0 otherwise. In the case where we set |%r12| to 96,
|
||||
# we'll read in the next block so that it is in registers for the next
|
||||
# loop iteration. In the case where we set |%r12| to 0, we'll re-read
|
||||
# the current block and then ignore what we re-read.
|
||||
#
|
||||
# At this point, |$in0| points to the current (already read into
|
||||
# registers) block, and |$end0| points to 2*96 bytes before the end of
|
||||
# the input. Thus, |$in0| > |$end0| means that we do not have the next
|
||||
# 96-byte block to read in, and |$in0| <= |$end0| means we do.
|
||||
xor %r12,%r12
|
||||
cmp $in0,$end0
|
||||
|
||||
@@ -400,6 +425,9 @@ $code.=<<___;
|
||||
.align 32
|
||||
aesni_gcm_decrypt:
|
||||
xor $ret,$ret
|
||||
|
||||
# We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60)
|
||||
# bytes of input.
|
||||
cmp \$0x60,$len # minimal accepted length
|
||||
jb .Lgcm_dec_abort
|
||||
|
||||
@@ -454,7 +482,15 @@ $code.=<<___;
|
||||
vmovdqu 0x50($inp),$Z3 # I[5]
|
||||
lea ($inp),$in0
|
||||
vmovdqu 0x40($inp),$Z0
|
||||
|
||||
# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
|
||||
# bytes before the end of the input. Note, in particular, that this is
|
||||
# correct even if |$len| is not an even multiple of 96 or 16. XXX: This
|
||||
# seems to require that |$inp| + |$len| >= 2*96 (0xc0); i.e. |$inp| must
|
||||
# not be near the very beginning of the address space when |$len| < 2*96
|
||||
# (0xc0).
|
||||
lea -0xc0($inp,$len),$end0
|
||||
|
||||
vmovdqu 0x30($inp),$Z1
|
||||
shr \$4,$len
|
||||
xor $ret,$ret
|
||||
@@ -489,7 +525,7 @@ $code.=<<___;
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
movaps -0xd8(%rax),%xmm6
|
||||
movaps -0xd8(%rax),%xmm7
|
||||
movaps -0xc8(%rax),%xmm7
|
||||
movaps -0xb8(%rax),%xmm8
|
||||
movaps -0xa8(%rax),%xmm9
|
||||
movaps -0x98(%rax),%xmm10
|
||||
@@ -610,6 +646,10 @@ _aesni_ctr32_6x:
|
||||
.align 32
|
||||
aesni_gcm_encrypt:
|
||||
xor $ret,$ret
|
||||
|
||||
# We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of
|
||||
# input. Then we call |_aesni_ctr32_ghash_6x|, which requires at
|
||||
# least 96 more bytes of input.
|
||||
cmp \$0x60*3,$len # minimal accepted length
|
||||
jb .Lgcm_enc_abort
|
||||
|
||||
@@ -659,7 +699,16 @@ $code.=<<___;
|
||||
.Lenc_no_key_aliasing:
|
||||
|
||||
lea ($out),$in0
|
||||
|
||||
# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
|
||||
# bytes before the end of the input. Note, in particular, that this is
|
||||
# correct even if |$len| is not an even multiple of 96 or 16. Unlike in
|
||||
# the decryption case, there's no caveat that |$out| must not be near
|
||||
# the very beginning of the address space, because we know that
|
||||
# |$len| >= 3*96 from the check above, and so we know
|
||||
# |$out| + |$len| >= 2*96 (0xc0).
|
||||
lea -0xc0($out,$len),$end0
|
||||
|
||||
shr \$4,$len
|
||||
|
||||
call _aesni_ctr32_6x
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -454,7 +461,7 @@ rem_4bit:
|
||||
.align 4
|
||||
|
||||
___
|
||||
$output=shift and open STDOUT,">$output";
|
||||
$output=pop and open STDOUT,">$output";
|
||||
print $code;
|
||||
close STDOUT;
|
||||
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -42,10 +49,10 @@
|
||||
# below and combine it with reduction algorithm from x86 module.
|
||||
# Performance improvement over previous version varies from 65% on
|
||||
# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8
|
||||
# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 -
|
||||
# in 9.33.
|
||||
# processes one byte in 8.45 cycles, A9 - in 10.2, A15 - in 7.63,
|
||||
# Snapdragon S4 - in 9.33.
|
||||
#
|
||||
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
|
||||
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
|
||||
# Polynomial Multiplication on ARM Processors using the NEON Engine.
|
||||
#
|
||||
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
|
||||
@@ -71,8 +78,20 @@
|
||||
# *native* byte order on current platform. See gcm128.c for working
|
||||
# example...
|
||||
|
||||
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output";
|
||||
$flavour = shift;
|
||||
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
|
||||
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
|
||||
|
||||
if ($flavour && $flavour ne "void") {
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
open STDOUT,"| \"$^X\" $xlate $flavour $output";
|
||||
} else {
|
||||
open STDOUT,">$output";
|
||||
}
|
||||
|
||||
$Xi="r0"; # argument block
|
||||
$Htbl="r1";
|
||||
@@ -124,7 +143,19 @@ $code=<<___;
|
||||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
.syntax unified
|
||||
#endif
|
||||
#if defined(__thumb2__)
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
#ifdef __clang__
|
||||
#define ldrplb ldrbpl
|
||||
#define ldrneb ldrbne
|
||||
#endif
|
||||
|
||||
.type rem_4bit,%object
|
||||
.align 5
|
||||
@@ -137,19 +168,27 @@ rem_4bit:
|
||||
|
||||
.type rem_4bit_get,%function
|
||||
rem_4bit_get:
|
||||
sub $rem_4bit,pc,#8
|
||||
sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit
|
||||
#if defined(__thumb2__)
|
||||
adr $rem_4bit,rem_4bit
|
||||
#else
|
||||
sub $rem_4bit,pc,#8+32 @ &rem_4bit
|
||||
#endif
|
||||
b .Lrem_4bit_got
|
||||
nop
|
||||
nop
|
||||
.size rem_4bit_get,.-rem_4bit_get
|
||||
|
||||
.global gcm_ghash_4bit
|
||||
.type gcm_ghash_4bit,%function
|
||||
.align 4
|
||||
gcm_ghash_4bit:
|
||||
sub r12,pc,#8
|
||||
#if defined(__thumb2__)
|
||||
adr r12,rem_4bit
|
||||
#else
|
||||
sub r12,pc,#8+48 @ &rem_4bit
|
||||
#endif
|
||||
add $len,$inp,$len @ $len to point at the end
|
||||
stmdb sp!,{r3-r11,lr} @ save $len/end too
|
||||
sub r12,r12,#48 @ &rem_4bit
|
||||
|
||||
ldmia r12,{r4-r11} @ copy rem_4bit ...
|
||||
stmdb sp!,{r4-r11} @ ... to stack
|
||||
@@ -196,6 +235,9 @@ gcm_ghash_4bit:
|
||||
eor $Zlh,$Zlh,$Zhl,lsl#28
|
||||
ldrh $Tll,[sp,$nlo] @ rem_4bit[rem]
|
||||
eor $Zhl,$Thl,$Zhl,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb $nlo,[$inp,$cnt]
|
||||
eor $Zhl,$Zhl,$Zhh,lsl#28
|
||||
eor $Zhh,$Thh,$Zhh,lsr#4
|
||||
@@ -206,6 +248,9 @@ gcm_ghash_4bit:
|
||||
add $nhi,$nhi,$nhi
|
||||
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
|
||||
eor $Zll,$Tll,$Zll,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb $Tll,[$Xi,$cnt]
|
||||
eor $Zll,$Zll,$Zlh,lsl#28
|
||||
eor $Zlh,$Tlh,$Zlh,lsr#4
|
||||
@@ -213,8 +258,14 @@ gcm_ghash_4bit:
|
||||
eor $Zlh,$Zlh,$Zhl,lsl#28
|
||||
eor $Zhl,$Thl,$Zhl,lsr#4
|
||||
eor $Zhl,$Zhl,$Zhh,lsl#28
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
eorpl $nlo,$nlo,$Tll
|
||||
eor $Zhh,$Thh,$Zhh,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl $nhi,$nlo,#0xf0
|
||||
andpl $nlo,$nlo,#0x0f
|
||||
eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem]
|
||||
@@ -224,7 +275,11 @@ gcm_ghash_4bit:
|
||||
add $inp,$inp,#16
|
||||
mov $nhi,$Zll
|
||||
___
|
||||
&Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]");
|
||||
&Zsmash("cmp\t$inp,$len","\n".
|
||||
"#ifdef __thumb2__\n".
|
||||
" it ne\n".
|
||||
"#endif\n".
|
||||
" ldrneb $nlo,[$inp,#15]");
|
||||
$code.=<<___;
|
||||
bne .Louter
|
||||
|
||||
@@ -282,6 +337,9 @@ gcm_gmult_4bit:
|
||||
eor $Zlh,$Zlh,$Zhl,lsl#28
|
||||
ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem]
|
||||
eor $Zhl,$Thl,$Zhl,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb $nlo,[$Xi,$cnt]
|
||||
eor $Zhl,$Zhl,$Zhh,lsl#28
|
||||
eor $Zhh,$Thh,$Zhh,lsr#4
|
||||
@@ -299,6 +357,9 @@ gcm_gmult_4bit:
|
||||
eor $Zhl,$Thl,$Zhl,lsr#4
|
||||
eor $Zhl,$Zhl,$Zhh,lsl#28
|
||||
eor $Zhh,$Thh,$Zhh,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl $nhi,$nlo,#0xf0
|
||||
andpl $nlo,$nlo,#0x0f
|
||||
eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
|
||||
@@ -373,9 +434,9 @@ $code.=<<___;
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
vld1.64 $IN#hi,[r1,:64]! @ load H
|
||||
vld1.64 $IN#hi,[r1]! @ load H
|
||||
vmov.i8 $t0,#0xe1
|
||||
vld1.64 $IN#lo,[r1,:64]
|
||||
vld1.64 $IN#lo,[r1]
|
||||
vshl.i64 $t0#hi,#57
|
||||
vshr.u64 $t0#lo,#63 @ t0=0xc2....01
|
||||
vdup.8 $t1,$IN#hi[7]
|
||||
@@ -394,8 +455,8 @@ gcm_init_neon:
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
vld1.64 $IN#hi,[$Xi,:64]! @ load Xi
|
||||
vld1.64 $IN#lo,[$Xi,:64]!
|
||||
vld1.64 $IN#hi,[$Xi]! @ load Xi
|
||||
vld1.64 $IN#lo,[$Xi]!
|
||||
vmov.i64 $k48,#0x0000ffffffffffff
|
||||
vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
|
||||
vmov.i64 $k32,#0x00000000ffffffff
|
||||
@@ -412,8 +473,8 @@ gcm_gmult_neon:
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
vld1.64 $Xl#hi,[$Xi,:64]! @ load Xi
|
||||
vld1.64 $Xl#lo,[$Xi,:64]!
|
||||
vld1.64 $Xl#hi,[$Xi]! @ load Xi
|
||||
vld1.64 $Xl#lo,[$Xi]!
|
||||
vmov.i64 $k48,#0x0000ffffffffffff
|
||||
vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
|
||||
vmov.i64 $k32,#0x00000000ffffffff
|
||||
@@ -432,12 +493,12 @@ gcm_ghash_neon:
|
||||
veor $IN,$Xl @ inp^=Xi
|
||||
.Lgmult_neon:
|
||||
___
|
||||
&clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
|
||||
&clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
|
||||
$code.=<<___;
|
||||
veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing
|
||||
___
|
||||
&clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
&clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
|
||||
&clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
&clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
|
||||
$code.=<<___;
|
||||
veor $Xm,$Xm,$Xl @ Karatsuba post-processing
|
||||
veor $Xm,$Xm,$Xh
|
||||
@@ -468,8 +529,8 @@ $code.=<<___;
|
||||
vrev64.8 $Xl,$Xl
|
||||
#endif
|
||||
sub $Xi,#16
|
||||
vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi
|
||||
vst1.64 $Xl#lo,[$Xi,:64]
|
||||
vst1.64 $Xl#hi,[$Xi]! @ write out Xi
|
||||
vst1.64 $Xl#lo,[$Xi]
|
||||
|
||||
ret @ bx lr
|
||||
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
|
||||
247
crypto/modes/asm/ghash-c64xplus.pl
Normal file
247
crypto/modes/asm/ghash-c64xplus.pl
Normal file
@@ -0,0 +1,247 @@
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# December 2011
|
||||
#
|
||||
# The module implements GCM GHASH function and underlying single
|
||||
# multiplication operation in GF(2^128). Even though subroutines
|
||||
# have _4bit suffix, they are not using any tables, but rely on
|
||||
# hardware Galois Field Multiply support. Streamed GHASH processes
|
||||
# byte in ~7 cycles, which is >6x faster than "4-bit" table-driven
|
||||
# code compiled with TI's cl6x 6.0 with -mv6400+ -o2 flags. We are
|
||||
# comparing apples vs. oranges, but compiler surely could have done
|
||||
# better, because theoretical [though not necessarily achievable]
|
||||
# estimate for "4-bit" table-driven implementation is ~12 cycles.
|
||||
|
||||
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output";
|
||||
|
||||
($Xip,$Htable,$inp,$len)=("A4","B4","A6","B6"); # arguments
|
||||
|
||||
($Z0,$Z1,$Z2,$Z3, $H0, $H1, $H2, $H3,
|
||||
$H0x,$H1x,$H2x,$H3x)=map("A$_",(16..27));
|
||||
($H01u,$H01y,$H2u,$H3u, $H0y,$H1y,$H2y,$H3y,
|
||||
$H0z,$H1z,$H2z,$H3z)=map("B$_",(16..27));
|
||||
($FF000000,$E10000)=("B30","B31");
|
||||
($xip,$x0,$x1,$xib)=map("B$_",(6..9)); # $xip zaps $len
|
||||
$xia="A9";
|
||||
($rem,$res)=("B4","B5"); # $rem zaps $Htable
|
||||
|
||||
$code.=<<___;
|
||||
.text
|
||||
|
||||
.if .ASSEMBLER_VERSION<7000000
|
||||
.asg 0,__TI_EABI__
|
||||
.endif
|
||||
.if __TI_EABI__
|
||||
.asg gcm_gmult_1bit,_gcm_gmult_1bit
|
||||
.asg gcm_gmult_4bit,_gcm_gmult_4bit
|
||||
.asg gcm_ghash_4bit,_gcm_ghash_4bit
|
||||
.endif
|
||||
|
||||
.asg B3,RA
|
||||
|
||||
.if 0
|
||||
.global _gcm_gmult_1bit
|
||||
_gcm_gmult_1bit:
|
||||
ADDAD $Htable,2,$Htable
|
||||
.endif
|
||||
.global _gcm_gmult_4bit
|
||||
_gcm_gmult_4bit:
|
||||
.asmfunc
|
||||
LDDW *${Htable}[-1],$H1:$H0 ; H.lo
|
||||
LDDW *${Htable}[-2],$H3:$H2 ; H.hi
|
||||
|| MV $Xip,${xip} ; reassign Xi
|
||||
|| MVK 15,B1 ; SPLOOPD constant
|
||||
|
||||
MVK 0xE1,$E10000
|
||||
|| LDBU *++${xip}[15],$x1 ; Xi[15]
|
||||
MVK 0xFF,$FF000000
|
||||
|| LDBU *--${xip},$x0 ; Xi[14]
|
||||
SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial
|
||||
SHL $FF000000,24,$FF000000 ; upper byte mask
|
||||
|| BNOP ghash_loop?
|
||||
|| MVK 1,B0 ; take a single spin
|
||||
|
||||
PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes
|
||||
AND $H2,$FF000000,$H2u ; H2's upper byte
|
||||
AND $H3,$FF000000,$H3u ; H3's upper byte
|
||||
|| SHRU $H2u,8,$H2u
|
||||
SHRU $H3u,8,$H3u
|
||||
|| ZERO $Z1:$Z0
|
||||
SHRU2 $xia,8,$H01u
|
||||
|| ZERO $Z3:$Z2
|
||||
.endasmfunc
|
||||
|
||||
.global _gcm_ghash_4bit
|
||||
_gcm_ghash_4bit:
|
||||
.asmfunc
|
||||
LDDW *${Htable}[-1],$H1:$H0 ; H.lo
|
||||
|| SHRU $len,4,B0 ; reassign len
|
||||
LDDW *${Htable}[-2],$H3:$H2 ; H.hi
|
||||
|| MV $Xip,${xip} ; reassign Xi
|
||||
|| MVK 15,B1 ; SPLOOPD constant
|
||||
|
||||
MVK 0xE1,$E10000
|
||||
|| [B0] LDNDW *${inp}[1],$H1x:$H0x
|
||||
MVK 0xFF,$FF000000
|
||||
|| [B0] LDNDW *${inp}++[2],$H3x:$H2x
|
||||
SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial
|
||||
|| LDDW *${xip}[1],$Z1:$Z0
|
||||
SHL $FF000000,24,$FF000000 ; upper byte mask
|
||||
|| LDDW *${xip}[0],$Z3:$Z2
|
||||
|
||||
PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes
|
||||
AND $H2,$FF000000,$H2u ; H2's upper byte
|
||||
AND $H3,$FF000000,$H3u ; H3's upper byte
|
||||
|| SHRU $H2u,8,$H2u
|
||||
SHRU $H3u,8,$H3u
|
||||
SHRU2 $xia,8,$H01u
|
||||
|
||||
|| [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
|
||||
|| [B0] XOR $H1x,$Z1,$Z1
|
||||
.if .LITTLE_ENDIAN
|
||||
[B0] XOR $H2x,$Z2,$Z2
|
||||
|| [B0] XOR $H3x,$Z3,$Z3
|
||||
|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall
|
||||
STDW $Z1:$Z0,*${xip}[1]
|
||||
|| [B0] SHRU $Z1,16,$x0 ; Xi[14]
|
||||
|| [B0] ZERO $Z1:$Z0
|
||||
.else
|
||||
[B0] XOR $H2x,$Z2,$Z2
|
||||
|| [B0] XOR $H3x,$Z3,$Z3
|
||||
|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall
|
||||
STDW $Z1:$Z0,*${xip}[1]
|
||||
|| [B0] SHRU $Z0,8,$x0 ; Xi[14]
|
||||
|| [B0] ZERO $Z1:$Z0
|
||||
.endif
|
||||
STDW $Z3:$Z2,*${xip}[0]
|
||||
|| [B0] ZERO $Z3:$Z2
|
||||
|| [B0] MV $xia,$x1
|
||||
[B0] ADDK 14,${xip}
|
||||
|
||||
ghash_loop?:
|
||||
SPLOOPD 6 ; 6*16+7
|
||||
|| MVC B1,ILC
|
||||
|| [B0] SUB B0,1,B0
|
||||
|| ZERO A0
|
||||
|| ADD $x1,$x1,$xib ; SHL $x1,1,$xib
|
||||
|| SHL $x1,1,$xia
|
||||
___
|
||||
|
||||
########____________________________
|
||||
# 0 D2. M1 M2 |
|
||||
# 1 M1 |
|
||||
# 2 M1 M2 |
|
||||
# 3 D1. M1 M2 |
|
||||
# 4 S1. L1 |
|
||||
# 5 S2 S1x L1 D2 L2 |____________________________
|
||||
# 6/0 L1 S1 L2 S2x |D2. M1 M2 |
|
||||
# 7/1 L1 S1 D1x S2 M2 | M1 |
|
||||
# 8/2 S1 L1x S2 | M1 M2 |
|
||||
# 9/3 S1 L1x | D1. M1 M2 |
|
||||
# 10/4 D1x | S1. L1 |
|
||||
# 11/5 |S2 S1x L1 D2 L2 |____________
|
||||
# 12/6/0 D1x __| L1 S1 L2 S2x |D2. ....
|
||||
# 7/1 L1 S1 D1x S2 M2 | ....
|
||||
# 8/2 S1 L1x S2 | ....
|
||||
#####... ................|............
|
||||
$code.=<<___;
|
||||
XORMPY $H0,$xia,$H0x ; 0 ; H·(Xi[i]<<1)
|
||||
|| XORMPY $H01u,$xib,$H01y
|
||||
|| [A0] LDBU *--${xip},$x0
|
||||
XORMPY $H1,$xia,$H1x ; 1
|
||||
XORMPY $H2,$xia,$H2x ; 2
|
||||
|| XORMPY $H2u,$xib,$H2y
|
||||
XORMPY $H3,$xia,$H3x ; 3
|
||||
|| XORMPY $H3u,$xib,$H3y
|
||||
||[!A0] MVK.D 15,A0 ; *--${xip} counter
|
||||
XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·(Xi[i]<<1)
|
||||
|| [A0] SUB.S A0,1,A0
|
||||
XOR.L $H1x,$Z1,$Z1 ; 5
|
||||
|| AND.D $H01y,$FF000000,$H0z
|
||||
|| SWAP2.L $H01y,$H1y ; ; SHL $H01y,16,$H1y
|
||||
|| SHL $x0,1,$xib
|
||||
|| SHL $x0,1,$xia
|
||||
|
||||
XOR.L $H2x,$Z2,$Z2 ; 6/0 ; [0,0] in epilogue
|
||||
|| SHL $Z0,1,$rem ; ; rem=Z<<1
|
||||
|| SHRMB.S $Z1,$Z0,$Z0 ; ; Z>>=8
|
||||
|| AND.L $H1y,$FF000000,$H1z
|
||||
XOR.L $H3x,$Z3,$Z3 ; 7/1
|
||||
|| SHRMB.S $Z2,$Z1,$Z1
|
||||
|| XOR.D $H0z,$Z0,$Z0 ; merge upper byte products
|
||||
|| AND.S $H2y,$FF000000,$H2z
|
||||
|| XORMPY $E10000,$rem,$res ; ; implicit rem&0x1FE
|
||||
XOR.L $H1z,$Z1,$Z1 ; 8/2
|
||||
|| SHRMB.S $Z3,$Z2,$Z2
|
||||
|| AND.S $H3y,$FF000000,$H3z
|
||||
XOR.L $H2z,$Z2,$Z2 ; 9/3
|
||||
|| SHRU $Z3,8,$Z3
|
||||
XOR.D $H3z,$Z3,$Z3 ; 10/4
|
||||
NOP ; 11/5
|
||||
|
||||
SPKERNEL 0,2
|
||||
|| XOR.D $res,$Z3,$Z3 ; 12/6/0; Z^=res
|
||||
|
||||
; input pre-fetch is possible where D1 slot is available...
|
||||
[B0] LDNDW *${inp}[1],$H1x:$H0x ; 8/-
|
||||
[B0] LDNDW *${inp}++[2],$H3x:$H2x ; 9/-
|
||||
NOP ; 10/-
|
||||
.if .LITTLE_ENDIAN
|
||||
SWAP2 $Z0,$Z1 ; 11/-
|
||||
|| SWAP4 $Z1,$Z0
|
||||
SWAP4 $Z1,$Z1 ; 12/-
|
||||
|| SWAP2 $Z0,$Z0
|
||||
SWAP2 $Z2,$Z3
|
||||
|| SWAP4 $Z3,$Z2
|
||||
||[!B0] BNOP RA
|
||||
SWAP4 $Z3,$Z3
|
||||
|| SWAP2 $Z2,$Z2
|
||||
|| [B0] BNOP ghash_loop?
|
||||
[B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
|
||||
|| [B0] XOR $H1x,$Z1,$Z1
|
||||
[B0] XOR $H2x,$Z2,$Z2
|
||||
|| [B0] XOR $H3x,$Z3,$Z3
|
||||
|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall
|
||||
STDW $Z1:$Z0,*${xip}[1]
|
||||
|| [B0] SHRU $Z1,16,$x0 ; Xi[14]
|
||||
|| [B0] ZERO $Z1:$Z0
|
||||
.else
|
||||
[!B0] BNOP RA ; 11/-
|
||||
[B0] BNOP ghash_loop? ; 12/-
|
||||
[B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
|
||||
|| [B0] XOR $H1x,$Z1,$Z1
|
||||
[B0] XOR $H2x,$Z2,$Z2
|
||||
|| [B0] XOR $H3x,$Z3,$Z3
|
||||
|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall
|
||||
STDW $Z1:$Z0,*${xip}[1]
|
||||
|| [B0] SHRU $Z0,8,$x0 ; Xi[14]
|
||||
|| [B0] ZERO $Z1:$Z0
|
||||
.endif
|
||||
STDW $Z3:$Z2,*${xip}[0]
|
||||
|| [B0] ZERO $Z3:$Z2
|
||||
|| [B0] MV $xia,$x1
|
||||
[B0] ADDK 14,${xip}
|
||||
.endasmfunc
|
||||
|
||||
.sect .const
|
||||
.cstring "GHASH for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 4
|
||||
___
|
||||
|
||||
print $code;
|
||||
close STDOUT;
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -32,7 +39,7 @@
|
||||
# Itanium performance should remain the same as the "256B" version,
|
||||
# i.e. ~8.5 cycles.
|
||||
|
||||
$output=shift and (open STDOUT,">$output" or die "can't open $output: $!");
|
||||
$output=pop and (open STDOUT,">$output" or die "can't open $output: $!");
|
||||
|
||||
if ($^O eq "hpux") {
|
||||
$ADDP="addp4";
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -47,7 +54,7 @@ if ($flavour =~ /3[12]/) {
|
||||
$g="g";
|
||||
}
|
||||
|
||||
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
|
||||
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output";
|
||||
|
||||
$softonly=0;
|
||||
@@ -85,9 +92,7 @@ $code.=<<___ if(!$softonly && 0); # hardware is slow for single block...
|
||||
tmhl %r0,0x4000 # check for message-security-assist
|
||||
jz .Lsoft_gmult
|
||||
lghi %r0,0
|
||||
la %r1,16($sp)
|
||||
.long 0xb93e0004 # kimd %r0,%r4
|
||||
lg %r1,24($sp)
|
||||
lg %r1,24(%r1) # load second word of kimd capabilities vector
|
||||
tmhh %r1,0x4000 # check for function 65
|
||||
jz .Lsoft_gmult
|
||||
stg %r0,16($sp) # arrange 16 bytes of zero input
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -46,14 +53,12 @@
|
||||
# saturates at ~15.5x single-process result on 8-core processor,
|
||||
# or ~20.5GBps per 2.85GHz socket.
|
||||
|
||||
$bits=32;
|
||||
for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
|
||||
if ($bits==64) { $bias=2047; $frame=192; }
|
||||
else { $bias=0; $frame=112; }
|
||||
|
||||
$output=shift;
|
||||
$output=pop;
|
||||
open STDOUT,">$output";
|
||||
|
||||
$frame="STACK_FRAME";
|
||||
$bias="STACK_BIAS";
|
||||
|
||||
$Zhi="%o0"; # 64-bit values
|
||||
$Zlo="%o1";
|
||||
$Thi="%o2";
|
||||
@@ -75,11 +80,14 @@ $Htbl="%i1";
|
||||
$inp="%i2";
|
||||
$len="%i3";
|
||||
|
||||
$code.=<<___ if ($bits==64);
|
||||
$code.=<<___;
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
___
|
||||
$code.=<<___;
|
||||
#endif
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
.align 64
|
||||
@@ -183,7 +191,7 @@ gcm_ghash_4bit:
|
||||
|
||||
add $inp,16,$inp
|
||||
cmp $inp,$len
|
||||
be,pn `$bits==64?"%xcc":"%icc"`,.Ldone
|
||||
be,pn SIZE_T_CC,.Ldone
|
||||
and $Zlo,0xf,$remi
|
||||
|
||||
ldx [$Htblo+$nhi],$Tlo
|
||||
@@ -379,7 +387,7 @@ gcm_init_vis3:
|
||||
or $V,%lo(0xA0406080),$V
|
||||
or %l0,%lo(0x20C0E000),%l0
|
||||
sllx $V,32,$V
|
||||
or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000
|
||||
or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000
|
||||
stx $V,[%i0+16]
|
||||
|
||||
ret
|
||||
@@ -399,7 +407,7 @@ gcm_gmult_vis3:
|
||||
|
||||
mov 0xE1,%l7
|
||||
sllx %l7,57,$xE1 ! 57 is not a typo
|
||||
ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
|
||||
ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
|
||||
|
||||
xor $Hhi,$Hlo,$Hhl ! Karatsuba pre-processing
|
||||
xmulx $Xlo,$Hlo,$C0
|
||||
@@ -411,9 +419,9 @@ gcm_gmult_vis3:
|
||||
xmulx $Xhi,$Hhi,$Xhi
|
||||
|
||||
sll $C0,3,$sqr
|
||||
srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
|
||||
srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
|
||||
xor $C0,$sqr,$sqr
|
||||
sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
|
||||
sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
|
||||
|
||||
xor $C0,$C1,$C1 ! Karatsuba post-processing
|
||||
xor $Xlo,$C2,$C2
|
||||
@@ -423,7 +431,7 @@ gcm_gmult_vis3:
|
||||
xor $Xhi,$C2,$C2
|
||||
xor $Xhi,$C1,$C1
|
||||
|
||||
xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
|
||||
xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
|
||||
xor $C0,$C2,$C2
|
||||
xmulx $C1,$xE1,$C0
|
||||
xor $C1,$C3,$C3
|
||||
@@ -445,6 +453,8 @@ gcm_gmult_vis3:
|
||||
.align 32
|
||||
gcm_ghash_vis3:
|
||||
save %sp,-$frame,%sp
|
||||
nop
|
||||
srln $len,0,$len ! needed on v8+, "nop" on v9
|
||||
|
||||
ldx [$Xip+8],$C2 ! load Xi
|
||||
ldx [$Xip+0],$C3
|
||||
@@ -453,7 +463,7 @@ gcm_ghash_vis3:
|
||||
|
||||
mov 0xE1,%l7
|
||||
sllx %l7,57,$xE1 ! 57 is not a typo
|
||||
ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
|
||||
ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
|
||||
|
||||
and $inp,7,$shl
|
||||
andn $inp,7,$inp
|
||||
@@ -490,9 +500,9 @@ gcm_ghash_vis3:
|
||||
xmulx $Xhi,$Hhi,$Xhi
|
||||
|
||||
sll $C0,3,$sqr
|
||||
srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
|
||||
srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
|
||||
xor $C0,$sqr,$sqr
|
||||
sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
|
||||
sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
|
||||
|
||||
xor $C0,$C1,$C1 ! Karatsuba post-processing
|
||||
xor $Xlo,$C2,$C2
|
||||
@@ -502,7 +512,7 @@ gcm_ghash_vis3:
|
||||
xor $Xhi,$C2,$C2
|
||||
xor $Xhi,$C1,$C1
|
||||
|
||||
xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
|
||||
xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
|
||||
xor $C0,$C2,$C2
|
||||
xmulx $C1,$xE1,$C0
|
||||
xor $C1,$C3,$C3
|
||||
@@ -530,7 +540,7 @@ ___
|
||||
|
||||
# Purpose of these subroutines is to explicitly encode VIS instructions,
|
||||
# so that one can compile the module without having to specify VIS
|
||||
# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
|
||||
# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
|
||||
# Idea is to reserve for option to produce "universal" binary and let
|
||||
# programmer detect if current CPU is VIS capable at run-time.
|
||||
sub unvis3 {
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -88,7 +95,7 @@
|
||||
# where Tproc is time required for Karatsuba pre- and post-processing,
|
||||
# is more realistic estimate. In this case it gives ... 1.91 cycles.
|
||||
# Or in other words, depending on how well we can interleave reduction
|
||||
# and one of the two multiplications the performance should be betwen
|
||||
# and one of the two multiplications the performance should be between
|
||||
# 1.91 and 2.16. As already mentioned, this implementation processes
|
||||
# one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart
|
||||
# - in 2.02. x86_64 performance is better, because larger register
|
||||
@@ -129,6 +136,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "x86asm.pl";
|
||||
|
||||
$output=pop;
|
||||
open STDOUT,">$output";
|
||||
|
||||
&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
|
||||
|
||||
$sse2=0;
|
||||
@@ -358,7 +368,7 @@ $S=12; # shift factor for rem_4bit
|
||||
# effective address calculation and finally merge of value to Z.hi.
|
||||
# Reference to rem_4bit is scheduled so late that I had to >>4
|
||||
# rem_4bit elements. This resulted in 20-45% improvement
|
||||
# on contemporary µ-archs.
|
||||
# on contemporary µ-archs.
|
||||
{
|
||||
my $cnt;
|
||||
my $rem_4bit = "eax";
|
||||
@@ -712,7 +722,7 @@ sub mmx_loop() {
|
||||
&pxor ($red[1],$red[1]);
|
||||
&pxor ($red[2],$red[2]);
|
||||
|
||||
# Just like in "May" verson modulo-schedule for critical path in
|
||||
# Just like in "May" version modulo-schedule for critical path in
|
||||
# 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor'
|
||||
# is scheduled so late that rem_8bit[] has to be shifted *right*
|
||||
# by 16, which is why last argument to pinsrw is 2, which
|
||||
@@ -1138,7 +1148,7 @@ my ($Xhi,$Xi) = @_;
|
||||
&movdqu (&QWP(0,$Xip),$Xi);
|
||||
&function_end("gcm_ghash_clmul");
|
||||
|
||||
} else { # Algorith 5. Kept for reference purposes.
|
||||
} else { # Algorithm 5. Kept for reference purposes.
|
||||
|
||||
sub reduction_alg5 { # 19/16 times faster than Intel version
|
||||
my ($Xhi,$Xi)=@_;
|
||||
@@ -1369,6 +1379,8 @@ my ($Xhi,$Xi)=@_;
|
||||
&asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
|
||||
&asm_finish();
|
||||
|
||||
close STDOUT;
|
||||
|
||||
# A question was raised about choice of vanilla MMX. Or rather why wasn't
|
||||
# SSE2 chosen instead? In addition to the fact that MMX runs on legacy
|
||||
# CPUs such as PIII, "4-bit" MMX version was observed to provide better
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -64,8 +71,10 @@
|
||||
# Ivy Bridge 1.80(+7%)
|
||||
# Haswell 0.55(+93%) (if system doesn't support AVX)
|
||||
# Broadwell 0.45(+110%)(if system doesn't support AVX)
|
||||
# Skylake 0.44(+110%)(if system doesn't support AVX)
|
||||
# Bulldozer 1.49(+27%)
|
||||
# Silvermont 2.88(+13%)
|
||||
# Goldmont 1.08(+24%)
|
||||
|
||||
# March 2013
|
||||
#
|
||||
@@ -74,8 +83,8 @@
|
||||
# CPUs such as Sandy and Ivy Bridge can execute it, the code performs
|
||||
# sub-optimally in comparison to above mentioned version. But thanks
|
||||
# to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that
|
||||
# it performs in 0.41 cycles per byte on Haswell processor, and in
|
||||
# 0.29 on Broadwell.
|
||||
# it performs in 0.41 cycles per byte on Haswell processor, in
|
||||
# 0.29 on Broadwell, and in 0.36 on Skylake.
|
||||
#
|
||||
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
|
||||
|
||||
@@ -92,7 +101,7 @@ die "can't locate x86_64-xlate.pl";
|
||||
|
||||
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
|
||||
=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
|
||||
$avx = ($1>=2.19) + ($1>=2.22);
|
||||
$avx = ($1>=2.20) + ($1>=2.22);
|
||||
}
|
||||
|
||||
if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
|
||||
@@ -105,11 +114,11 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
|
||||
$avx = ($1>=10) + ($1>=11);
|
||||
}
|
||||
|
||||
if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
|
||||
if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
|
||||
$avx = ($2>=3.0) + ($2>3.0);
|
||||
}
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
$do4xaggr=1;
|
||||
@@ -576,15 +585,15 @@ $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0));
|
||||
# experimental alternative. special thing about is that there
|
||||
# no dependency between the two multiplications...
|
||||
mov \$`0xE1<<1`,%eax
|
||||
mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
|
||||
mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
|
||||
mov \$0x07,%r11d
|
||||
movq %rax,$T1
|
||||
movq %r10,$T2
|
||||
movq %r11,$T3 # borrow $T3
|
||||
pand $Xi,$T3
|
||||
pshufb $T3,$T2 # ($Xi&7)·0xE0
|
||||
pshufb $T3,$T2 # ($Xi&7)·0xE0
|
||||
movq %rax,$T3
|
||||
pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
|
||||
pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
|
||||
pxor $Xi,$T2
|
||||
pslldq \$15,$T2
|
||||
paddd $T2,$T2 # <<(64+56+1)
|
||||
@@ -657,7 +666,7 @@ $code.=<<___;
|
||||
je .Lskip4x
|
||||
|
||||
sub \$0x30,$len
|
||||
mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
|
||||
mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
|
||||
movdqu 0x30($Htbl),$Hkey3
|
||||
movdqu 0x40($Htbl),$Hkey4
|
||||
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -18,6 +25,12 @@
|
||||
# faster than "4-bit" integer-only compiler-generated 64-bit code.
|
||||
# "Initial version" means that there is room for further improvement.
|
||||
|
||||
# May 2016
|
||||
#
|
||||
# 2x aggregated reduction improves performance by 50% (resulting
|
||||
# performance on POWER8 is 1 cycle per processed byte), and 4x
|
||||
# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
|
||||
|
||||
$flavour=shift;
|
||||
$output =shift;
|
||||
|
||||
@@ -27,14 +40,21 @@ if ($flavour =~ /64/) {
|
||||
$STU="stdu";
|
||||
$POP="ld";
|
||||
$PUSH="std";
|
||||
$UCMP="cmpld";
|
||||
$SHRI="srdi";
|
||||
} elsif ($flavour =~ /32/) {
|
||||
$SIZE_T=4;
|
||||
$LRSAVE=$SIZE_T;
|
||||
$STU="stwu";
|
||||
$POP="lwz";
|
||||
$PUSH="stw";
|
||||
$UCMP="cmplw";
|
||||
$SHRI="srwi";
|
||||
} else { die "nonsense $flavour"; }
|
||||
|
||||
$sp="r1";
|
||||
$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
|
||||
@@ -46,6 +66,7 @@ my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
|
||||
|
||||
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
|
||||
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
|
||||
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
|
||||
my $vrsave="r12";
|
||||
|
||||
$code=<<___;
|
||||
@@ -56,7 +77,7 @@ $code=<<___;
|
||||
.globl .gcm_init_p8
|
||||
.align 5
|
||||
.gcm_init_p8:
|
||||
lis r0,0xfff0
|
||||
li r0,-4096
|
||||
li r8,0x10
|
||||
mfspr $vrsave,256
|
||||
li r9,0x20
|
||||
@@ -78,17 +99,103 @@ $code=<<___;
|
||||
vsl $H,$H,$t0 # H<<=1
|
||||
vsrab $t1,$t1,$t2 # broadcast carry bit
|
||||
vand $t1,$t1,$xC2
|
||||
vxor $H,$H,$t1 # twisted H
|
||||
vxor $IN,$H,$t1 # twisted H
|
||||
|
||||
vsldoi $H,$H,$H,8 # twist even more ...
|
||||
vsldoi $H,$IN,$IN,8 # twist even more ...
|
||||
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
|
||||
vsldoi $Hl,$zero,$H,8 # ... and split
|
||||
vsldoi $Hh,$H,$zero,8
|
||||
|
||||
stvx_u $xC2,0,r3 # save pre-computed table
|
||||
stvx_u $Hl,r8,r3
|
||||
li r8,0x40
|
||||
stvx_u $H, r9,r3
|
||||
li r9,0x50
|
||||
stvx_u $Hh,r10,r3
|
||||
li r10,0x60
|
||||
|
||||
vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
|
||||
vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
|
||||
vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $IN1,$Xl,$t1
|
||||
|
||||
vsldoi $H2,$IN1,$IN1,8
|
||||
vsldoi $H2l,$zero,$H2,8
|
||||
vsldoi $H2h,$H2,$zero,8
|
||||
|
||||
stvx_u $H2l,r8,r3 # save H^2
|
||||
li r8,0x70
|
||||
stvx_u $H2,r9,r3
|
||||
li r9,0x80
|
||||
stvx_u $H2h,r10,r3
|
||||
li r10,0x90
|
||||
___
|
||||
{
|
||||
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
|
||||
$code.=<<___;
|
||||
vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
|
||||
vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
|
||||
vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
|
||||
vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
|
||||
vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
|
||||
vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vsldoi $t4,$Xm1,$zero,8
|
||||
vsldoi $t5,$zero,$Xm1,8
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
vxor $Xl1,$Xl1,$t4
|
||||
vxor $Xh1,$Xh1,$t5
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vsldoi $Xl1,$Xl1,$Xl1,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
vxor $Xl1,$Xl1,$t6
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
vpmsumd $Xl1,$Xl1,$xC2
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $t5,$t5,$Xh1
|
||||
vxor $Xl,$Xl,$t1
|
||||
vxor $Xl1,$Xl1,$t5
|
||||
|
||||
vsldoi $H,$Xl,$Xl,8
|
||||
vsldoi $H2,$Xl1,$Xl1,8
|
||||
vsldoi $Hl,$zero,$H,8
|
||||
vsldoi $Hh,$H,$zero,8
|
||||
vsldoi $H2l,$zero,$H2,8
|
||||
vsldoi $H2h,$H2,$zero,8
|
||||
|
||||
stvx_u $Hl,r8,r3 # save H^3
|
||||
li r8,0xa0
|
||||
stvx_u $H,r9,r3
|
||||
li r9,0xb0
|
||||
stvx_u $Hh,r10,r3
|
||||
li r10,0xc0
|
||||
stvx_u $H2l,r8,r3 # save H^4
|
||||
stvx_u $H2,r9,r3
|
||||
stvx_u $H2h,r10,r3
|
||||
|
||||
mtspr 256,$vrsave
|
||||
blr
|
||||
@@ -96,7 +203,9 @@ $code=<<___;
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size .gcm_init_p8,.-.gcm_init_p8
|
||||
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
.globl .gcm_gmult_p8
|
||||
.align 5
|
||||
.gcm_gmult_p8:
|
||||
@@ -118,11 +227,11 @@ $code=<<___;
|
||||
le?vperm $IN,$IN,$IN,$lemask
|
||||
vxor $zero,$zero,$zero
|
||||
|
||||
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
|
||||
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
|
||||
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
|
||||
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
|
||||
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
|
||||
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st phase
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
@@ -132,7 +241,7 @@ $code=<<___;
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $Xl,$Xl,$t1
|
||||
@@ -150,7 +259,7 @@ $code=<<___;
|
||||
.globl .gcm_ghash_p8
|
||||
.align 5
|
||||
.gcm_ghash_p8:
|
||||
lis r0,0xfff8
|
||||
li r0,-4096
|
||||
li r8,0x10
|
||||
mfspr $vrsave,256
|
||||
li r9,0x20
|
||||
@@ -159,33 +268,85 @@ $code=<<___;
|
||||
lvx_u $Xl,0,$Xip # load Xi
|
||||
|
||||
lvx_u $Hl,r8,$Htbl # load pre-computed table
|
||||
li r8,0x40
|
||||
le?lvsl $lemask,r0,r0
|
||||
lvx_u $H, r9,$Htbl
|
||||
li r9,0x50
|
||||
le?vspltisb $t0,0x07
|
||||
lvx_u $Hh,r10,$Htbl
|
||||
li r10,0x60
|
||||
le?vxor $lemask,$lemask,$t0
|
||||
lvx_u $xC2,0,$Htbl
|
||||
le?vperm $Xl,$Xl,$Xl,$lemask
|
||||
vxor $zero,$zero,$zero
|
||||
|
||||
${UCMP}i $len,64
|
||||
bge Lgcm_ghash_p8_4x
|
||||
|
||||
lvx_u $IN,0,$inp
|
||||
addi $inp,$inp,16
|
||||
subi $len,$len,16
|
||||
subic. $len,$len,16
|
||||
le?vperm $IN,$IN,$IN,$lemask
|
||||
vxor $IN,$IN,$Xl
|
||||
b Loop
|
||||
beq Lshort
|
||||
|
||||
lvx_u $H2l,r8,$Htbl # load H^2
|
||||
li r8,16
|
||||
lvx_u $H2, r9,$Htbl
|
||||
add r9,$inp,$len # end of input
|
||||
lvx_u $H2h,r10,$Htbl
|
||||
be?b Loop_2x
|
||||
|
||||
.align 5
|
||||
Loop:
|
||||
subic $len,$len,16
|
||||
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
|
||||
subfe. r0,r0,r0 # borrow?-1:0
|
||||
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
|
||||
Loop_2x:
|
||||
lvx_u $IN1,0,$inp
|
||||
le?vperm $IN1,$IN1,$IN1,$lemask
|
||||
|
||||
subic $len,$len,32
|
||||
vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo
|
||||
vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo
|
||||
subfe r0,r0,r0 # borrow?-1:0
|
||||
vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi
|
||||
vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi
|
||||
and r0,r0,$len
|
||||
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
|
||||
vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi
|
||||
vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi
|
||||
add $inp,$inp,r0
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st phase
|
||||
vxor $Xl,$Xl,$Xl1
|
||||
vxor $Xm,$Xm,$Xm1
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vxor $Xh,$Xh,$Xh1
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
lvx_u $IN,r8,$inp
|
||||
addi $inp,$inp,32
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
le?vperm $IN,$IN,$IN,$lemask
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $IN,$IN,$t1
|
||||
vxor $IN,$IN,$Xl
|
||||
$UCMP r9,$inp
|
||||
bgt Loop_2x # done yet?
|
||||
|
||||
cmplwi $len,0
|
||||
bne Leven
|
||||
|
||||
Lshort:
|
||||
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
|
||||
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
|
||||
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
@@ -194,17 +355,12 @@ Loop:
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
lvx_u $IN,0,$inp
|
||||
addi $inp,$inp,16
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
le?vperm $IN,$IN,$IN,$lemask
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $IN,$IN,$t1
|
||||
vxor $IN,$IN,$Xl
|
||||
beq Loop # did $len-=16 borrow?
|
||||
|
||||
Leven:
|
||||
vxor $Xl,$Xl,$t1
|
||||
le?vperm $Xl,$Xl,$Xl,$lemask
|
||||
stvx_u $Xl,0,$Xip # write out Xi
|
||||
@@ -214,6 +370,284 @@ Loop:
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,4,0
|
||||
.long 0
|
||||
___
|
||||
{
|
||||
my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
|
||||
$Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
|
||||
my $IN0=$IN;
|
||||
my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);
|
||||
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
.gcm_ghash_p8_4x:
|
||||
Lgcm_ghash_p8_4x:
|
||||
$STU $sp,-$FRAME($sp)
|
||||
li r10,`15+6*$SIZE_T`
|
||||
li r11,`31+6*$SIZE_T`
|
||||
stvx v20,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v21,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v22,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v23,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v24,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v25,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v26,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v27,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v28,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v29,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v30,r10,$sp
|
||||
li r10,0x60
|
||||
stvx v31,r11,$sp
|
||||
li r0,-1
|
||||
stw $vrsave,`$FRAME-4`($sp) # save vrsave
|
||||
mtspr 256,r0 # preserve all AltiVec registers
|
||||
|
||||
lvsl $t0,0,r8 # 0x0001..0e0f
|
||||
#lvx_u $H2l,r8,$Htbl # load H^2
|
||||
li r8,0x70
|
||||
lvx_u $H2, r9,$Htbl
|
||||
li r9,0x80
|
||||
vspltisb $t1,8 # 0x0808..0808
|
||||
#lvx_u $H2h,r10,$Htbl
|
||||
li r10,0x90
|
||||
lvx_u $H3l,r8,$Htbl # load H^3
|
||||
li r8,0xa0
|
||||
lvx_u $H3, r9,$Htbl
|
||||
li r9,0xb0
|
||||
lvx_u $H3h,r10,$Htbl
|
||||
li r10,0xc0
|
||||
lvx_u $H4l,r8,$Htbl # load H^4
|
||||
li r8,0x10
|
||||
lvx_u $H4, r9,$Htbl
|
||||
li r9,0x20
|
||||
lvx_u $H4h,r10,$Htbl
|
||||
li r10,0x30
|
||||
|
||||
vsldoi $t2,$zero,$t1,8 # 0x0000..0808
|
||||
vaddubm $hiperm,$t0,$t2 # 0x0001..1617
|
||||
vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f
|
||||
|
||||
$SHRI $len,$len,4 # this lets the sign bit be used
|
||||
# as carry
|
||||
lvx_u $IN0,0,$inp # load input
|
||||
lvx_u $IN1,r8,$inp
|
||||
subic. $len,$len,8
|
||||
lvx_u $IN2,r9,$inp
|
||||
lvx_u $IN3,r10,$inp
|
||||
addi $inp,$inp,0x40
|
||||
le?vperm $IN0,$IN0,$IN0,$lemask
|
||||
le?vperm $IN1,$IN1,$IN1,$lemask
|
||||
le?vperm $IN2,$IN2,$IN2,$lemask
|
||||
le?vperm $IN3,$IN3,$IN3,$lemask
|
||||
|
||||
vxor $Xh,$IN0,$Xl
|
||||
|
||||
vpmsumd $Xl1,$IN1,$H3l
|
||||
vpmsumd $Xm1,$IN1,$H3
|
||||
vpmsumd $Xh1,$IN1,$H3h
|
||||
|
||||
vperm $H21l,$H2,$H,$hiperm
|
||||
vperm $t0,$IN2,$IN3,$loperm
|
||||
vperm $H21h,$H2,$H,$loperm
|
||||
vperm $t1,$IN2,$IN3,$hiperm
|
||||
vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
|
||||
vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
|
||||
vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
|
||||
vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
|
||||
|
||||
vxor $Xm2,$Xm2,$Xm1
|
||||
vxor $Xl3,$Xl3,$Xl1
|
||||
vxor $Xm3,$Xm3,$Xm2
|
||||
vxor $Xh3,$Xh3,$Xh1
|
||||
|
||||
blt Ltail_4x
|
||||
|
||||
Loop_4x:
|
||||
lvx_u $IN0,0,$inp
|
||||
lvx_u $IN1,r8,$inp
|
||||
subic. $len,$len,4
|
||||
lvx_u $IN2,r9,$inp
|
||||
lvx_u $IN3,r10,$inp
|
||||
addi $inp,$inp,0x40
|
||||
le?vperm $IN1,$IN1,$IN1,$lemask
|
||||
le?vperm $IN2,$IN2,$IN2,$lemask
|
||||
le?vperm $IN3,$IN3,$IN3,$lemask
|
||||
le?vperm $IN0,$IN0,$IN0,$lemask
|
||||
|
||||
vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
|
||||
vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
|
||||
vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
|
||||
vpmsumd $Xl1,$IN1,$H3l
|
||||
vpmsumd $Xm1,$IN1,$H3
|
||||
vpmsumd $Xh1,$IN1,$H3h
|
||||
|
||||
vxor $Xl,$Xl,$Xl3
|
||||
vxor $Xm,$Xm,$Xm3
|
||||
vxor $Xh,$Xh,$Xh3
|
||||
vperm $t0,$IN2,$IN3,$loperm
|
||||
vperm $t1,$IN2,$IN3,$hiperm
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo
|
||||
vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
|
||||
vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
|
||||
vxor $Xl3,$Xl3,$Xl1
|
||||
vxor $Xh3,$Xh3,$Xh1
|
||||
vxor $Xh,$Xh,$IN0
|
||||
vxor $Xm2,$Xm2,$Xm1
|
||||
vxor $Xh,$Xh,$t1
|
||||
vxor $Xm3,$Xm3,$Xm2
|
||||
vxor $Xh,$Xh,$Xl
|
||||
bge Loop_4x
|
||||
|
||||
Ltail_4x:
|
||||
vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
|
||||
vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
|
||||
vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
|
||||
|
||||
vxor $Xl,$Xl,$Xl3
|
||||
vxor $Xm,$Xm,$Xm3
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vxor $Xh,$Xh,$Xh3
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $Xl,$Xl,$t1
|
||||
|
||||
addic. $len,$len,4
|
||||
beq Ldone_4x
|
||||
|
||||
lvx_u $IN0,0,$inp
|
||||
${UCMP}i $len,2
|
||||
li $len,-4
|
||||
blt Lone
|
||||
lvx_u $IN1,r8,$inp
|
||||
beq Ltwo
|
||||
|
||||
Lthree:
|
||||
lvx_u $IN2,r9,$inp
|
||||
le?vperm $IN0,$IN0,$IN0,$lemask
|
||||
le?vperm $IN1,$IN1,$IN1,$lemask
|
||||
le?vperm $IN2,$IN2,$IN2,$lemask
|
||||
|
||||
vxor $Xh,$IN0,$Xl
|
||||
vmr $H4l,$H3l
|
||||
vmr $H4, $H3
|
||||
vmr $H4h,$H3h
|
||||
|
||||
vperm $t0,$IN1,$IN2,$loperm
|
||||
vperm $t1,$IN1,$IN2,$hiperm
|
||||
vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
|
||||
vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi
|
||||
vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
|
||||
vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
|
||||
|
||||
vxor $Xm3,$Xm3,$Xm2
|
||||
b Ltail_4x
|
||||
|
||||
.align 4
|
||||
Ltwo:
|
||||
le?vperm $IN0,$IN0,$IN0,$lemask
|
||||
le?vperm $IN1,$IN1,$IN1,$lemask
|
||||
|
||||
vxor $Xh,$IN0,$Xl
|
||||
vperm $t0,$zero,$IN1,$loperm
|
||||
vperm $t1,$zero,$IN1,$hiperm
|
||||
|
||||
vsldoi $H4l,$zero,$H2,8
|
||||
vmr $H4, $H2
|
||||
vsldoi $H4h,$H2,$zero,8
|
||||
|
||||
vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo
|
||||
vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi
|
||||
vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi
|
||||
|
||||
b Ltail_4x
|
||||
|
||||
.align 4
|
||||
Lone:
|
||||
le?vperm $IN0,$IN0,$IN0,$lemask
|
||||
|
||||
vsldoi $H4l,$zero,$H,8
|
||||
vmr $H4, $H
|
||||
vsldoi $H4h,$H,$zero,8
|
||||
|
||||
vxor $Xh,$IN0,$Xl
|
||||
vxor $Xl3,$Xl3,$Xl3
|
||||
vxor $Xm3,$Xm3,$Xm3
|
||||
vxor $Xh3,$Xh3,$Xh3
|
||||
|
||||
b Ltail_4x
|
||||
|
||||
Ldone_4x:
|
||||
le?vperm $Xl,$Xl,$Xl,$lemask
|
||||
stvx_u $Xl,0,$Xip # write out Xi
|
||||
|
||||
li r10,`15+6*$SIZE_T`
|
||||
li r11,`31+6*$SIZE_T`
|
||||
mtspr 256,$vrsave
|
||||
lvx v20,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v21,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v22,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v23,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v24,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v25,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v26,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v27,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v28,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v29,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v30,r10,$sp
|
||||
lvx v31,r11,$sp
|
||||
addi $sp,$sp,$FRAME
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x04,0,0x80,0,4,0
|
||||
.long 0
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
.size .gcm_ghash_p8,.-.gcm_ghash_p8
|
||||
|
||||
.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
@@ -221,6 +655,8 @@ Loop:
|
||||
___
|
||||
|
||||
foreach (split("\n",$code)) {
|
||||
s/\`([^\`]*)\`/eval $1/geo;
|
||||
|
||||
if ($flavour =~ /le$/o) { # little-endian
|
||||
s/le\?//o or
|
||||
s/be\?/#be#/o;
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -27,11 +34,21 @@
|
||||
# Apple A7 0.92 5.62
|
||||
# Cortex-A53 1.01 8.39
|
||||
# Cortex-A57 1.17 7.61
|
||||
# Denver 0.71 6.02
|
||||
# Mongoose 1.10 8.06
|
||||
#
|
||||
# (*) presented for reference/comparison purposes;
|
||||
|
||||
$flavour = shift;
|
||||
open STDOUT,">".shift;
|
||||
$output = shift;
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
$Xi="x0"; # argument block
|
||||
$Htbl="x1";
|
||||
@@ -135,10 +152,10 @@ gcm_gmult_v8:
|
||||
#endif
|
||||
vext.8 $IN,$t1,$t1,#8
|
||||
|
||||
vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
|
||||
vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
|
||||
veor $t1,$t1,$IN @ Karatsuba pre-processing
|
||||
vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
|
||||
vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
|
||||
vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
|
||||
veor $t2,$Xl,$Xh
|
||||
@@ -226,7 +243,7 @@ $code.=<<___;
|
||||
#endif
|
||||
vext.8 $In,$t1,$t1,#8
|
||||
veor $IN,$IN,$Xl @ I[i]^=Xi
|
||||
vpmull.p64 $Xln,$H,$In @ H·Ii+1
|
||||
vpmull.p64 $Xln,$H,$In @ H·Ii+1
|
||||
veor $t1,$t1,$In @ Karatsuba pre-processing
|
||||
vpmull2.p64 $Xhn,$H,$In
|
||||
b .Loop_mod2x_v8
|
||||
@@ -235,14 +252,14 @@ $code.=<<___;
|
||||
.Loop_mod2x_v8:
|
||||
vext.8 $t2,$IN,$IN,#8
|
||||
subs $len,$len,#32 @ is there more data?
|
||||
vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
|
||||
vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
|
||||
cclr $inc,lo @ is it time to zero $inc?
|
||||
|
||||
vpmull.p64 $Xmn,$Hhl,$t1
|
||||
veor $t2,$t2,$IN @ Karatsuba pre-processing
|
||||
vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
|
||||
vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
|
||||
veor $Xl,$Xl,$Xln @ accumulate
|
||||
vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
vld1.64 {$t0},[$inp],$inc @ load [rotated] I[i+2]
|
||||
|
||||
veor $Xh,$Xh,$Xhn
|
||||
@@ -267,7 +284,7 @@ $code.=<<___;
|
||||
vext.8 $In,$t1,$t1,#8
|
||||
vext.8 $IN,$t0,$t0,#8
|
||||
veor $Xl,$Xm,$t2
|
||||
vpmull.p64 $Xln,$H,$In @ H·Ii+1
|
||||
vpmull.p64 $Xln,$H,$In @ H·Ii+1
|
||||
veor $IN,$IN,$Xh @ accumulate $IN early
|
||||
|
||||
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
|
||||
@@ -291,10 +308,10 @@ $code.=<<___;
|
||||
veor $IN,$IN,$Xl @ inp^=Xi
|
||||
veor $t1,$t0,$t2 @ $t1 is rotated inp^Xi
|
||||
|
||||
vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
|
||||
vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
|
||||
veor $t1,$t1,$IN @ Karatsuba pre-processing
|
||||
vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
|
||||
vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
|
||||
vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
|
||||
veor $t2,$Xl,$Xh
|
||||
|
||||
27
crypto/modes/build.info
Normal file
27
crypto/modes/build.info
Normal file
@@ -0,0 +1,27 @@
|
||||
LIBS=../../libcrypto
|
||||
SOURCE[../../libcrypto]=\
|
||||
cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
|
||||
ccm128.c xts128.c wrap128.c ocb128.c \
|
||||
{- $target{modes_asm_src} -}
|
||||
|
||||
INCLUDE[gcm128.o]=..
|
||||
|
||||
GENERATE[ghash-ia64.s]=asm/ghash-ia64.pl $(CFLAGS) $(LIB_CFLAGS)
|
||||
GENERATE[ghash-x86.s]=asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
|
||||
GENERATE[ghash-x86_64.s]=asm/ghash-x86_64.pl $(PERLASM_SCHEME)
|
||||
GENERATE[aesni-gcm-x86_64.s]=asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME)
|
||||
GENERATE[ghash-sparcv9.S]=asm/ghash-sparcv9.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[ghash-sparcv9.o]=..
|
||||
GENERATE[ghash-alpha.S]=asm/ghash-alpha.pl $(PERLASM_SCHEME)
|
||||
GENERATE[ghash-parisc.s]=asm/ghash-parisc.pl $(PERLASM_SCHEME)
|
||||
GENERATE[ghashp8-ppc.s]=asm/ghashp8-ppc.pl $(PERLASM_SCHEME)
|
||||
GENERATE[ghash-armv4.S]=asm/ghash-armv4.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[ghash-armv4.o]=..
|
||||
GENERATE[ghashv8-armx.S]=asm/ghashv8-armx.pl $(PERLASM_SCHEME)
|
||||
INCLUDE[ghashv8-armx.o]=..
|
||||
|
||||
BEGINRAW[Makefile]
|
||||
# GNU make "catch all"
|
||||
{- $builddir -}/ghash-%.S: {- $sourcedir -}/asm/ghash-%.pl
|
||||
CC="$(CC)" $(PERL) $< $(PERLASM_SCHEME) $@
|
||||
ENDRAW[Makefile]
|
||||
@@ -1,64 +1,16 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* openssl-core@openssl.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
/*
|
||||
* Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MODES_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
#if !defined(STRICT_ALIGNMENT) && !defined(PEDANTIC)
|
||||
# define STRICT_ALIGNMENT 0
|
||||
#endif
|
||||
@@ -70,8 +22,6 @@ void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t n;
|
||||
const unsigned char *iv = ivec;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
#if !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
if (STRICT_ALIGNMENT &&
|
||||
((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != 0) {
|
||||
@@ -123,8 +73,6 @@ void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
|
||||
unsigned char c[16];
|
||||
} tmp;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
#if !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
if (in != out) {
|
||||
const unsigned char *iv = ivec;
|
||||
|
||||
@@ -1,63 +1,16 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2011 The OpenSSL Project. All rights reserved.
|
||||
/*
|
||||
* Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* openssl-core@openssl.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MODES_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
* First you setup M and L parameters and pass the key schedule. This is
|
||||
* called once per session setup...
|
||||
|
||||
@@ -1,64 +1,16 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* openssl-core@openssl.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
/*
|
||||
* Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MODES_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
* The input and output encrypted as though 128bit cfb mode is being used.
|
||||
* The extra state information to record how much of the 128bit block we have
|
||||
@@ -72,8 +24,6 @@ void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
unsigned int n;
|
||||
size_t l = 0;
|
||||
|
||||
assert(in && out && key && ivec && num);
|
||||
|
||||
n = *num;
|
||||
|
||||
if (enc) {
|
||||
@@ -190,7 +140,7 @@ static void cfbr_encrypt_block(const unsigned char *in, unsigned char *out,
|
||||
block128_f block)
|
||||
{
|
||||
int n, rem, num;
|
||||
unsigned char ovec[16 * 2 + 1]; /* +1 because we dererefence (but don't
|
||||
unsigned char ovec[16 * 2 + 1]; /* +1 because we dereference (but don't
|
||||
* use) one byte off the end */
|
||||
|
||||
if (nbits <= 0 || nbits > 128)
|
||||
@@ -228,9 +178,6 @@ void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t n;
|
||||
unsigned char c[1], d[1];
|
||||
|
||||
assert(in && out && key && ivec && num);
|
||||
assert(*num == 0);
|
||||
|
||||
for (n = 0; n < bits; ++n) {
|
||||
c[0] = (in[n / 8] & (1 << (7 - n % 8))) ? 0x80 : 0;
|
||||
cfbr_encrypt_block(c, d, 1, key, ivec, enc, block);
|
||||
@@ -246,9 +193,6 @@ void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
|
||||
{
|
||||
size_t n;
|
||||
|
||||
assert(in && out && key && ivec && num);
|
||||
assert(*num == 0);
|
||||
|
||||
for (n = 0; n < length; ++n)
|
||||
cfbr_encrypt_block(&in[n], &out[n], 8, key, ivec, enc, block);
|
||||
}
|
||||
|
||||
@@ -1,64 +1,16 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* openssl-core@openssl.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
/*
|
||||
* Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MODES_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
* NOTE: the IV/counter CTR mode is big-endian. The code itself is
|
||||
* endian-neutral.
|
||||
@@ -67,23 +19,20 @@
|
||||
/* increment counter (128-bit int) by 1 */
|
||||
static void ctr128_inc(unsigned char *counter)
|
||||
{
|
||||
u32 n = 16;
|
||||
u8 c;
|
||||
u32 n = 16, c = 1;
|
||||
|
||||
do {
|
||||
--n;
|
||||
c = counter[n];
|
||||
++c;
|
||||
counter[n] = c;
|
||||
if (c)
|
||||
return;
|
||||
c += counter[n];
|
||||
counter[n] = (u8)c;
|
||||
c >>= 8;
|
||||
} while (n);
|
||||
}
|
||||
|
||||
#if !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
static void ctr128_inc_aligned(unsigned char *counter)
|
||||
{
|
||||
size_t *data, c, n;
|
||||
size_t *data, c, d, n;
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
@@ -91,20 +40,19 @@ static void ctr128_inc_aligned(unsigned char *counter)
|
||||
1
|
||||
};
|
||||
|
||||
if (is_endian.little) {
|
||||
if (is_endian.little || ((size_t)counter % sizeof(size_t)) != 0) {
|
||||
ctr128_inc(counter);
|
||||
return;
|
||||
}
|
||||
|
||||
data = (size_t *)counter;
|
||||
c = 1;
|
||||
n = 16 / sizeof(size_t);
|
||||
do {
|
||||
--n;
|
||||
c = data[n];
|
||||
++c;
|
||||
data[n] = c;
|
||||
if (c)
|
||||
return;
|
||||
d = data[n] += c;
|
||||
/* did addition carry? */
|
||||
c = ((d - c) & ~d) >> (sizeof(size_t) * 8 - 1);
|
||||
} while (n);
|
||||
}
|
||||
#endif
|
||||
@@ -117,7 +65,7 @@ static void ctr128_inc_aligned(unsigned char *counter)
|
||||
* before the first call to CRYPTO_ctr128_encrypt(). This algorithm assumes
|
||||
* that the counter is in the x lower bits of the IV (ivec), and that the
|
||||
* application has full control over overflow and the rest of the IV. This
|
||||
* implementation takes NO responsability for checking that the counter
|
||||
* implementation takes NO responsibility for checking that the counter
|
||||
* doesn't overflow into the rest of the IV when incremented.
|
||||
*/
|
||||
void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
@@ -129,9 +77,6 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
unsigned int n;
|
||||
size_t l = 0;
|
||||
|
||||
assert(in && out && key && ecount_buf && num);
|
||||
assert(*num < 16);
|
||||
|
||||
n = *num;
|
||||
|
||||
#if !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
@@ -144,14 +89,14 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
}
|
||||
|
||||
# if defined(STRICT_ALIGNMENT)
|
||||
if (((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) !=
|
||||
0)
|
||||
if (((size_t)in | (size_t)out | (size_t)ecount_buf)
|
||||
% sizeof(size_t) != 0)
|
||||
break;
|
||||
# endif
|
||||
while (len >= 16) {
|
||||
(*block) (ivec, ecount_buf, key);
|
||||
ctr128_inc_aligned(ivec);
|
||||
for (; n < 16; n += sizeof(size_t))
|
||||
for (n = 0; n < 16; n += sizeof(size_t))
|
||||
*(size_t *)(out + n) =
|
||||
*(size_t *)(in + n) ^ *(size_t *)(ecount_buf + n);
|
||||
len -= 16;
|
||||
@@ -189,16 +134,13 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
/* increment upper 96 bits of 128-bit counter by 1 */
|
||||
static void ctr96_inc(unsigned char *counter)
|
||||
{
|
||||
u32 n = 12;
|
||||
u8 c;
|
||||
u32 n = 12, c = 1;
|
||||
|
||||
do {
|
||||
--n;
|
||||
c = counter[n];
|
||||
++c;
|
||||
counter[n] = c;
|
||||
if (c)
|
||||
return;
|
||||
c += counter[n];
|
||||
counter[n] = (u8)c;
|
||||
c >>= 8;
|
||||
} while (n);
|
||||
}
|
||||
|
||||
@@ -210,9 +152,6 @@ void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
|
||||
{
|
||||
unsigned int n, ctr32;
|
||||
|
||||
assert(in && out && key && ecount_buf && num);
|
||||
assert(*num < 16);
|
||||
|
||||
n = *num;
|
||||
|
||||
while (n && len) {
|
||||
@@ -245,7 +184,7 @@ void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
|
||||
(*func) (in, out, blocks, key, ivec);
|
||||
/* (*ctr) does not update ivec, caller does: */
|
||||
PUTU32(ivec + 12, ctr32);
|
||||
/* ... overflow was detected, propogate carry. */
|
||||
/* ... overflow was detected, propagate carry. */
|
||||
if (ctr32 == 0)
|
||||
ctr96_inc(ivec);
|
||||
blocks *= 16;
|
||||
|
||||
@@ -1,21 +1,16 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
|
||||
/*
|
||||
* Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Rights for redistribution and usage in source and binary
|
||||
* forms are granted according to the OpenSSL license.
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MODES_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
* Trouble with Ciphertext Stealing, CTS, mode is that there is no
|
||||
* common official specification, but couple of cipher/application
|
||||
@@ -36,8 +31,6 @@ size_t CRYPTO_cts128_encrypt_block(const unsigned char *in,
|
||||
{
|
||||
size_t residue, n;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
if (len <= 16)
|
||||
return 0;
|
||||
|
||||
@@ -68,8 +61,6 @@ size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in,
|
||||
{
|
||||
size_t residue, n;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
if (len < 16)
|
||||
return 0;
|
||||
|
||||
@@ -103,8 +94,6 @@ size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
unsigned char c[16];
|
||||
} tmp;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
if (len <= 16)
|
||||
return 0;
|
||||
|
||||
@@ -141,8 +130,6 @@ size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
unsigned char c[16];
|
||||
} tmp;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
if (len < 16)
|
||||
return 0;
|
||||
|
||||
@@ -179,8 +166,6 @@ size_t CRYPTO_cts128_decrypt_block(const unsigned char *in,
|
||||
unsigned char c[32];
|
||||
} tmp;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
if (len <= 16)
|
||||
return 0;
|
||||
|
||||
@@ -224,8 +209,6 @@ size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in,
|
||||
unsigned char c[32];
|
||||
} tmp;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
if (len < 16)
|
||||
return 0;
|
||||
|
||||
@@ -272,8 +255,6 @@ size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
|
||||
unsigned char c[32];
|
||||
} tmp;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
if (len <= 16)
|
||||
return 0;
|
||||
|
||||
@@ -314,8 +295,6 @@ size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
|
||||
unsigned char c[32];
|
||||
} tmp;
|
||||
|
||||
assert(in && out && key && ivec);
|
||||
|
||||
if (len < 16)
|
||||
return 0;
|
||||
|
||||
|
||||
@@ -1,65 +1,16 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2010 The OpenSSL Project. All rights reserved.
|
||||
/*
|
||||
* Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* openssl-core@openssl.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#define OPENSSL_FIPSAPI
|
||||
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MODES_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
|
||||
/* redefine, because alignment is ensured */
|
||||
# undef GETU32
|
||||
@@ -150,9 +101,7 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
static const size_t rem_8bit[256] = {
|
||||
PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
|
||||
PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
|
||||
@@ -321,9 +270,7 @@ static void gcm_init_4bit(u128 Htable[16], u64 H[2])
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
|
||||
if (is_endian.little)
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@@ -356,9 +303,7 @@ static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
|
||||
nlo = ((const u8 *)Xi)[15];
|
||||
nhi = nlo >> 4;
|
||||
@@ -437,9 +382,7 @@ static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
|
||||
# if 1
|
||||
do {
|
||||
@@ -629,9 +572,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
|
||||
V.hi = H[0]; /* H is in host byte order, no byte swapping */
|
||||
V.lo = H[1];
|
||||
@@ -774,9 +715,7 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
ctx->block = block;
|
||||
@@ -801,6 +740,11 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
|
||||
#if TABLE_BITS==8
|
||||
gcm_init_8bit(ctx->Htable, ctx->H.u);
|
||||
#elif TABLE_BITS==4
|
||||
# if defined(GHASH)
|
||||
# define CTX__GHASH(f) (ctx->ghash = (f))
|
||||
# else
|
||||
# define CTX__GHASH(f) (ctx->ghash = NULL)
|
||||
# endif
|
||||
# if defined(GHASH_ASM_X86_OR_64)
|
||||
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
|
||||
if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
|
||||
@@ -808,11 +752,11 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
|
||||
if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
|
||||
gcm_init_avx(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_avx;
|
||||
ctx->ghash = gcm_ghash_avx;
|
||||
CTX__GHASH(gcm_ghash_avx);
|
||||
} else {
|
||||
gcm_init_clmul(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_clmul;
|
||||
ctx->ghash = gcm_ghash_clmul;
|
||||
CTX__GHASH(gcm_ghash_clmul);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -825,66 +769,59 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
|
||||
if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
|
||||
# endif
|
||||
ctx->gmult = gcm_gmult_4bit_mmx;
|
||||
ctx->ghash = gcm_ghash_4bit_mmx;
|
||||
CTX__GHASH(gcm_ghash_4bit_mmx);
|
||||
} else {
|
||||
ctx->gmult = gcm_gmult_4bit_x86;
|
||||
ctx->ghash = gcm_ghash_4bit_x86;
|
||||
CTX__GHASH(gcm_ghash_4bit_x86);
|
||||
}
|
||||
# else
|
||||
ctx->gmult = gcm_gmult_4bit;
|
||||
ctx->ghash = gcm_ghash_4bit;
|
||||
CTX__GHASH(gcm_ghash_4bit);
|
||||
# endif
|
||||
# elif defined(GHASH_ASM_ARM)
|
||||
# ifdef PMULL_CAPABLE
|
||||
if (PMULL_CAPABLE) {
|
||||
gcm_init_v8(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_v8;
|
||||
ctx->ghash = gcm_ghash_v8;
|
||||
CTX__GHASH(gcm_ghash_v8);
|
||||
} else
|
||||
# endif
|
||||
# ifdef NEON_CAPABLE
|
||||
if (NEON_CAPABLE) {
|
||||
gcm_init_neon(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_neon;
|
||||
ctx->ghash = gcm_ghash_neon;
|
||||
CTX__GHASH(gcm_ghash_neon);
|
||||
} else
|
||||
# endif
|
||||
{
|
||||
gcm_init_4bit(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_4bit;
|
||||
# if defined(GHASH)
|
||||
ctx->ghash = gcm_ghash_4bit;
|
||||
# else
|
||||
ctx->ghash = NULL;
|
||||
# endif
|
||||
CTX__GHASH(gcm_ghash_4bit);
|
||||
}
|
||||
# elif defined(GHASH_ASM_SPARC)
|
||||
if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
|
||||
gcm_init_vis3(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_vis3;
|
||||
ctx->ghash = gcm_ghash_vis3;
|
||||
CTX__GHASH(gcm_ghash_vis3);
|
||||
} else {
|
||||
gcm_init_4bit(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_4bit;
|
||||
ctx->ghash = gcm_ghash_4bit;
|
||||
CTX__GHASH(gcm_ghash_4bit);
|
||||
}
|
||||
# elif defined(GHASH_ASM_PPC)
|
||||
if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
|
||||
gcm_init_p8(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_p8;
|
||||
ctx->ghash = gcm_ghash_p8;
|
||||
CTX__GHASH(gcm_ghash_p8);
|
||||
} else {
|
||||
gcm_init_4bit(ctx->Htable, ctx->H.u);
|
||||
ctx->gmult = gcm_gmult_4bit;
|
||||
# if defined(GHASH)
|
||||
ctx->ghash = gcm_ghash_4bit;
|
||||
# else
|
||||
ctx->ghash = NULL;
|
||||
# endif
|
||||
CTX__GHASH(gcm_ghash_4bit);
|
||||
}
|
||||
# else
|
||||
gcm_init_4bit(ctx->Htable, ctx->H.u);
|
||||
# endif
|
||||
# undef CTX__GHASH
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -894,9 +831,7 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
unsigned int ctr;
|
||||
#ifdef GCM_FUNCREF_4BIT
|
||||
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
|
||||
@@ -1040,9 +975,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
unsigned int n, ctr;
|
||||
size_t i;
|
||||
u64 mlen = ctx->len.u[1];
|
||||
@@ -1050,15 +983,12 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
|
||||
void *key = ctx->key;
|
||||
#ifdef GCM_FUNCREF_4BIT
|
||||
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
|
||||
# ifdef GHASH
|
||||
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
|
||||
const u8 *inp, size_t len) = ctx->ghash;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
n = (unsigned int)mlen % 16; /* alternative to ctx->mres */
|
||||
#endif
|
||||
mlen += len;
|
||||
if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
|
||||
return -1;
|
||||
@@ -1100,7 +1030,8 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
|
||||
if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
|
||||
break;
|
||||
# endif
|
||||
# if defined(GHASH) && defined(GHASH_CHUNK)
|
||||
# if defined(GHASH)
|
||||
# if defined(GHASH_CHUNK)
|
||||
while (len >= GHASH_CHUNK) {
|
||||
size_t j = GHASH_CHUNK;
|
||||
|
||||
@@ -1111,11 +1042,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
|
||||
(*block) (ctx->Yi.c, ctx->EKi.c, key);
|
||||
++ctr;
|
||||
if (is_endian.little)
|
||||
# ifdef BSWAP4
|
||||
# ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
# else
|
||||
# else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
# endif
|
||||
# endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
for (i = 0; i < 16 / sizeof(size_t); ++i)
|
||||
@@ -1127,6 +1058,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
|
||||
GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
|
||||
len -= GHASH_CHUNK;
|
||||
}
|
||||
# endif
|
||||
if ((i = (len & (size_t)-16))) {
|
||||
size_t j = i;
|
||||
|
||||
@@ -1227,9 +1159,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
unsigned int n, ctr;
|
||||
size_t i;
|
||||
u64 mlen = ctx->len.u[1];
|
||||
@@ -1237,7 +1167,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
|
||||
void *key = ctx->key;
|
||||
#ifdef GCM_FUNCREF_4BIT
|
||||
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
|
||||
# ifdef GHASH
|
||||
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
|
||||
const u8 *inp, size_t len) = ctx->ghash;
|
||||
# endif
|
||||
@@ -1286,7 +1216,8 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
|
||||
if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
|
||||
break;
|
||||
# endif
|
||||
# if defined(GHASH) && defined(GHASH_CHUNK)
|
||||
# if defined(GHASH)
|
||||
# if defined(GHASH_CHUNK)
|
||||
while (len >= GHASH_CHUNK) {
|
||||
size_t j = GHASH_CHUNK;
|
||||
|
||||
@@ -1298,11 +1229,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
|
||||
(*block) (ctx->Yi.c, ctx->EKi.c, key);
|
||||
++ctr;
|
||||
if (is_endian.little)
|
||||
# ifdef BSWAP4
|
||||
# ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
# else
|
||||
# else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
# endif
|
||||
# endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
for (i = 0; i < 16 / sizeof(size_t); ++i)
|
||||
@@ -1313,6 +1244,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
|
||||
}
|
||||
len -= GHASH_CHUNK;
|
||||
}
|
||||
# endif
|
||||
if ((i = (len & (size_t)-16))) {
|
||||
GHASH(ctx, in, i);
|
||||
while (len >= 16) {
|
||||
@@ -1416,23 +1348,24 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
const unsigned char *in, unsigned char *out,
|
||||
size_t len, ctr128_f stream)
|
||||
{
|
||||
#if defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
return CRYPTO_gcm128_encrypt(ctx, in, out, len);
|
||||
#else
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
unsigned int n, ctr;
|
||||
size_t i;
|
||||
u64 mlen = ctx->len.u[1];
|
||||
void *key = ctx->key;
|
||||
#ifdef GCM_FUNCREF_4BIT
|
||||
# ifdef GCM_FUNCREF_4BIT
|
||||
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
|
||||
# ifdef GHASH
|
||||
# ifdef GHASH
|
||||
void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
|
||||
const u8 *inp, size_t len) = ctx->ghash;
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
mlen += len;
|
||||
if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
|
||||
@@ -1446,11 +1379,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
}
|
||||
|
||||
if (is_endian.little)
|
||||
#ifdef BSWAP4
|
||||
# ifdef BSWAP4
|
||||
ctr = BSWAP4(ctx->Yi.d[3]);
|
||||
#else
|
||||
# else
|
||||
ctr = GETU32(ctx->Yi.c + 12);
|
||||
#endif
|
||||
# endif
|
||||
else
|
||||
ctr = ctx->Yi.d[3];
|
||||
|
||||
@@ -1468,60 +1401,60 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
# if defined(GHASH) && defined(GHASH_CHUNK)
|
||||
while (len >= GHASH_CHUNK) {
|
||||
(*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
|
||||
ctr += GHASH_CHUNK / 16;
|
||||
if (is_endian.little)
|
||||
# ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
# else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
# endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
GHASH(ctx, out, GHASH_CHUNK);
|
||||
out += GHASH_CHUNK;
|
||||
in += GHASH_CHUNK;
|
||||
len -= GHASH_CHUNK;
|
||||
}
|
||||
# endif
|
||||
if ((i = (len & (size_t)-16))) {
|
||||
size_t j = i / 16;
|
||||
|
||||
(*stream) (in, out, j, key, ctx->Yi.c);
|
||||
ctr += (unsigned int)j;
|
||||
if (is_endian.little)
|
||||
# ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
# else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
# endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
GHASH(ctx, out, GHASH_CHUNK);
|
||||
out += GHASH_CHUNK;
|
||||
in += GHASH_CHUNK;
|
||||
len -= GHASH_CHUNK;
|
||||
}
|
||||
#endif
|
||||
if ((i = (len & (size_t)-16))) {
|
||||
size_t j = i / 16;
|
||||
|
||||
(*stream) (in, out, j, key, ctx->Yi.c);
|
||||
ctr += (unsigned int)j;
|
||||
if (is_endian.little)
|
||||
#ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
#else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
#endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
in += i;
|
||||
len -= i;
|
||||
#if defined(GHASH)
|
||||
# if defined(GHASH)
|
||||
GHASH(ctx, out, i);
|
||||
out += i;
|
||||
#else
|
||||
# else
|
||||
while (j--) {
|
||||
for (i = 0; i < 16; ++i)
|
||||
ctx->Xi.c[i] ^= out[i];
|
||||
GCM_MUL(ctx, Xi);
|
||||
out += 16;
|
||||
}
|
||||
#endif
|
||||
# endif
|
||||
}
|
||||
if (len) {
|
||||
(*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
|
||||
++ctr;
|
||||
if (is_endian.little)
|
||||
#ifdef BSWAP4
|
||||
# ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
#else
|
||||
# else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
#endif
|
||||
# endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
while (len--) {
|
||||
@@ -1532,29 +1465,31 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
|
||||
ctx->mres = n;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
const unsigned char *in, unsigned char *out,
|
||||
size_t len, ctr128_f stream)
|
||||
{
|
||||
#if defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
return CRYPTO_gcm128_decrypt(ctx, in, out, len);
|
||||
#else
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
unsigned int n, ctr;
|
||||
size_t i;
|
||||
u64 mlen = ctx->len.u[1];
|
||||
void *key = ctx->key;
|
||||
#ifdef GCM_FUNCREF_4BIT
|
||||
# ifdef GCM_FUNCREF_4BIT
|
||||
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
|
||||
# ifdef GHASH
|
||||
# ifdef GHASH
|
||||
void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
|
||||
const u8 *inp, size_t len) = ctx->ghash;
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
mlen += len;
|
||||
if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
|
||||
@@ -1568,11 +1503,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
}
|
||||
|
||||
if (is_endian.little)
|
||||
#ifdef BSWAP4
|
||||
# ifdef BSWAP4
|
||||
ctr = BSWAP4(ctx->Yi.d[3]);
|
||||
#else
|
||||
# else
|
||||
ctr = GETU32(ctx->Yi.c + 12);
|
||||
#endif
|
||||
# endif
|
||||
else
|
||||
ctr = ctx->Yi.d[3];
|
||||
|
||||
@@ -1592,30 +1527,30 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
# if defined(GHASH) && defined(GHASH_CHUNK)
|
||||
while (len >= GHASH_CHUNK) {
|
||||
GHASH(ctx, in, GHASH_CHUNK);
|
||||
(*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
|
||||
ctr += GHASH_CHUNK / 16;
|
||||
if (is_endian.little)
|
||||
# ifdef BSWAP4
|
||||
# ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
# else
|
||||
# else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
# endif
|
||||
# endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
out += GHASH_CHUNK;
|
||||
in += GHASH_CHUNK;
|
||||
len -= GHASH_CHUNK;
|
||||
}
|
||||
#endif
|
||||
# endif
|
||||
if ((i = (len & (size_t)-16))) {
|
||||
size_t j = i / 16;
|
||||
|
||||
#if defined(GHASH)
|
||||
# if defined(GHASH)
|
||||
GHASH(ctx, in, i);
|
||||
#else
|
||||
# else
|
||||
while (j--) {
|
||||
size_t k;
|
||||
for (k = 0; k < 16; ++k)
|
||||
@@ -1625,15 +1560,15 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
}
|
||||
j = i / 16;
|
||||
in -= i;
|
||||
#endif
|
||||
# endif
|
||||
(*stream) (in, out, j, key, ctx->Yi.c);
|
||||
ctr += (unsigned int)j;
|
||||
if (is_endian.little)
|
||||
#ifdef BSWAP4
|
||||
# ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
#else
|
||||
# else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
#endif
|
||||
# endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
out += i;
|
||||
@@ -1644,11 +1579,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
(*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
|
||||
++ctr;
|
||||
if (is_endian.little)
|
||||
#ifdef BSWAP4
|
||||
# ifdef BSWAP4
|
||||
ctx->Yi.d[3] = BSWAP4(ctr);
|
||||
#else
|
||||
# else
|
||||
PUTU32(ctx->Yi.c + 12, ctr);
|
||||
#endif
|
||||
# endif
|
||||
else
|
||||
ctx->Yi.d[3] = ctr;
|
||||
while (len--) {
|
||||
@@ -1661,6 +1596,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
|
||||
ctx->mres = n;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
|
||||
@@ -1669,9 +1605,7 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
|
||||
const union {
|
||||
long one;
|
||||
char little;
|
||||
} is_endian = {
|
||||
1
|
||||
};
|
||||
} is_endian = { 1 };
|
||||
u64 alen = ctx->len.u[0] << 3;
|
||||
u64 clen = ctx->len.u[1] << 3;
|
||||
#ifdef GCM_FUNCREF_4BIT
|
||||
@@ -1720,7 +1654,7 @@ GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
|
||||
{
|
||||
GCM128_CONTEXT *ret;
|
||||
|
||||
if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
|
||||
if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
|
||||
CRYPTO_gcm128_init(ret, key, block);
|
||||
|
||||
return ret;
|
||||
@@ -1728,10 +1662,7 @@ GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
|
||||
|
||||
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
|
||||
{
|
||||
if (ctx) {
|
||||
OPENSSL_cleanse(ctx, sizeof(*ctx));
|
||||
OPENSSL_free(ctx);
|
||||
}
|
||||
OPENSSL_clear_free(ctx, sizeof(*ctx));
|
||||
}
|
||||
|
||||
#if defined(SELFTEST)
|
||||
|
||||
@@ -1,163 +0,0 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
|
||||
*
|
||||
* Rights for redistribution and usage in source and binary
|
||||
* forms are granted according to the OpenSSL license.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef void (*block128_f) (const unsigned char in[16],
|
||||
unsigned char out[16], const void *key);
|
||||
|
||||
typedef void (*cbc128_f) (const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], int enc);
|
||||
|
||||
typedef void (*ctr128_f) (const unsigned char *in, unsigned char *out,
|
||||
size_t blocks, const void *key,
|
||||
const unsigned char ivec[16]);
|
||||
|
||||
typedef void (*ccm128_f) (const unsigned char *in, unsigned char *out,
|
||||
size_t blocks, const void *key,
|
||||
const unsigned char ivec[16],
|
||||
unsigned char cmac[16]);
|
||||
|
||||
void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], block128_f block);
|
||||
void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], block128_f block);
|
||||
|
||||
void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16],
|
||||
unsigned char ecount_buf[16], unsigned int *num,
|
||||
block128_f block);
|
||||
|
||||
void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16],
|
||||
unsigned char ecount_buf[16],
|
||||
unsigned int *num, ctr128_f ctr);
|
||||
|
||||
void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], int *num,
|
||||
block128_f block);
|
||||
|
||||
void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], int *num,
|
||||
int enc, block128_f block);
|
||||
void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t length, const void *key,
|
||||
unsigned char ivec[16], int *num,
|
||||
int enc, block128_f block);
|
||||
void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t bits, const void *key,
|
||||
unsigned char ivec[16], int *num,
|
||||
int enc, block128_f block);
|
||||
|
||||
size_t CRYPTO_cts128_encrypt_block(const unsigned char *in,
|
||||
unsigned char *out, size_t len,
|
||||
const void *key, unsigned char ivec[16],
|
||||
block128_f block);
|
||||
size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], cbc128_f cbc);
|
||||
size_t CRYPTO_cts128_decrypt_block(const unsigned char *in,
|
||||
unsigned char *out, size_t len,
|
||||
const void *key, unsigned char ivec[16],
|
||||
block128_f block);
|
||||
size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], cbc128_f cbc);
|
||||
|
||||
size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in,
|
||||
unsigned char *out, size_t len,
|
||||
const void *key,
|
||||
unsigned char ivec[16],
|
||||
block128_f block);
|
||||
size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], cbc128_f cbc);
|
||||
size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in,
|
||||
unsigned char *out, size_t len,
|
||||
const void *key,
|
||||
unsigned char ivec[16],
|
||||
block128_f block);
|
||||
size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
|
||||
size_t len, const void *key,
|
||||
unsigned char ivec[16], cbc128_f cbc);
|
||||
|
||||
typedef struct gcm128_context GCM128_CONTEXT;
|
||||
|
||||
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block);
|
||||
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block);
|
||||
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
|
||||
size_t len);
|
||||
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
|
||||
size_t len);
|
||||
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
|
||||
const unsigned char *in, unsigned char *out,
|
||||
size_t len);
|
||||
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
|
||||
const unsigned char *in, unsigned char *out,
|
||||
size_t len);
|
||||
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
const unsigned char *in, unsigned char *out,
|
||||
size_t len, ctr128_f stream);
|
||||
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
const unsigned char *in, unsigned char *out,
|
||||
size_t len, ctr128_f stream);
|
||||
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
|
||||
size_t len);
|
||||
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
|
||||
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx);
|
||||
|
||||
typedef struct ccm128_context CCM128_CONTEXT;
|
||||
|
||||
void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
|
||||
unsigned int M, unsigned int L, void *key,
|
||||
block128_f block);
|
||||
int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx, const unsigned char *nonce,
|
||||
size_t nlen, size_t mlen);
|
||||
void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx, const unsigned char *aad,
|
||||
size_t alen);
|
||||
int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, const unsigned char *inp,
|
||||
unsigned char *out, size_t len);
|
||||
int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, const unsigned char *inp,
|
||||
unsigned char *out, size_t len);
|
||||
int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp,
|
||||
unsigned char *out, size_t len,
|
||||
ccm128_f stream);
|
||||
int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp,
|
||||
unsigned char *out, size_t len,
|
||||
ccm128_f stream);
|
||||
size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
|
||||
|
||||
typedef struct xts128_context XTS128_CONTEXT;
|
||||
|
||||
int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx,
|
||||
const unsigned char iv[16],
|
||||
const unsigned char *inp, unsigned char *out,
|
||||
size_t len, int enc);
|
||||
|
||||
size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
|
||||
unsigned char *out,
|
||||
const unsigned char *in, size_t inlen,
|
||||
block128_f block);
|
||||
|
||||
size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
|
||||
unsigned char *out,
|
||||
const unsigned char *in, size_t inlen,
|
||||
block128_f block);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -1,8 +1,10 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2010 The OpenSSL Project. All rights reserved.
|
||||
/*
|
||||
* Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Redistribution and use is governed by OpenSSL license.
|
||||
* ====================================================================
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/modes.h>
|
||||
@@ -141,3 +143,43 @@ struct ccm128_context {
|
||||
block128_f block;
|
||||
void *key;
|
||||
};
|
||||
|
||||
#ifndef OPENSSL_NO_OCB
|
||||
|
||||
typedef union {
|
||||
u64 a[2];
|
||||
unsigned char c[16];
|
||||
} OCB_BLOCK;
|
||||
# define ocb_block16_xor(in1,in2,out) \
|
||||
( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \
|
||||
(out)->a[1]=(in1)->a[1]^(in2)->a[1] )
|
||||
# if STRICT_ALIGNMENT
|
||||
# define ocb_block16_xor_misaligned(in1,in2,out) \
|
||||
ocb_block_xor((in1)->c,(in2)->c,16,(out)->c)
|
||||
# else
|
||||
# define ocb_block16_xor_misaligned ocb_block16_xor
|
||||
# endif
|
||||
|
||||
struct ocb128_context {
|
||||
/* Need both encrypt and decrypt key schedules for decryption */
|
||||
block128_f encrypt;
|
||||
block128_f decrypt;
|
||||
void *keyenc;
|
||||
void *keydec;
|
||||
ocb128_f stream; /* direction dependent */
|
||||
/* Key dependent variables. Can be reused if key remains the same */
|
||||
size_t l_index;
|
||||
size_t max_l_index;
|
||||
OCB_BLOCK l_star;
|
||||
OCB_BLOCK l_dollar;
|
||||
OCB_BLOCK *l;
|
||||
/* Must be reset for each session */
|
||||
u64 blocks_hashed;
|
||||
u64 blocks_processed;
|
||||
OCB_BLOCK tag;
|
||||
OCB_BLOCK offset_aad;
|
||||
OCB_BLOCK sum;
|
||||
OCB_BLOCK offset;
|
||||
OCB_BLOCK checksum;
|
||||
};
|
||||
#endif /* OPENSSL_NO_OCB */
|
||||
|
||||
568
crypto/modes/ocb128.c
Normal file
568
crypto/modes/ocb128.c
Normal file
@@ -0,0 +1,568 @@
|
||||
/*
|
||||
* Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
|
||||
#ifndef OPENSSL_NO_OCB
|
||||
|
||||
/*
|
||||
* Calculate the number of binary trailing zero's in any given number
|
||||
*/
|
||||
static u32 ocb_ntz(u64 n)
|
||||
{
|
||||
u32 cnt = 0;
|
||||
|
||||
/*
|
||||
* We do a right-to-left simple sequential search. This is surprisingly
|
||||
* efficient as the distribution of trailing zeros is not uniform,
|
||||
* e.g. the number of possible inputs with no trailing zeros is equal to
|
||||
* the number with 1 or more; the number with exactly 1 is equal to the
|
||||
* number with 2 or more, etc. Checking the last two bits covers 75% of
|
||||
* all numbers. Checking the last three covers 87.5%
|
||||
*/
|
||||
while (!(n & 1)) {
|
||||
n >>= 1;
|
||||
cnt++;
|
||||
}
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
 * Shift a 16 byte big-endian block left by "shift" bits (0 <= shift <= 8).
 *
 * in    - the 16 source bytes
 * shift - number of bit positions to shift towards byte 0
 * out   - receives the 16 shifted bytes; may be the same buffer as in
 *
 * Bits shifted out of byte 0 are discarded and zero bits enter at the low
 * end of byte 15.
 */
static void ocb_block_lshift(const unsigned char *in, size_t shift,
                             unsigned char *out)
{
    /* Bits handed down from the byte to the right (initially none) */
    unsigned char carry = 0;
    int pos = 16;

    /* Walk from the least significant byte (15) to the most (0) */
    while (pos-- > 0) {
        /* Read the source byte before writing, so in == out is safe */
        const unsigned char cur = in[pos];

        out[pos] = (unsigned char)((cur << shift) | carry);
        /* The top "shift" bits of this byte move into the next one up */
        carry = (unsigned char)(cur >> (8 - shift));
    }
}
|
||||
|
||||
/*
|
||||
* Perform a "double" operation as per OCB spec
|
||||
*/
|
||||
static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
|
||||
{
|
||||
unsigned char mask;
|
||||
|
||||
/*
|
||||
* Calculate the mask based on the most significant bit. There are more
|
||||
* efficient ways to do this - but this way is constant time
|
||||
*/
|
||||
mask = in->c[0] & 0x80;
|
||||
mask >>= 7;
|
||||
mask *= 135;
|
||||
|
||||
ocb_block_lshift(in->c, 1, out->c);
|
||||
|
||||
out->c[15] ^= mask;
|
||||
}
|
||||
|
||||
/*
 * Byte-wise xor of two buffers.
 *
 * in1, in2 - the operands, each at least len bytes long
 * len      - number of bytes to process
 * out      - receives in1[i] ^ in2[i] for every i in [0, len)
 *
 * Bytes are processed in ascending order; nothing is written when len is 0.
 */
static void ocb_block_xor(const unsigned char *in1,
                          const unsigned char *in2, size_t len,
                          unsigned char *out)
{
    while (len-- > 0)
        *out++ = *in1++ ^ *in2++;
}
|
||||
|
||||
/*
|
||||
* Lookup L_index in our lookup table. If we haven't already got it we need to
|
||||
* calculate it
|
||||
*/
|
||||
static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx)
|
||||
{
|
||||
size_t l_index = ctx->l_index;
|
||||
|
||||
if (idx <= l_index) {
|
||||
return ctx->l + idx;
|
||||
}
|
||||
|
||||
/* We don't have it - so calculate it */
|
||||
if (idx >= ctx->max_l_index) {
|
||||
void *tmp_ptr;
|
||||
/*
|
||||
* Each additional entry allows to process almost double as
|
||||
* much data, so that in linear world the table will need to
|
||||
* be expanded with smaller and smaller increments. Originally
|
||||
* it was doubling in size, which was a waste. Growing it
|
||||
* linearly is not formally optimal, but is simpler to implement.
|
||||
* We grow table by minimally required 4*n that would accommodate
|
||||
* the index.
|
||||
*/
|
||||
ctx->max_l_index += (idx - ctx->max_l_index + 4) & ~3;
|
||||
tmp_ptr =
|
||||
OPENSSL_realloc(ctx->l, ctx->max_l_index * sizeof(OCB_BLOCK));
|
||||
if (tmp_ptr == NULL) /* prevent ctx->l from being clobbered */
|
||||
return NULL;
|
||||
ctx->l = tmp_ptr;
|
||||
}
|
||||
while (l_index < idx) {
|
||||
ocb_double(ctx->l + l_index, ctx->l + l_index + 1);
|
||||
l_index++;
|
||||
}
|
||||
ctx->l_index = l_index;
|
||||
|
||||
return ctx->l + idx;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a new OCB128_CONTEXT
|
||||
*/
|
||||
OCB128_CONTEXT *CRYPTO_ocb128_new(void *keyenc, void *keydec,
|
||||
block128_f encrypt, block128_f decrypt,
|
||||
ocb128_f stream)
|
||||
{
|
||||
OCB128_CONTEXT *octx;
|
||||
int ret;
|
||||
|
||||
if ((octx = OPENSSL_malloc(sizeof(*octx))) != NULL) {
|
||||
ret = CRYPTO_ocb128_init(octx, keyenc, keydec, encrypt, decrypt,
|
||||
stream);
|
||||
if (ret)
|
||||
return octx;
|
||||
OPENSSL_free(octx);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialise an existing OCB128_CONTEXT
|
||||
*/
|
||||
int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec,
|
||||
block128_f encrypt, block128_f decrypt,
|
||||
ocb128_f stream)
|
||||
{
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
ctx->l_index = 0;
|
||||
ctx->max_l_index = 5;
|
||||
ctx->l = OPENSSL_malloc(ctx->max_l_index * 16);
|
||||
if (ctx->l == NULL)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We set both the encryption and decryption key schedules - decryption
|
||||
* needs both. Don't really need decryption schedule if only doing
|
||||
* encryption - but it simplifies things to take it anyway
|
||||
*/
|
||||
ctx->encrypt = encrypt;
|
||||
ctx->decrypt = decrypt;
|
||||
ctx->stream = stream;
|
||||
ctx->keyenc = keyenc;
|
||||
ctx->keydec = keydec;
|
||||
|
||||
/* L_* = ENCIPHER(K, zeros(128)) */
|
||||
ctx->encrypt(ctx->l_star.c, ctx->l_star.c, ctx->keyenc);
|
||||
|
||||
/* L_$ = double(L_*) */
|
||||
ocb_double(&ctx->l_star, &ctx->l_dollar);
|
||||
|
||||
/* L_0 = double(L_$) */
|
||||
ocb_double(&ctx->l_dollar, ctx->l);
|
||||
|
||||
/* L_{i} = double(L_{i-1}) */
|
||||
ocb_double(ctx->l, ctx->l+1);
|
||||
ocb_double(ctx->l+1, ctx->l+2);
|
||||
ocb_double(ctx->l+2, ctx->l+3);
|
||||
ocb_double(ctx->l+3, ctx->l+4);
|
||||
ctx->l_index = 4; /* enough to process up to 496 bytes */
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy an OCB128_CONTEXT object
|
||||
*/
|
||||
int CRYPTO_ocb128_copy_ctx(OCB128_CONTEXT *dest, OCB128_CONTEXT *src,
|
||||
void *keyenc, void *keydec)
|
||||
{
|
||||
memcpy(dest, src, sizeof(OCB128_CONTEXT));
|
||||
if (keyenc)
|
||||
dest->keyenc = keyenc;
|
||||
if (keydec)
|
||||
dest->keydec = keydec;
|
||||
if (src->l) {
|
||||
dest->l = OPENSSL_malloc(src->max_l_index * 16);
|
||||
if (dest->l == NULL)
|
||||
return 0;
|
||||
memcpy(dest->l, src->l, (src->l_index + 1) * 16);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the IV to be used for this operation. Must be 1 - 15 bytes.
|
||||
*/
|
||||
int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
|
||||
size_t len, size_t taglen)
|
||||
{
|
||||
unsigned char ktop[16], tmp[16], mask;
|
||||
unsigned char stretch[24], nonce[16];
|
||||
size_t bottom, shift;
|
||||
|
||||
/*
|
||||
* Spec says IV is 120 bits or fewer - it allows non byte aligned lengths.
|
||||
* We don't support this at this stage
|
||||
*/
|
||||
if ((len > 15) || (len < 1) || (taglen > 16) || (taglen < 1)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Nonce = num2str(TAGLEN mod 128,7) || zeros(120-bitlen(N)) || 1 || N */
|
||||
nonce[0] = ((taglen * 8) % 128) << 1;
|
||||
memset(nonce + 1, 0, 15);
|
||||
memcpy(nonce + 16 - len, iv, len);
|
||||
nonce[15 - len] |= 1;
|
||||
|
||||
/* Ktop = ENCIPHER(K, Nonce[1..122] || zeros(6)) */
|
||||
memcpy(tmp, nonce, 16);
|
||||
tmp[15] &= 0xc0;
|
||||
ctx->encrypt(tmp, ktop, ctx->keyenc);
|
||||
|
||||
/* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
|
||||
memcpy(stretch, ktop, 16);
|
||||
ocb_block_xor(ktop, ktop + 1, 8, stretch + 16);
|
||||
|
||||
/* bottom = str2num(Nonce[123..128]) */
|
||||
bottom = nonce[15] & 0x3f;
|
||||
|
||||
/* Offset_0 = Stretch[1+bottom..128+bottom] */
|
||||
shift = bottom % 8;
|
||||
ocb_block_lshift(stretch + (bottom / 8), shift, ctx->offset.c);
|
||||
mask = 0xff;
|
||||
mask <<= 8 - shift;
|
||||
ctx->offset.c[15] |=
|
||||
(*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Provide any AAD. This can be called multiple times. Only the final time can
|
||||
* have a partial block
|
||||
*/
|
||||
int CRYPTO_ocb128_aad(OCB128_CONTEXT *ctx, const unsigned char *aad,
|
||||
size_t len)
|
||||
{
|
||||
u64 i, all_num_blocks;
|
||||
size_t num_blocks, last_len;
|
||||
OCB_BLOCK tmp1;
|
||||
OCB_BLOCK tmp2;
|
||||
|
||||
/* Calculate the number of blocks of AAD provided now, and so far */
|
||||
num_blocks = len / 16;
|
||||
all_num_blocks = num_blocks + ctx->blocks_hashed;
|
||||
|
||||
/* Loop through all full blocks of AAD */
|
||||
for (i = ctx->blocks_hashed + 1; i <= all_num_blocks; i++) {
|
||||
OCB_BLOCK *lookup;
|
||||
OCB_BLOCK *aad_block;
|
||||
|
||||
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
|
||||
lookup = ocb_lookup_l(ctx, ocb_ntz(i));
|
||||
if (lookup == NULL)
|
||||
return 0;
|
||||
ocb_block16_xor(&ctx->offset_aad, lookup, &ctx->offset_aad);
|
||||
|
||||
/* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
|
||||
aad_block = (OCB_BLOCK *)(aad + ((i - ctx->blocks_hashed - 1) * 16));
|
||||
ocb_block16_xor(&ctx->offset_aad, aad_block, &tmp1);
|
||||
ctx->encrypt(tmp1.c, tmp2.c, ctx->keyenc);
|
||||
ocb_block16_xor(&ctx->sum, &tmp2, &ctx->sum);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we have any partial blocks left over. This is only valid in the
|
||||
* last call to this function
|
||||
*/
|
||||
last_len = len % 16;
|
||||
|
||||
if (last_len > 0) {
|
||||
/* Offset_* = Offset_m xor L_* */
|
||||
ocb_block16_xor(&ctx->offset_aad, &ctx->l_star, &ctx->offset_aad);
|
||||
|
||||
/* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_* */
|
||||
memset(&tmp1, 0, 16);
|
||||
memcpy(&tmp1, aad + (num_blocks * 16), last_len);
|
||||
((unsigned char *)&tmp1)[last_len] = 0x80;
|
||||
ocb_block16_xor(&ctx->offset_aad, &tmp1, &tmp2);
|
||||
|
||||
/* Sum = Sum_m xor ENCIPHER(K, CipherInput) */
|
||||
ctx->encrypt(tmp2.c, tmp1.c, ctx->keyenc);
|
||||
ocb_block16_xor(&ctx->sum, &tmp1, &ctx->sum);
|
||||
}
|
||||
|
||||
ctx->blocks_hashed = all_num_blocks;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Provide any data to be encrypted. This can be called multiple times. Only
|
||||
* the final time can have a partial block
|
||||
*/
|
||||
int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx,
|
||||
const unsigned char *in, unsigned char *out,
|
||||
size_t len)
|
||||
{
|
||||
u64 i, all_num_blocks;
|
||||
size_t num_blocks, last_len;
|
||||
OCB_BLOCK tmp1;
|
||||
OCB_BLOCK tmp2;
|
||||
OCB_BLOCK pad;
|
||||
|
||||
/*
|
||||
* Calculate the number of blocks of data to be encrypted provided now, and
|
||||
* so far
|
||||
*/
|
||||
num_blocks = len / 16;
|
||||
all_num_blocks = num_blocks + ctx->blocks_processed;
|
||||
|
||||
if (num_blocks && all_num_blocks == (size_t)all_num_blocks
|
||||
&& ctx->stream != NULL) {
|
||||
size_t max_idx = 0, top = (size_t)all_num_blocks;
|
||||
|
||||
/*
|
||||
* See how many L_{i} entries we need to process data at hand
|
||||
* and pre-compute missing entries in the table [if any]...
|
||||
*/
|
||||
while (top >>= 1)
|
||||
max_idx++;
|
||||
if (ocb_lookup_l(ctx, max_idx) == NULL)
|
||||
return 0;
|
||||
|
||||
ctx->stream(in, out, num_blocks, ctx->keyenc,
|
||||
(size_t)ctx->blocks_processed + 1, ctx->offset.c,
|
||||
(const unsigned char (*)[16])ctx->l, ctx->checksum.c);
|
||||
} else {
|
||||
/* Loop through all full blocks to be encrypted */
|
||||
for (i = ctx->blocks_processed + 1; i <= all_num_blocks; i++) {
|
||||
OCB_BLOCK *lookup;
|
||||
OCB_BLOCK *inblock;
|
||||
OCB_BLOCK *outblock;
|
||||
|
||||
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
|
||||
lookup = ocb_lookup_l(ctx, ocb_ntz(i));
|
||||
if (lookup == NULL)
|
||||
return 0;
|
||||
ocb_block16_xor(&ctx->offset, lookup, &ctx->offset);
|
||||
|
||||
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
|
||||
inblock =
|
||||
(OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
|
||||
ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
|
||||
/* Checksum_i = Checksum_{i-1} xor P_i */
|
||||
ocb_block16_xor_misaligned(&ctx->checksum, inblock, &ctx->checksum);
|
||||
ctx->encrypt(tmp1.c, tmp2.c, ctx->keyenc);
|
||||
outblock =
|
||||
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
|
||||
ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we have any partial blocks left over. This is only valid in the
|
||||
* last call to this function
|
||||
*/
|
||||
last_len = len % 16;
|
||||
|
||||
if (last_len > 0) {
|
||||
/* Offset_* = Offset_m xor L_* */
|
||||
ocb_block16_xor(&ctx->offset, &ctx->l_star, &ctx->offset);
|
||||
|
||||
/* Pad = ENCIPHER(K, Offset_*) */
|
||||
ctx->encrypt(ctx->offset.c, pad.c, ctx->keyenc);
|
||||
|
||||
/* C_* = P_* xor Pad[1..bitlen(P_*)] */
|
||||
ocb_block_xor(in + (len / 16) * 16, (unsigned char *)&pad, last_len,
|
||||
out + (num_blocks * 16));
|
||||
|
||||
/* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
|
||||
memset(&tmp1, 0, 16);
|
||||
memcpy(&tmp1, in + (len / 16) * 16, last_len);
|
||||
((unsigned char *)(&tmp1))[last_len] = 0x80;
|
||||
ocb_block16_xor(&ctx->checksum, &tmp1, &ctx->checksum);
|
||||
}
|
||||
|
||||
ctx->blocks_processed = all_num_blocks;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Provide any data to be decrypted. This can be called multiple times. Only
|
||||
* the final time can have a partial block
|
||||
*/
|
||||
int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx,
|
||||
const unsigned char *in, unsigned char *out,
|
||||
size_t len)
|
||||
{
|
||||
u64 i, all_num_blocks;
|
||||
size_t num_blocks, last_len;
|
||||
OCB_BLOCK tmp1;
|
||||
OCB_BLOCK tmp2;
|
||||
OCB_BLOCK pad;
|
||||
|
||||
/*
|
||||
* Calculate the number of blocks of data to be decrypted provided now, and
|
||||
* so far
|
||||
*/
|
||||
num_blocks = len / 16;
|
||||
all_num_blocks = num_blocks + ctx->blocks_processed;
|
||||
|
||||
if (num_blocks && all_num_blocks == (size_t)all_num_blocks
|
||||
&& ctx->stream != NULL) {
|
||||
size_t max_idx = 0, top = (size_t)all_num_blocks;
|
||||
|
||||
/*
|
||||
* See how many L_{i} entries we need to process data at hand
|
||||
* and pre-compute missing entries in the table [if any]...
|
||||
*/
|
||||
while (top >>= 1)
|
||||
max_idx++;
|
||||
if (ocb_lookup_l(ctx, max_idx) == NULL)
|
||||
return 0;
|
||||
|
||||
ctx->stream(in, out, num_blocks, ctx->keydec,
|
||||
(size_t)ctx->blocks_processed + 1, ctx->offset.c,
|
||||
(const unsigned char (*)[16])ctx->l, ctx->checksum.c);
|
||||
} else {
|
||||
/* Loop through all full blocks to be decrypted */
|
||||
for (i = ctx->blocks_processed + 1; i <= all_num_blocks; i++) {
|
||||
OCB_BLOCK *inblock;
|
||||
OCB_BLOCK *outblock;
|
||||
|
||||
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
|
||||
OCB_BLOCK *lookup = ocb_lookup_l(ctx, ocb_ntz(i));
|
||||
if (lookup == NULL)
|
||||
return 0;
|
||||
ocb_block16_xor(&ctx->offset, lookup, &ctx->offset);
|
||||
|
||||
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
|
||||
inblock =
|
||||
(OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
|
||||
ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
|
||||
ctx->decrypt(tmp1.c, tmp2.c, ctx->keydec);
|
||||
outblock =
|
||||
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
|
||||
ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
|
||||
|
||||
/* Checksum_i = Checksum_{i-1} xor P_i */
|
||||
ocb_block16_xor_misaligned(&ctx->checksum, outblock, &ctx->checksum);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we have any partial blocks left over. This is only valid in the
|
||||
* last call to this function
|
||||
*/
|
||||
last_len = len % 16;
|
||||
|
||||
if (last_len > 0) {
|
||||
/* Offset_* = Offset_m xor L_* */
|
||||
ocb_block16_xor(&ctx->offset, &ctx->l_star, &ctx->offset);
|
||||
|
||||
/* Pad = ENCIPHER(K, Offset_*) */
|
||||
ctx->encrypt(ctx->offset.c, pad.c, ctx->keyenc);
|
||||
|
||||
/* P_* = C_* xor Pad[1..bitlen(C_*)] */
|
||||
ocb_block_xor(in + (len / 16) * 16, (unsigned char *)&pad, last_len,
|
||||
out + (num_blocks * 16));
|
||||
|
||||
/* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
|
||||
memset(&tmp1, 0, 16);
|
||||
memcpy(&tmp1, out + (len / 16) * 16, last_len);
|
||||
((unsigned char *)(&tmp1))[last_len] = 0x80;
|
||||
ocb_block16_xor(&ctx->checksum, &tmp1, &ctx->checksum);
|
||||
}
|
||||
|
||||
ctx->blocks_processed = all_num_blocks;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the tag and verify it against the supplied tag
|
||||
*/
|
||||
int CRYPTO_ocb128_finish(OCB128_CONTEXT *ctx, const unsigned char *tag,
|
||||
size_t len)
|
||||
{
|
||||
OCB_BLOCK tmp1, tmp2;
|
||||
|
||||
/*
|
||||
* Tag = ENCIPHER(K, Checksum_* xor Offset_* xor L_$) xor HASH(K,A)
|
||||
*/
|
||||
ocb_block16_xor(&ctx->checksum, &ctx->offset, &tmp1);
|
||||
ocb_block16_xor(&tmp1, &ctx->l_dollar, &tmp2);
|
||||
ctx->encrypt(tmp2.c, tmp1.c, ctx->keyenc);
|
||||
ocb_block16_xor(&tmp1, &ctx->sum, &ctx->tag);
|
||||
|
||||
if (len > 16 || len < 1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Compare the tag if we've been given one */
|
||||
if (tag)
|
||||
return CRYPTO_memcmp(&ctx->tag, tag, len);
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieve the calculated tag
|
||||
*/
|
||||
int CRYPTO_ocb128_tag(OCB128_CONTEXT *ctx, unsigned char *tag, size_t len)
|
||||
{
|
||||
if (len > 16 || len < 1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Calculate the tag */
|
||||
CRYPTO_ocb128_finish(ctx, NULL, 0);
|
||||
|
||||
/* Copy the tag into the supplied buffer */
|
||||
memcpy(tag, &ctx->tag, len);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Release all resources
|
||||
*/
|
||||
void CRYPTO_ocb128_cleanup(OCB128_CONTEXT *ctx)
|
||||
{
|
||||
if (ctx) {
|
||||
OPENSSL_clear_free(ctx->l, ctx->max_l_index * 16);
|
||||
OPENSSL_cleanse(ctx, sizeof(*ctx));
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* OPENSSL_NO_OCB */
|
||||
@@ -1,64 +1,16 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* openssl-core@openssl.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
/*
|
||||
* Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MODES_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
* The input and output encrypted as though 128bit ofb mode is being used.
|
||||
* The extra state information to record how much of the 128bit block we have
|
||||
@@ -71,8 +23,6 @@ void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
|
||||
unsigned int n;
|
||||
size_t l = 0;
|
||||
|
||||
assert(in && out && key && ivec && num);
|
||||
|
||||
n = *num;
|
||||
|
||||
#if !defined(OPENSSL_SMALL_FOOTPRINT)
|
||||
|
||||
@@ -1,70 +1,50 @@
|
||||
/* crypto/modes/wrap128.c */
|
||||
/*
|
||||
* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
|
||||
* project.
|
||||
*/
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2013 The OpenSSL Project. All rights reserved.
|
||||
* Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* licensing@OpenSSL.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include "cryptlib.h"
|
||||
/** Beware!
|
||||
*
|
||||
* Following wrapping modes were designed for AES but this implementation
|
||||
* allows you to use them for any 128 bit block cipher.
|
||||
*/
|
||||
|
||||
#include "internal/cryptlib.h"
|
||||
#include <openssl/modes.h>
|
||||
|
||||
/** RFC 3394 section 2.2.3.1 Default Initial Value */
|
||||
static const unsigned char default_iv[] = {
|
||||
0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
|
||||
};
|
||||
|
||||
/*
|
||||
* Input size limit: lower than maximum of standards but far larger than
|
||||
* anything that will be used in practice.
|
||||
/** RFC 5649 section 3 Alternative Initial Value 32-bit constant */
|
||||
static const unsigned char default_aiv[] = {
|
||||
0xA6, 0x59, 0x59, 0xA6
|
||||
};
|
||||
|
||||
/** Input size limit: lower than maximum of standards but far larger than
|
||||
* anything that will be used in practice.
|
||||
*/
|
||||
#define CRYPTO128_WRAP_MAX (1UL << 31)
|
||||
|
||||
/** Wrapping according to RFC 3394 section 2.2.1.
|
||||
*
|
||||
* @param[in] key Key value.
|
||||
* @param[in] iv IV value. Length = 8 bytes. NULL = use default_iv.
|
||||
* @param[in] in Plaintext as n 64-bit blocks, n >= 2.
|
||||
* @param[in] inlen Length of in.
|
||||
* @param[out] out Ciphertext. Minimal buffer length = (inlen + 8) bytes.
|
||||
* Input and output buffers can overlap if block function
|
||||
* supports that.
|
||||
* @param[in] block Block processing function.
|
||||
* @return 0 if inlen does not consist of n 64-bit blocks, n >= 2.
|
||||
* or if inlen > CRYPTO128_WRAP_MAX.
|
||||
* Output length if wrapping succeeded.
|
||||
*/
|
||||
size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
|
||||
unsigned char *out,
|
||||
const unsigned char *in, size_t inlen,
|
||||
@@ -72,11 +52,11 @@ size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
|
||||
{
|
||||
unsigned char *A, B[16], *R;
|
||||
size_t i, j, t;
|
||||
if ((inlen & 0x7) || (inlen < 8) || (inlen > CRYPTO128_WRAP_MAX))
|
||||
if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX))
|
||||
return 0;
|
||||
A = B;
|
||||
t = 1;
|
||||
memcpy(out + 8, in, inlen);
|
||||
memmove(out + 8, in, inlen);
|
||||
if (!iv)
|
||||
iv = default_iv;
|
||||
|
||||
@@ -100,10 +80,26 @@ size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
|
||||
return inlen + 8;
|
||||
}
|
||||
|
||||
size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
|
||||
unsigned char *out,
|
||||
const unsigned char *in, size_t inlen,
|
||||
block128_f block)
|
||||
/** Unwrapping according to RFC 3394 section 2.2.2 steps 1-2.
|
||||
* The IV check (step 3) is responsibility of the caller.
|
||||
*
|
||||
* @param[in] key Key value.
|
||||
* @param[out] iv Unchecked IV value. Minimal buffer length = 8 bytes.
|
||||
* @param[out] out Plaintext without IV.
|
||||
* Minimal buffer length = (inlen - 8) bytes.
|
||||
* Input and output buffers can overlap if block function
|
||||
* supports that.
|
||||
* @param[in] in Ciphertext as n 64-bit blocks.
|
||||
* @param[in] inlen Length of in.
|
||||
* @param[in] block Block processing function.
|
||||
* @return 0 if inlen is out of range [24, CRYPTO128_WRAP_MAX]
|
||||
* or if inlen is not a multiple of 8.
|
||||
* Output length otherwise.
|
||||
*/
|
||||
static size_t crypto_128_unwrap_raw(void *key, unsigned char *iv,
|
||||
unsigned char *out,
|
||||
const unsigned char *in, size_t inlen,
|
||||
block128_f block)
|
||||
{
|
||||
unsigned char *A, B[16], *R;
|
||||
size_t i, j, t;
|
||||
@@ -113,7 +109,7 @@ size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
|
||||
A = B;
|
||||
t = 6 * (inlen >> 3);
|
||||
memcpy(A, in, 8);
|
||||
memcpy(out, in + 8, inlen);
|
||||
memmove(out, in + 8, inlen);
|
||||
for (j = 0; j < 6; j++) {
|
||||
R = out + inlen - 8;
|
||||
for (i = 0; i < inlen; i += 8, t--, R -= 8) {
|
||||
@@ -128,11 +124,206 @@ size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
|
||||
memcpy(R, B + 8, 8);
|
||||
}
|
||||
}
|
||||
memcpy(iv, A, 8);
|
||||
return inlen;
|
||||
}
|
||||
|
||||
/** Unwrapping according to RFC 3394 section 2.2.2, including the IV check.
|
||||
* The first block of plaintext has to match the supplied IV, otherwise an
|
||||
* error is returned.
|
||||
*
|
||||
* @param[in] key Key value.
|
||||
* @param[in] iv IV value to match against. Length = 8 bytes.
|
||||
* NULL = use default_iv.
|
||||
* @param[out] out Plaintext without IV.
|
||||
* Minimal buffer length = (inlen - 8) bytes.
|
||||
* Input and output buffers can overlap if block function
|
||||
* supports that.
|
||||
* @param[in] in Ciphertext as n 64-bit blocks.
|
||||
* @param[in] inlen Length of in.
|
||||
* @param[in] block Block processing function.
|
||||
* @return 0 if inlen is out of range [24, CRYPTO128_WRAP_MAX]
|
||||
* or if inlen is not a multiple of 8
|
||||
* or if IV doesn't match expected value.
|
||||
* Output length otherwise.
|
||||
*/
|
||||
size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
|
||||
unsigned char *out, const unsigned char *in,
|
||||
size_t inlen, block128_f block)
|
||||
{
|
||||
size_t ret;
|
||||
unsigned char got_iv[8];
|
||||
|
||||
ret = crypto_128_unwrap_raw(key, got_iv, out, in, inlen, block);
|
||||
if (ret == 0)
|
||||
return 0;
|
||||
|
||||
if (!iv)
|
||||
iv = default_iv;
|
||||
if (memcmp(A, iv, 8)) {
|
||||
if (CRYPTO_memcmp(got_iv, iv, 8)) {
|
||||
OPENSSL_cleanse(out, ret);
|
||||
return 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Wrapping according to RFC 5649 section 4.1.
|
||||
*
|
||||
* @param[in] key Key value.
|
||||
* @param[in] icv (Non-standard) IV, 4 bytes. NULL = use default_aiv.
|
||||
* @param[out] out Ciphertext. Minimal buffer length = (inlen + 15) bytes.
|
||||
* Input and output buffers can overlap if block function
|
||||
* supports that.
|
||||
* @param[in] in Plaintext as n 64-bit blocks, n >= 2.
|
||||
* @param[in] inlen Length of in.
|
||||
* @param[in] block Block processing function.
|
||||
* @return 0 if inlen is out of range [1, CRYPTO128_WRAP_MAX].
|
||||
* Output length if wrapping succeeded.
|
||||
*/
|
||||
size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv,
|
||||
unsigned char *out,
|
||||
const unsigned char *in, size_t inlen,
|
||||
block128_f block)
|
||||
{
|
||||
/* n: number of 64-bit blocks in the padded key data
|
||||
*
|
||||
* If length of plain text is not a multiple of 8, pad the plain text octet
|
||||
* string on the right with octets of zeros, where final length is the
|
||||
* smallest multiple of 8 that is greater than length of plain text.
|
||||
* If length of plain text is a multiple of 8, then there is no padding. */
|
||||
const size_t blocks_padded = (inlen + 7) / 8; /* CEILING(m/8) */
|
||||
const size_t padded_len = blocks_padded * 8;
|
||||
const size_t padding_len = padded_len - inlen;
|
||||
/* RFC 5649 section 3: Alternative Initial Value */
|
||||
unsigned char aiv[8];
|
||||
int ret;
|
||||
|
||||
/* Section 1: use 32-bit fixed field for plaintext octet length */
|
||||
if (inlen == 0 || inlen >= CRYPTO128_WRAP_MAX)
|
||||
return 0;
|
||||
|
||||
/* Section 3: Alternative Initial Value */
|
||||
if (!icv)
|
||||
memcpy(aiv, default_aiv, 4);
|
||||
else
|
||||
memcpy(aiv, icv, 4); /* Standard doesn't mention this. */
|
||||
|
||||
aiv[4] = (inlen >> 24) & 0xFF;
|
||||
aiv[5] = (inlen >> 16) & 0xFF;
|
||||
aiv[6] = (inlen >> 8) & 0xFF;
|
||||
aiv[7] = inlen & 0xFF;
|
||||
|
||||
if (padded_len == 8) {
|
||||
/*
|
||||
* Section 4.1 - special case in step 2: If the padded plaintext
|
||||
* contains exactly eight octets, then prepend the AIV and encrypt
|
||||
* the resulting 128-bit block using AES in ECB mode.
|
||||
*/
|
||||
memmove(out + 8, in, inlen);
|
||||
memcpy(out, aiv, 8);
|
||||
memset(out + 8 + inlen, 0, padding_len);
|
||||
block(out, out, key);
|
||||
ret = 16; /* AIV + padded input */
|
||||
} else {
|
||||
memmove(out, in, inlen);
|
||||
memset(out + inlen, 0, padding_len); /* Section 4.1 step 1 */
|
||||
ret = CRYPTO_128_wrap(key, aiv, out, out, padded_len, block);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Unwrapping according to RFC 5649 section 4.2.
|
||||
*
|
||||
* @param[in] key Key value.
|
||||
* @param[in] icv (Non-standard) IV, 4 bytes. NULL = use default_aiv.
|
||||
* @param[out] out Plaintext. Minimal buffer length = inlen bytes.
|
||||
* Input and output buffers can overlap if block function
|
||||
* supports that.
|
||||
* @param[in] in Ciphertext as n 64-bit blocks.
|
||||
* @param[in] inlen Length of in.
|
||||
* @param[in] block Block processing function.
|
||||
* @return 0 if inlen is out of range [16, CRYPTO128_WRAP_MAX],
|
||||
* or if inlen is not a multiple of 8
|
||||
* or if IV and message length indicator doesn't match.
|
||||
* Output length if unwrapping succeeded and IV matches.
|
||||
*/
|
||||
size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv,
|
||||
unsigned char *out,
|
||||
const unsigned char *in, size_t inlen,
|
||||
block128_f block)
|
||||
{
|
||||
/* n: number of 64-bit blocks in the padded key data */
|
||||
size_t n = inlen / 8 - 1;
|
||||
size_t padded_len;
|
||||
size_t padding_len;
|
||||
size_t ptext_len;
|
||||
/* RFC 5649 section 3: Alternative Initial Value */
|
||||
unsigned char aiv[8];
|
||||
static unsigned char zeros[8] = { 0x0 };
|
||||
size_t ret;
|
||||
|
||||
/* Section 4.2: Ciphertext length has to be (n+1) 64-bit blocks. */
|
||||
if ((inlen & 0x7) != 0 || inlen < 16 || inlen >= CRYPTO128_WRAP_MAX)
|
||||
return 0;
|
||||
|
||||
memmove(out, in, inlen);
|
||||
if (inlen == 16) {
|
||||
/*
|
||||
* Section 4.2 - special case in step 1: When n=1, the ciphertext
|
||||
* contains exactly two 64-bit blocks and they are decrypted as a
|
||||
* single AES block using AES in ECB mode: AIV | P[1] = DEC(K, C[0] |
|
||||
* C[1])
|
||||
*/
|
||||
block(out, out, key);
|
||||
memcpy(aiv, out, 8);
|
||||
/* Remove AIV */
|
||||
memmove(out, out + 8, 8);
|
||||
padded_len = 8;
|
||||
} else {
|
||||
padded_len = inlen - 8;
|
||||
ret = crypto_128_unwrap_raw(key, aiv, out, out, inlen, block);
|
||||
if (padded_len != ret) {
|
||||
OPENSSL_cleanse(out, inlen);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Section 3: AIV checks: Check that MSB(32,A) = A65959A6. Optionally a
|
||||
* user-supplied value can be used (even if standard doesn't mention
|
||||
* this).
|
||||
*/
|
||||
if ((!icv && CRYPTO_memcmp(aiv, default_aiv, 4))
|
||||
|| (icv && CRYPTO_memcmp(aiv, icv, 4))) {
|
||||
OPENSSL_cleanse(out, inlen);
|
||||
return 0;
|
||||
}
|
||||
return inlen;
|
||||
|
||||
/*
|
||||
* Check that 8*(n-1) < LSB(32,AIV) <= 8*n. If so, let ptext_len =
|
||||
* LSB(32,AIV).
|
||||
*/
|
||||
|
||||
ptext_len = ((unsigned int)aiv[4] << 24)
|
||||
| ((unsigned int)aiv[5] << 16)
|
||||
| ((unsigned int)aiv[6] << 8)
|
||||
| (unsigned int)aiv[7];
|
||||
if (8 * (n - 1) >= ptext_len || ptext_len > 8 * n) {
|
||||
OPENSSL_cleanse(out, inlen);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that the rightmost padding_len octets of the output data are
|
||||
* zero.
|
||||
*/
|
||||
padding_len = padded_len - ptext_len;
|
||||
if (CRYPTO_memcmp(out + ptext_len, zeros, padding_len) != 0) {
|
||||
OPENSSL_cleanse(out, inlen);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Section 4.2 step 3: Remove padding */
|
||||
return ptext_len;
|
||||
}
|
||||
|
||||
@@ -1,63 +1,16 @@
|
||||
/* ====================================================================
|
||||
* Copyright (c) 2011 The OpenSSL Project. All rights reserved.
|
||||
/*
|
||||
* Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
||||
*
|
||||
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
||||
* endorse or promote products derived from this software without
|
||||
* prior written permission. For written permission, please contact
|
||||
* openssl-core@openssl.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "OpenSSL"
|
||||
* nor may "OpenSSL" appear in their names without prior written
|
||||
* permission of the OpenSSL Project.
|
||||
*
|
||||
* 6. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by the OpenSSL Project
|
||||
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
|
||||
#include <openssl/crypto.h>
|
||||
#include "modes_lcl.h"
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MODES_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx,
|
||||
const unsigned char iv[16],
|
||||
const unsigned char *inp, unsigned char *out,
|
||||
|
||||
Reference in New Issue
Block a user